# Source code for mindspore_gl.graph.csr_convert

# Copyright 2022 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Convert the coo graph to the csr graph."""
import numpy as np
import mindspore as ms
import scipy


def csr_data(row_indices, col_indices, n_nodes, n_edges):
    """Convert the COO format to the CSR format.

    Two CSR structures are built: the forward one from
    ``(row_indices, col_indices)`` and the backward (transposed) one from
    ``(col_indices, row_indices)``.

    Args:
        row_indices (numpy.ndarray): row index of every COO entry.
        col_indices (numpy.ndarray): column index of every COO entry.
        n_nodes (int): number of nodes; both matrices have shape
            ``(n_nodes, n_nodes)``.
        n_edges (int): number of COO entries; one unit value is created per
            entry, so it has to match the length of the index arrays.

    Returns:
        tuple, ``(indices, indptr, indices_backward, indptr_backward)``, each an
        int32 ``mindspore.Tensor``.
    """
    # Import the submodule explicitly: a bare ``import scipy`` does not
    # guarantee that ``scipy.sparse`` is available as an attribute on every
    # SciPy release.
    from scipy import sparse

    shape = (n_nodes, n_nodes)

    # NOTE(review): SciPy's COO -> CSR conversion is expected to merge duplicate
    # entries, so repeated edges presumably collapse into one - confirm that
    # callers never pass multi-edges.
    csr_forward = sparse.coo_matrix(
        (np.ones(n_edges), (row_indices, col_indices)), shape=shape).tocsr()
    # Swapping rows and columns yields the CSR layout of the transposed graph.
    csr_backward = sparse.coo_matrix(
        (np.ones(n_edges), (col_indices, row_indices)), shape=shape).tocsr()

    indices = ms.Tensor(np.asarray(csr_forward.indices, np.int32), ms.int32)
    indptr = ms.Tensor(np.asarray(csr_forward.indptr, np.int32), ms.int32)
    indices_backward = ms.Tensor(np.asarray(csr_backward.indices, np.int32), ms.int32)
    indptr_backward = ms.Tensor(np.asarray(csr_backward.indptr, np.int32), ms.int32)
    return indices, indptr, indices_backward, indptr_backward

def rerank_index(out_deg, row_indices, col_indices):
    """Relabel the nodes so that they are ordered by descending out degree.

    Args:
        out_deg (numpy.ndarray): degree value of every node, used as sort key.
        row_indices (numpy.ndarray): row index of every COO entry.
        col_indices (numpy.ndarray): column index of every COO entry.

    Returns:
        tuple, ``(row_indices_forward, col_indices_forward, idx_forward,
        arg_idx_forward)``. ``idx_forward[k]`` is the original id of the node
        placed at position ``k``; ``arg_idx_forward`` (int32) is the inverse
        mapping from original id to new position; the two index arrays are the
        inputs rewritten with the new node ids.
    """
    # Ascending argsort read backwards gives the descending-degree order.
    descending_order = np.argsort(out_deg)[::-1]
    # Sorting the permutation itself yields its inverse: original id -> new id.
    new_position = np.argsort(descending_order).astype(np.int32)
    relabelled_rows = new_position[row_indices]
    relabelled_cols = new_position[col_indices]
    return relabelled_rows, relabelled_cols, descending_order, new_position

def graph_csr_data(src_idx, dst_idx, n_nodes, n_edges, node_feat=None, node_label=None,
                   train_mask=None, val_mask=None, test_mask=None, rerank=False):
    r"""
    Convert the entire graph in the COO format to the CSR format.

    Args:
        src_idx (Union[Tensor, numpy.ndarray]): tensor with shape :math:`(N\_EDGES)`, with int dtype,
            represents the source node index of COO edge matrix.
        dst_idx (Union[Tensor, numpy.ndarray]): tensor with shape :math:`(N\_EDGES)`, with int dtype,
            represents the destination node index of COO edge matrix.
        n_nodes (int): integer, represent the nodes count of the graph.
        n_edges (int): integer, represent the edges count of the graph.
        node_feat (Union[Tensor, numpy.ndarray], optional): node feature. Default: ``None``.
        node_label (Union[Tensor, numpy.ndarray], optional): node labels. Default: ``None``.
        train_mask (Union[Tensor, numpy.ndarray], optional): mask of train index. Default: ``None``.
        val_mask (Union[Tensor, numpy.ndarray], optional): mask of val index. Default: ``None``.
        test_mask (Union[Tensor, numpy.ndarray], optional): mask of test index. Default: ``None``.
        rerank (bool, optional): whether to reorder node features, node labels, and masks.
            Default: ``False``.

    Returns:
        - **csr_g** (tuple) - info of csr graph, it contains indices of csr graph, indptr of csr graph,
          node numbers of csr graph, edges numbers of csr graph, pre-stored backward indices of csr graph,
          pre-stored backward indptr of csr graph.
        - **in_deg** (Tensor) - int32 tensor, number of occurrences of each node in `src_idx`.
        - **out_deg** (Tensor) - int32 tensor, number of occurrences of each node in `dst_idx`.
        - **node_feat** (Union[Tensor, numpy.ndarray], optional) - reorder node features.
        - **node_label** (Union[Tensor, numpy.ndarray], optional) - reorder node labels.
        - **train_mask** (Union[Tensor, numpy.ndarray], optional) - reorder train index mask.
        - **val_mask** (Union[Tensor, numpy.ndarray], optional) - reorder val index mask.
        - **test_mask** (Union[Tensor, numpy.ndarray], optional) - reorder test index mask.

        The per-node inputs are returned unchanged when `rerank` is ``False``; inputs given as
        ``None`` are returned as ``None``.

    Supported Platforms:
        ``Ascend`` ``GPU``

    Examples:
        >>> import numpy as np
        >>> from mindspore_gl.graph import graph_csr_data
        >>> node_feat = np.array([[1, 2, 3, 4], [2, 4, 1, 3], [1, 3, 2, 4],
        ...                       [9, 7, 5, 8], [8, 7, 6, 5], [8, 6, 4, 6], [1, 2, 1, 1]], np.float32)
        >>> n_nodes = 7
        >>> n_edges = 8
        >>> edge_feat_size = 7
        >>> src_idx = np.array([0, 2, 2, 3, 4, 5, 5, 6], np.int32)
        >>> dst_idx = np.array([1, 0, 1, 5, 3, 4, 6, 4], np.int32)
        >>> node_label = np.array([0, 1, 0, 1, 0, 1, 0])
        >>> train_mask = np.array([True, True, True, True, False, False, False])
        >>> val_mask = np.array([False, False, False, False, True, True, True])
        >>> g, in_deg, out_deg, node_feat, node_label, train_mask, val_mask, test_mask = graph_csr_data(
        ...     src_idx, dst_idx, n_nodes, n_edges, node_feat, node_label,
        ...     train_mask, val_mask, test_mask=None, rerank=True)
        >>> print(g[0], g[1])
        [2 3 5 6 3 4 0 6] [0 2 4 5 6 7 8 8]
        >>> print(node_feat, node_label)
        [[8. 7. 6. 5.]
         [2. 4. 1. 3.]
         [1. 2. 1. 1.]
         [8. 6. 4. 6.]
         [9. 7. 5. 8.]
         [1. 2. 3. 4.]
         [1. 3. 2. 4.]] [0 1 0 1 1 0 0]
        >>> print(train_mask, val_mask)
        [False  True False False  True  True  True] [ True False  True  True False False False]
    """
    if isinstance(dst_idx, ms.Tensor):
        dst_idx = dst_idx.asnumpy()
    if isinstance(src_idx, ms.Tensor):
        src_idx = src_idx.asnumpy()
    # Destination nodes index the CSR rows, source nodes the CSR columns.
    row_indices = np.array(dst_idx, np.int32)
    col_indices = np.array(src_idx, np.int32)
    # NOTE(review): ``out_deg`` is counted over the rows (``dst_idx``) and
    # ``in_deg`` over the columns (``src_idx``); kept as is because the
    # reordering and the documented example depend on it.
    out_deg = np.bincount(row_indices, minlength=n_nodes)
    in_deg = np.bincount(col_indices, minlength=n_nodes)

    if rerank:
        row_indices, col_indices, idx_forward, _ = rerank_index(out_deg, row_indices, col_indices)
        idx_forward = idx_forward.tolist()
        in_deg = in_deg[idx_forward]
        out_deg = out_deg[idx_forward]
        # Every per-node input is optional (defaults to None), so each one is
        # only permuted when it was actually supplied.
        if node_feat is not None:
            node_feat = node_feat[idx_forward]
        if node_label is not None:
            node_label = node_label[idx_forward]
        if train_mask is not None:
            train_mask = train_mask[idx_forward]
        if val_mask is not None:
            val_mask = val_mask[idx_forward]
        if test_mask is not None:
            test_mask = test_mask[idx_forward]

    indices, indptr, indices_backward, indptr_backward = csr_data(row_indices, col_indices, n_nodes, n_edges)
    in_deg = ms.Tensor(in_deg, ms.int32)
    out_deg = ms.Tensor(out_deg, ms.int32)
    csr_g = (indices, indptr, n_nodes, n_edges, indices_backward, indptr_backward)
    return csr_g, in_deg, out_deg, node_feat, node_label, train_mask, val_mask, test_mask
def sampling_csr_data(src_idx, dst_idx, n_nodes, n_edges, seeds_idx=None, node_feat=None, rerank=False):
    r"""
    Convert the sampling graph in the COO format to the CSR format.

    Args:
        src_idx (Union[Tensor, numpy.ndarray]): tensor with shape :math:`(N\_EDGES)`, with int dtype,
            represents the source node index of COO edge matrix.
        dst_idx (Union[Tensor, numpy.ndarray]): tensor with shape :math:`(N\_EDGES)`, with int dtype,
            represents the destination node index of COO edge matrix.
        n_nodes (int): integer, represent the nodes count of the graph.
        n_edges (int): integer, represent the edges count of the graph.
        seeds_idx (numpy.ndarray, optional): start nodes for neighbor sampling. Default: ``None``.
        node_feat (Union[Tensor, numpy.ndarray], optional): node feature. Default: ``None``.
        rerank (bool, optional): whether to reorder node features, node labels, and masks.
            Default: ``False``.

    Returns:
        - **csr_g** (tuple) - info of csr graph, it contains indices of csr graph, indptr of csr graph,
          node numbers of csr graph, edges numbers of csr graph, pre-stored backward indices of csr graph,
          pre-stored backward indptr of csr graph.
        - **seeds_idx** (list) - reordered start nodes. Returned unchanged when `rerank` is ``False``
          or when ``None`` was passed.
        - **node_feat** (Union[Tensor, numpy.ndarray], optional) - reorder node features. Returned
          unchanged when `rerank` is ``False`` or when ``None`` was passed.

    Raises:
        KeyError: If `rerank` is ``True`` and `seeds_idx` contains a value outside
            ``[0, n_nodes)``.

    Supported Platforms:
        ``Ascend`` ``GPU``

    Examples:
        >>> import numpy as np
        >>> from mindspore_gl.graph import sampling_csr_data
        >>> node_feat = np.array([[1, 2, 3, 4], [2, 4, 1, 3], [1, 3, 2, 4],
        ...                       [9, 7, 5, 8], [8, 7, 6, 5], [8, 6, 4, 6], [1, 2, 1, 1]], np.float32)
        >>> n_nodes = 7
        >>> n_edges = 8
        >>> edge_feat_size = 7
        >>> src_idx = np.array([0, 2, 2, 3, 4, 5, 5, 6], np.int32)
        >>> dst_idx = np.array([1, 0, 1, 5, 3, 4, 6, 4], np.int32)
        >>> seeds_idx = np.array([0, 3, 5])
        >>> g, seeds_idx, node_feat = sampling_csr_data(src_idx, dst_idx, n_nodes, n_edges,
        ...                                             seeds_idx, node_feat, rerank=True)
        >>> print(g[0], g[1], seeds_idx)
        [2 3 5 6 3 4 0 6] [0 2 4 5 6 7 8 8] [5, 4, 3]
        >>> print(node_feat)
        [[8. 7. 6. 5.]
         [2. 4. 1. 3.]
         [1. 2. 1. 1.]
         [8. 6. 4. 6.]
         [9. 7. 5. 8.]
         [1. 2. 3. 4.]
         [1. 3. 2. 4.]]
    """
    if isinstance(dst_idx, ms.Tensor):
        dst_idx = dst_idx.asnumpy()
    if isinstance(src_idx, ms.Tensor):
        src_idx = src_idx.asnumpy()
    # Destination nodes index the CSR rows, source nodes the CSR columns.
    row_indices = np.array(dst_idx, np.int32)
    col_indices = np.array(src_idx, np.int32)
    out_deg = np.bincount(row_indices, minlength=n_nodes)

    if rerank:
        row_indices, col_indices, idx_forward, arg_idx_forward = rerank_index(out_deg, row_indices, col_indices)
        # Both per-node inputs default to None, so only remap what was supplied.
        if seeds_idx is not None:
            # Lookup table original node id -> new node id; a dict is used so
            # that an invalid seed fails loudly instead of wrapping around.
            idx_dict = dict(enumerate(arg_idx_forward))
            seeds_idx = [idx_dict[i] for i in seeds_idx]
        if node_feat is not None:
            node_feat = node_feat[idx_forward.tolist(), :]

    indices, indptr, indices_backward, indptr_backward = csr_data(row_indices, col_indices, n_nodes, n_edges)
    csr_g = (indices, indptr, n_nodes, n_edges, indices_backward, indptr_backward)
    return csr_g, seeds_idx, node_feat
def batch_graph_csr_data(src_idx, dst_idx, n_nodes, n_edges, node_map_idx, node_feat=None, rerank=False):
    r"""
    Convert the batched graph in the COO format to the CSR format.

    Args:
        src_idx (Union[Tensor, numpy.ndarray]): tensor with shape :math:`(N\_EDGES)`, with int dtype,
            represents the source node index of COO edge matrix.
        dst_idx (Union[Tensor, numpy.ndarray]): tensor with shape :math:`(N\_EDGES)`, with int dtype,
            represents the destination node index of COO edge matrix.
        n_nodes (int): integer, represent the nodes count of the graph.
        n_edges (int): integer, represent the edges count of the graph.
        node_map_idx (numpy.ndarray): ID of the subgraph to each node belongs to.
        node_feat (Union[Tensor, numpy.ndarray], optional): node feature. Default: ``None``.
        rerank (bool, optional): whether to reorder node features, node labels, and masks.
            Default: ``False``.

    Returns:
        - **csr_g** (tuple) - info of csr graph, it contains indices of csr graph, indptr of csr graph,
          node numbers of csr graph, edges numbers of csr graph, pre-stored backward indices of csr graph,
          pre-stored backward indptr of csr graph.
        - **node_map_idx** (numpy.ndarray) - reordered start map index.
        - **node_feat** (Union[Tensor, numpy.ndarray], optional) - reorder node features. Returned
          unchanged when `rerank` is ``False`` or when ``None`` was passed.

    Supported Platforms:
        ``Ascend`` ``GPU``

    Examples:
        >>> import numpy as np
        >>> from mindspore_gl.graph import batch_graph_csr_data
        >>> node_feat = np.array([[1, 2, 3, 4], [2, 4, 1, 3], [1, 3, 2, 4],
        ...                       [9, 7, 5, 8], [8, 7, 6, 5], [8, 6, 4, 6], [1, 2, 1, 1]], np.float32)
        >>> n_nodes = 7
        >>> n_edges = 8
        >>> edge_feat_size = 7
        >>> src_idx = np.array([0, 2, 2, 3, 4, 5, 5, 6], np.int32)
        >>> dst_idx = np.array([1, 0, 1, 5, 3, 4, 6, 4], np.int32)
        >>> node_map_idx = np.array([0, 0, 0, 0, 1, 1, 1])
        >>> g, node_map_idx, node_feat = batch_graph_csr_data(src_idx, dst_idx,
        ...                                                   n_nodes, n_edges, node_map_idx, node_feat, rerank=True)
        >>> print(g[0], g[1], node_map_idx)
        [2 3 5 6 3 4 0 6] [0 2 4 5 6 7 8 8] [1 0 1 1 0 0 0]
        >>> print(node_feat)
        [[8. 7. 6. 5.]
         [2. 4. 1. 3.]
         [1. 2. 1. 1.]
         [8. 6. 4. 6.]
         [9. 7. 5. 8.]
         [1. 2. 3. 4.]
         [1. 3. 2. 4.]]
    """
    if isinstance(dst_idx, ms.Tensor):
        dst_idx = dst_idx.asnumpy()
    if isinstance(src_idx, ms.Tensor):
        src_idx = src_idx.asnumpy()
    # Destination nodes index the CSR rows, source nodes the CSR columns.
    row_indices = np.array(dst_idx, np.int32)
    col_indices = np.array(src_idx, np.int32)
    out_deg = np.bincount(row_indices, minlength=n_nodes)

    if rerank:
        row_indices, col_indices, idx_forward, _ = rerank_index(out_deg, row_indices, col_indices)
        idx_forward = idx_forward.tolist()
        # ``node_feat`` defaults to None, so it is only permuted when supplied.
        if node_feat is not None:
            node_feat = node_feat[idx_forward]
        node_map_idx = node_map_idx[idx_forward]

    indices, indptr, indices_backward, indptr_backward = csr_data(row_indices, col_indices, n_nodes, n_edges)
    csr_g = (indices, indptr, n_nodes, n_edges, indices_backward, indptr_backward)
    return csr_g, node_map_idx, node_feat