Python source code of the `Reddit` dataset class

pytorch_geometric-master
- .github
  - ISSUE_TEMPLATE
    - bug-report.md
    - question-help.md
    - installation.md
    - feature-request.md
- script
  - conda.sh
- .coveragerc
- examples
  - ogbn_products_gat.py
  - mnist_voxel_grid.py
  - reddit.py
  - link_pred.py
  - mutag_gin.py
  - infomax.py
  - dgcnn_segmentation.py
  - geniepath.py
  - agnn.py
  - mnist_nn_conv.py
  - triangles_sag_pool.py
  - pointnet2_classification.py
  - faust.py
  - sign.py
  - cora.py
  - metapath2vec.py
  - qm9_nn_conv.py
  - jit
    - gin.py
    - gat.py
  - mnist_graclus.py
  - pointnet2_segmentation.py
  - gnn_explainer.py
  - ppi.py
  - cluster_gcn_reddit.py
  - ogbn_products_sage.py
  - qm9_pretrained_dimenet.py
  - proteins_diff_pool.py
  - data_parallel.py
  - graph_saint.py
  - tensorboard_logging.py
  - tagcn.py
  - gat.py
  - gcn.py
  - argva_node_clustering.py
  - cluster_gcn_ppi.py
  - proteins_mincut_pool.py
  - qm9_pretrained_schnet.py
  - node2vec.py
  - graph_unet.py
  - arma.py
  - sgc.py
  - signed_gcn.py
  - autoencoder.py
  - dgcnn_classification.py
  - pna.py
  - proteins_topk_pool.py
  - dna.py
  - colors_topk_pool.py
  - rgcn.py
  - renet.py
  - seal_link_pred.py
- readthedocs.yml
- LICENSE
- test
  - datasets
    - test_enzymes.py
    - test_snap_dataset.py
    - test_planetoid.py
    - test_suite_sparse.py
    - test_bzr.py
    - test_mutag.py
    - test_karate.py
  - utils
    - test_grid.py
    - test_random.py
    - test_sparse.py
    - test_to_dense_batch.py
    - test_convert.py
    - test_dropout.py
    - test_undirected.py
    - test_softmax.py
    - test_repeat.py
    - test_loop.py
    - test_metric.py
    - test_sort_edge_index.py
    - test_negative_sampling.py
    - test_to_dense_adj.py
    - test_degree.py
    - test_normalized_cut.py
    - test_get_laplacian.py
    - test_subgraph.py
    - test_train_test_split_edges.py
    - test_geodesic.py
    - test_isolated.py
  - nn
    - pool
      - test_sag_pool.py
      - test_asap.py
      - test_topk_pool.py
      - test_avg_pool.py
      - test_graclus.py
      - test_max_pool.py
      - test_voxel_grid.py
      - test_consecutive.py
      - test_edge_pool.py
    - test_inits.py
    - models
      - test_jumping_knowledge.py
      - test_graph_unet.py
      - test_re_net.py
      - test_deep_graph_infomax.py
      - test_gnn_explainer.py
      - test_metapath2vec.py
      - test_signed_gcn.py
      - test_autoencoder.py
      - test_node2vec.py
    - test_reshape.py
    - unpool
      - test_knn_interpolate.py
    - test_data_parallel.py
    - dense
      - test_dense_gcn_conv.py
      - test_dense_sage_conv.py
      - test_dense_graph_conv.py
      - test_mincut_pool.py
      - test_diff_pool.py
      - test_dense_gin_conv.py
    - conv
      - test_pna_conv.py
      - test_dna_conv.py
      - test_gat_conv.py
      - test_le_conv.py
      - test_rgcn_conv.py
      - test_arma_conv.py
      - test_gravnet_conv.py
      - test_mf_conv.py
      - test_create_gnn.py
      - test_ppf_conv.py
      - test_point_conv.py
      - test_static_graph.py
      - test_sg_conv.py
      - test_cluster_gcn_conv.py
      - test_hypergraph_conv.py
      - test_appnp.py
      - test_feast_conv.py
      - test_cheb_conv.py
      - test_cg_conv.py
      - test_gcn_conv.py
      - test_sage_conv.py
      - test_x_conv.py
      - test_gated_graph_conv.py
      - test_tag_conv.py
      - test_nn_conv.py
      - test_spline_conv.py
      - test_agnn_conv.py
      - test_edge_conv.py
      - test_graph_conv.py
      - test_signed_conv.py
      - test_message_passing.py
      - test_gin_conv.py
      - test_gmm_conv.py
    - test_meta.py
    - glob
      - test_sort.py
      - test_attention.py
      - test_set2set.py
      - test_glob.py
    - norm
      - test_graph_size_norm.py
      - test_instance_norm.py
      - test_batch_norm.py
  - data
    - test_inherit.py
    - test_cluster.py
    - test_split.py
    - test_dataset.py
    - test_batch.py
    - test_dataloader.py
    - test_data.py
    - test_graph_saint.py
    - test_sampler.py
  - io
    - example2.off
    - test_off.py
    - example1.off
  - test_debug.py
  - visualization
    - test_influence.py
  - transforms
    - test_laplacian_lambda_max.py
    - test_center.py
    - test_normalize_scale.py
    - test_radius_graph.py
    - test_target_indegree.py
    - test_random_scale.py
    - test_random_shear.py
    - test_normalize_rotation.py
    - test_sample_points.py
    - test_generate_normals.py
    - test_delaunay.py
    - test_constant.py
    - test_gdc.py
    - test_polar.py
    - test_random_flip.py
    - test_cartesian.py
    - test_two_hop.py
    - test_random_translate.py
    - test_local_cartesian.py
    - test_remove_isolated_nodes.py
    - test_spherical.py
    - test_one_hot_degree.py
    - test_point_pair_features.py
    - test_to_superpixels.py
    - test_distance.py
    - test_add_self_loops.py
    - test_fixed_points.py
    - test_grid_sampling.py
    - test_compose.py
    - test_to_dense.py
    - test_random_rotate.py
    - test_line_graph.py
    - test_linear_transformation.py
    - test_face_to_edge.py
    - test_normalize_features.py
    - test_local_degree_profile.py
    - test_knn_graph.py
- CONTRIBUTING.md
- setup.py
- docker
  - singularity
  - README.md
  - Dockerfile
- .style.yapf
- torch_geometric
  - datasets
    - aminer.py
    - planetoid.py
    - reddit.py
    - yelp.py
    - entities.py
    - amazon.py
    - s3dis.py
    - icews.py
    - gdelt.py
    - dynamic_faust.py
    - pascal_pf.py
    - faust.py
    - citation_full.py
    - shrec2016.py
    - flickr.py
    - coma.py
    - suite_sparse.py
    - tu_dataset.py
    - ppi.py
    - geometry.py
    - willow_object_class.py
    - tosca.py
    - zinc.py
    - modelnet.py
    - dbp15k.py
    - pcpnet_dataset.py
    - coauthor.py
    - pascal.py
    - qm9.py
    - karate.py
    - __init__.py
    - bitcoin_otc.py
    - mnist_superpixels.py
    - gnn_benchmark_dataset.py
    - word_net.py
    - snap_dataset.py
    - particle.py
    - molecule_net.py
    - shapenet.py
    - qm7.py
    - ged_dataset.py
  - utils
    - to_dense_adj.py
    - subgraph.py
    - undirected.py
    - metric.py
    - convert.py
    - softmax.py
    - degree.py
    - normalized_cut.py
    - negative_sampling.py
    - random.py
    - isolated.py
    - grid.py
    - hetero.py
    - to_dense_batch.py
    - get_laplacian.py
    - sort_edge_index.py
    - dropout.py
    - loop.py
    - repeat.py
    - num_nodes.py
    - __init__.py
    - tree_decomposition.py
    - sparse.py
    - geodesic.py
    - train_test_split_edges.py
  - nn
    - pool
      - pool.py
      - edge_pool.py
      - consecutive.py
      - sag_pool.py
      - max_pool.py
      - voxel_grid.py
      - asap.py
      - topk_pool.py
      - __init__.py
      - avg_pool.py
      - graclus.py
    - acts.py
    - inits.py
    - models
      - metapath2vec.py
      - dimenet_utils.py
      - gnn_explainer.py
      - jumping_knowledge.py
      - node2vec.py
      - deep_graph_infomax.py
      - graph_unet.py
      - __init__.py
      - signed_gcn.py
      - dimenet.py
      - re_net.py
      - autoencoder.py
      - schnet.py
    - data_parallel.py
    - unpool
      - __init__.py
      - knn_interpolate.py
    - meta.py
    - dense
      - dense_graph_conv.py
      - dense_sage_conv.py
      - mincut_pool.py
      - dense_gin_conv.py
      - diff_pool.py
      - __init__.py
      - dense_gcn_conv.py
    - __init__.py
    - conv
      - gat_conv.py
      - point_conv.py
      - cluster_gcn_conv.py
      - utils
        - inspector.py
        - jit.py
        - __init__.py
        - helpers.py
        - typing.py
      - cheb_conv.py
      - gmm_conv.py
      - arma_conv.py
      - sage_conv.py
      - mf_conv.py
      - graph_conv.py
      - gin_conv.py
      - gated_graph_conv.py
      - signed_conv.py
      - edge_conv.py
      - rgcn_conv.py
      - agnn_conv.py
      - x_conv.py
      - nn_conv.py
      - appnp.py
      - tag_conv.py
      - le_conv.py
      - gcn_conv.py
      - cg_conv.py
      - dna_conv.py
      - __init__.py
      - feast_conv.py
      - pna_conv.py
      - message_passing.jinja
      - hypergraph_conv.py
      - gravnet_conv.py
      - spline_conv.py
      - ppf_conv.py
      - sg_conv.py
      - message_passing.py
    - glob
      - set2set.py
      - attention.py
      - glob.py
      - __init__.py
      - sort.py
    - reshape.py
    - norm
      - batch_norm.py
      - graph_size_norm.py
      - instance_norm.py
      - __init__.py
  - data
    - extract.py
    - data.py
    - sampler.py
    - download.py
    - dataloader.py
    - graph_saint.py
    - makedirs.py
    - batch.py
    - __init__.py
    - cluster.py
    - dataset.py
    - in_memory_dataset.py
  - io
    - planetoid.py
    - ply.py
    - sdf.py
    - off.py
    - txt_array.py
    - npz.py
    - __init__.py
    - obj.py
    - tu.py
  - __init__.py
  - visualization
    - influence.py
    - __init__.py
  - debug.py
  - transforms
    - cartesian.py
    - grid_sampling.py
    - line_graph.py
    - one_hot_degree.py
    - remove_isolated_nodes.py
    - normalize_features.py
    - random_scale.py
    - random_translate.py
    - compose.py
    - two_hop.py
    - sign.py
    - point_pair_features.py
    - radius_graph.py
    - local_degree_profile.py
    - delaunay.py
    - add_self_loops.py
    - gdc.py
    - to_sparse_tensor.py
    - center.py
    - to_dense.py
    - local_cartesian.py
    - laplacian_lambda_max.py
    - linear_transformation.py
    - random_flip.py
    - face_to_edge.py
    - spherical.py
    - target_indegree.py
    - sample_points.py
    - normalize_rotation.py
    - __init__.py
    - constant.py
    - generate_mesh_normals.py
    - normalize_scale.py
    - knn_graph.py
    - random_shear.py
    - to_superpixels.py
    - fixed_points.py
    - distance.py
    - random_rotate.py
    - polar.py
  - typing.py
- setup.cfg
- .travis.yml
- README.md
- .gitignore
- docs
  - Makefile
  - source
    - notes
      - introduction.rst
      - create_gnn.rst
      - batching.rst
      - jit.rst
      - installation.rst
      - resources.rst
      - create_dataset.rst
    - index.rst
    - conf.py
    - _figures
      - graph.tex
      - graph.svg
      - .gitignore
      - build.sh
    - modules
      - nn.rst
      - datasets.rst
      - io.rst
      - data.rst
      - transforms.rst
      - root.rst
      - utils.rst
    - _static
      - img
        - pyg_logo.svg
        - pyg_logo_text.svg
      - css
        - custom.css
  - .nojekyll
  - requirements.txt
  - index.html
- MANIFEST.in
- benchmark
  - kernel
    - statistics.py
    - sort_pool.py
    - top_k.py
    - edge_pool.py
    - global_attention.py
    - set2set.py
    - sag_pool.py
    - asap.py
    - gin.py
    - gcn.py
    - datasets.py
    - diff_pool.py
    - __init__.py
    - main.py
    - train_eval.py
    - README.md
    - graph_sage.py
    - graclus.py
  - points
    - statistics.py
    - point_cnn.py
    - edge_cnn.py
    - spline_cnn.py
    - mpnn.py
    - point_net.py
    - datasets.py
    - __init__.py
    - train_eval.py
    - README.md
  - runtime
    - gat.py
    - gcn.py
    - dgl
      - gat.py
      - gcn.py
      - train.py
      - hidden.py
      - main.py
      - rgcn.py
    - train.py
    - __init__.py
    - main.py
    - README.md
    - rgcn.py
  - citation
    - statistics.py
    - cheb.py
    - run.sh
    - appnp.py
    - gat.py
    - gcn.py
    - datasets.py
    - __init__.py
    - train_eval.py
    - README.md
    - arma.py
    - sgc.py
  - setup.py
  - README.md

import os
import os.path as osp

import torch
import numpy as np
import scipy.sparse as sp
from torch_sparse import coalesce
from torch_geometric.data import (InMemoryDataset, Data, download_url,
                                  extract_zip)


class Reddit(InMemoryDataset):
    r"""The Reddit dataset from the `"Inductive Representation Learning on
    Large Graphs" <https://arxiv.org/abs/1706.02216>`_ paper, containing
    Reddit posts belonging to different communities.

    Args:
        root (string): Root directory where the dataset should be saved.
        transform (callable, optional): A function/transform that takes in an
            :obj:`torch_geometric.data.Data` object and returns a transformed
            version. The data object will be transformed before every access.
            (default: :obj:`None`)
        pre_transform (callable, optional): A function/transform that takes in
            an :obj:`torch_geometric.data.Data` object and returns a
            transformed version. The data object will be transformed before
            being saved to disk. (default: :obj:`None`)
    """

    # Archive that provides the two raw ``.npz`` files listed in
    # :attr:`raw_file_names`.
    url = 'https://s3.us-east-2.amazonaws.com/dgl.ai/dataset/reddit.zip'

    def __init__(self, root, transform=None, pre_transform=None):
        super(Reddit, self).__init__(root, transform, pre_transform)
        # Load the collated ``(data, slices)`` pair that :meth:`process`
        # wrote to the first processed path.
        self.data, self.slices = torch.load(self.processed_paths[0])

    @property
    def raw_file_names(self):
        """Names of the raw files read by :meth:`process`."""
        return ['reddit_data.npz', 'reddit_graph.npz']

    @property
    def processed_file_names(self):
        """Name of the processed file written by :meth:`process`."""
        return 'data.pt'

    def download(self):
        """Download the zip archive into ``raw_dir``, extract it there and
        delete the archive afterwards."""
        path = download_url(self.url, self.raw_dir)
        extract_zip(path, self.raw_dir)
        os.unlink(path)

    def process(self):
        """Build a single graph from the raw files and save it to disk.

        Reads the ``feature``, ``label`` and ``node_types`` arrays from
        ``reddit_data.npz`` and the sparse adjacency matrix from
        ``reddit_graph.npz``, assembles a ``Data`` object with
        ``train_mask``/``val_mask``/``test_mask`` attributes, applies
        ``pre_transform`` if one was given, and saves the collated result
        to ``processed_paths[0]``.
        """
        # ``np.load`` on an ``.npz`` archive returns a lazily-read ``NpzFile``
        # that keeps the underlying file open. Indexing it materializes the
        # arrays in memory, so the archive can be closed right after.
        with np.load(osp.join(self.raw_dir, 'reddit_data.npz')) as raw:
            x = torch.from_numpy(raw['feature']).to(torch.float)
            y = torch.from_numpy(raw['label']).to(torch.long)
            split = torch.from_numpy(raw['node_types'])

        # ``row``/``col`` are only available on COO matrices, while
        # ``load_npz`` returns whatever format was stored. ``tocoo()`` returns
        # the matrix itself when it already is COO.
        adj = sp.load_npz(osp.join(self.raw_dir, 'reddit_graph.npz')).tocoo()
        row = torch.from_numpy(adj.row).to(torch.long)
        col = torch.from_numpy(adj.col).to(torch.long)
        edge_index = torch.stack([row, col], dim=0)
        # No edge values are passed (``None``); only the coalesced index is
        # kept. Both matrix dimensions equal the number of nodes.
        edge_index, _ = coalesce(edge_index, None, x.size(0), x.size(0))

        data = Data(x=x, edge_index=edge_index, y=y)
        # ``node_types`` encodes the split per node: 1 -> train, 2 -> val,
        # 3 -> test.
        data.train_mask = split == 1
        data.val_mask = split == 2
        data.test_mask = split == 3

        data = data if self.pre_transform is None else self.pre_transform(data)

        torch.save(self.collate([data]), self.processed_paths[0])

    def __repr__(self):
        return '{}()'.format(self.__class__.__name__)