Python torch_geometric.data.DataLoader() Examples

The following are 19 code examples of torch_geometric.data.DataLoader(), drawn from open-source projects. Each example notes its original project and source file. You may also want to check out all available functions/classes of the module torch_geometric.data. A minimal usage sketch appears below, followed by the project examples.
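Here is a minimal, self-contained sketch of the basic usage pattern. It is not taken from any of the projects below; TUDataset and the ENZYMES dataset are used purely as an illustrative assumption, and any PyTorch Geometric dataset works the same way:

from torch_geometric.datasets import TUDataset
from torch_geometric.data import DataLoader

# illustrative dataset; substitute any torch_geometric dataset
dataset = TUDataset(root='/tmp/ENZYMES', name='ENZYMES')
loader = DataLoader(dataset, batch_size=32, shuffle=True)

for batch in loader:
    # each batch is a single disconnected graph; batch.batch maps
    # every node to the index of the graph it came from
    print(batch.num_graphs, batch.x.size(), batch.batch.size())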
Example #1
Source File: train_eval.py    From pytorch_geometric with MIT License
def run(train_dataset, test_dataset, model, epochs, batch_size, lr,
        lr_decay_factor, lr_decay_step_size, weight_decay):

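    # device, train() and test() are defined at module level in the original source file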
    model = model.to(device)
    optimizer = Adam(model.parameters(), lr=lr, weight_decay=weight_decay)

    train_loader = DataLoader(train_dataset, batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size, shuffle=False)

    for epoch in range(1, epochs + 1):
        if torch.cuda.is_available():
            torch.cuda.synchronize()

        t_start = time.perf_counter()

        train(model, optimizer, train_loader, device)
        test_acc = test(model, test_loader, device)

        if torch.cuda.is_available():
            torch.cuda.synchronize()

        t_end = time.perf_counter()

        print('Epoch: {:03d}, Test: {:.4f}, Duration: {:.2f}'.format(
            epoch, test_acc, t_end - t_start))

        if epoch % lr_decay_step_size == 0:
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr_decay_factor * param_group['lr'] 
Example #2
Source File: py2graph.py    From ogb with MIT License
def test_transform(py2graph):
    code = '''
from ogb.graphproppred import PygGraphPropPredDataset
from torch_geometric.data import DataLoader

dataset = PygGraphPropPredDataset(name = "ogbg-molhiv")

split_idx = dataset.get_idx_split() 
train_loader = DataLoader(dataset[split_idx["train"]], batch_size=32, shuffle=True)
valid_loader = DataLoader(dataset[split_idx["valid"]], batch_size=32, shuffle=False)
test_loader = DataLoader(dataset[split_idx["test"]], batch_size=32, shuffle=False)
'''

    graph = py2graph(code)
    print(graph)

    invalid_code = '''
import antigravity
xkcd loves Python
'''
    
    try:
        graph = py2graph(invalid_code)
    except SyntaxError:
        print('Successfully caught syntax error') 
Example #3
Source File: test.py    From deep_gcns_torch with MIT License
def main():
    opt = OptInit().get_args()

    logging.info('===> Creating dataloader...')
    test_dataset = GeoData.S3DIS(opt.data_dir, 5, train=False, pre_transform=T.NormalizeScale())
    test_loader = DataLoader(test_dataset, batch_size=opt.batch_size, shuffle=False, num_workers=0)
    opt.n_classes = test_loader.dataset.num_classes
    if opt.no_clutter:
        opt.n_classes -= 1

    logging.info('===> Loading the network ...')
    model = SparseDeepGCN(opt).to(opt.device)
    model, opt.best_value, opt.epoch = load_pretrained_models(model, opt.pretrained_model, opt.phase)

    logging.info('===> Start Evaluation ...')
    test(model, test_loader, opt) 
Example #4
Source File: pascal.py    From deep-graph-matching-consensus with MIT License
def test(dataset):
    model.eval()

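    # follow_batch adds per-attribute assignment vectors (x_s_batch, x_t_batch)
    # mapping each node of x_s / x_t to its graph within the batch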
    loader = DataLoader(dataset, args.batch_size, shuffle=False,
                        follow_batch=['x_s', 'x_t'])

    correct = num_examples = 0
    while (num_examples < args.test_samples):
        for data in loader:
            data = data.to(device)
            S_0, S_L = model(data.x_s, data.edge_index_s, data.edge_attr_s,
                             data.x_s_batch, data.x_t, data.edge_index_t,
                             data.edge_attr_t, data.x_t_batch)
            y = generate_y(data.y)
            correct += model.acc(S_L, y, reduction='sum')
            num_examples += y.size(1)

            if num_examples >= args.test_samples:
                return correct / num_examples 
Example #5
Source File: willow.py    From deep-graph-matching-consensus with MIT License
def run(i, datasets):
    datasets = [dataset.shuffle() for dataset in datasets]
    train_datasets = [dataset[:20] for dataset in datasets]
    test_datasets = [dataset[20:] for dataset in datasets]
    train_datasets = [
        PairDataset(train_dataset, train_dataset, sample=False)
        for train_dataset in train_datasets
    ]
    train_dataset = torch.utils.data.ConcatDataset(train_datasets)
    train_loader = DataLoader(train_dataset, args.batch_size, shuffle=True,
                              follow_batch=['x_s', 'x_t'])

    model.load_state_dict(state_dict)
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)

    for epoch in range(1, 1 + args.epochs):
        train(train_loader, optimizer)

    accs = [100 * test(test_dataset) for test_dataset in test_datasets]

    print(f'Run {i:02d}:')
    print(' '.join([category.ljust(13) for category in WILLOW.categories]))
    print(' '.join([f'{acc:.2f}'.ljust(13) for acc in accs]))

    return accs 
Example #6
Source File: willow.py    From deep-graph-matching-consensus with MIT License
def test(test_dataset):
    model.eval()

    test_loader1 = DataLoader(test_dataset, args.batch_size, shuffle=True)
    test_loader2 = DataLoader(test_dataset, args.batch_size, shuffle=True)

    correct = num_examples = 0
    while (num_examples < args.test_samples):
        for data_s, data_t in zip(test_loader1, test_loader2):
            data_s, data_t = data_s.to(device), data_t.to(device)
            _, S_L = model(data_s.x, data_s.edge_index, data_s.edge_attr,
                           data_s.batch, data_t.x, data_t.edge_index,
                           data_t.edge_attr, data_t.batch)
            y = generate_y(num_nodes=10, batch_size=data_t.num_graphs)
            correct += model.acc(S_L, y, reduction='sum')
            num_examples += y.size(1)

            if num_examples >= args.test_samples:
                return correct / num_examples 
Example #7
Source File: test_re_net.py    From pytorch_geometric with MIT License
def test_re_net():
    root = osp.join('/', 'tmp', str(random.randrange(sys.maxsize)))
    dataset = MyTestEventDataset(root, seq_len=4)
    loader = DataLoader(dataset, 2, follow_batch=['h_sub', 'h_obj'])

    model = RENet(
        dataset.num_nodes, dataset.num_rels, hidden_channels=16, seq_len=4)

    logits = torch.randn(6, 6)
    y = torch.tensor([0, 1, 2, 3, 4, 5])

    mrr, hits1, hits3, hits10 = model.test(logits, y)
    assert 0.15 < mrr <= 1
    assert hits1 <= hits3 and hits3 <= hits10 and hits10 == 1

    for data in loader:
        log_prob_obj, log_prob_sub = model(data)
        model.test(log_prob_obj, data.obj)
        model.test(log_prob_sub, data.sub)

    shutil.rmtree(root) 
Example #8
Source File: run_imdb.py    From graph_star with MIT License
def load_data(data_type):
    bs = 0
    shuffle = True
    valData = None  # only the "split_2k" branch defines a separate validation set
    if data_type == "split":
        trainData, testData = _load_split_data()
        bs = 5000
    elif data_type == "split_2k":
        trainData, testData = _load_split_2k_data()
        valData = trainData[0:1]
        trainData = trainData[1:]
        bs = 1
        shuffle = False
    elif data_type == "split_star":
        trainData, testData = _load_split_star_data()
        bs = 80
    else:  # "all" and any unrecognized value fall back to the full dataset
        trainData, testData = _load_all_data()
        bs = 1
    train_loader = DataLoader(trainData, batch_size=bs, shuffle=shuffle)
    # guard the branches without a validation set (the original code raised
    # a NameError on valData for every data_type except "split_2k")
    val_loader = DataLoader(valData, batch_size=bs, shuffle=shuffle) if valData is not None else None
    test_loader = DataLoader(testData, batch_size=bs, shuffle=shuffle)

    return train_loader, val_loader, test_loader 
Example #9
Source File: run_gc.py    From graph_star with MIT License
def load_data(dataset_name, val_idx):
    bs = 600
    path = osp.join(osp.dirname(osp.realpath(__file__)), '.', 'data', dataset_name)
    dataset = TUDataset(path, dataset_name, use_node_attr=True)
    dataset = dataset.shuffle()

    if dataset_name == "DD" or dataset_name == "MUTAG":
        # remove node label
        dataset.data.x = dataset.data.x[:, :-3]

    val_size = len(dataset) // 10

    if val_idx == 0:
        train_data = dataset[val_size:]
    elif 0 < val_idx < 9:
        train_data = dataset[:(val_idx * val_size)] + dataset[((val_idx + 1) * val_size):]
    elif val_idx == 9:
        train_data = dataset[:(val_idx * val_size)]
    else:
        raise AttributeError("val_idx must be in [0, 9]")

    train_loader = DataLoader(train_data, batch_size=bs)
    val_loader = DataLoader(dataset[(val_idx * val_size): ((val_idx + 1) * val_size)], batch_size=bs)
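    # note: the test loader reuses the validation fold in this example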
    test_loader = DataLoader(dataset[(val_idx * val_size): ((val_idx + 1) * val_size)], batch_size=bs)

    return dataset, train_loader, val_loader, test_loader 
Example #10
Source File: run_text_classification.py    From graph_star with MIT License
def load_data():
    bs = 96
    path = osp.join(osp.dirname(osp.realpath(__file__)), '.', 'data', DATASET)
    dataset = TUDataset(path, DATASET, use_node_attr=True)
#     dataset = dataset.shuffle()
#     dataset.data.x = dataset.data.x[:, :-3]
    # dataset.data.node_label = dataset.data.x[:, -3:]
    
    if 'MR' in DATASET:
        real_train_size = 6398
        train_size = 7108
    elif 'R8' in DATASET:
        real_train_size = 4937
        train_size = 5485
    elif '20ng' in DATASET:
        real_train_size = 10183
        train_size = 11314
    elif 'R52' in DATASET:
        real_train_size = 5879
        train_size = 6532
    elif 'ohsumed' in DATASET:
        real_train_size = 3022
        train_size = 3357
    else:
        # the original chain had no fallback, leaving real_train_size and
        # train_size undefined for unrecognized datasets
        raise ValueError('unknown DATASET: ' + DATASET)
    
    train_loader = DataLoader(dataset[:real_train_size], batch_size=bs)
    val_loader = DataLoader(dataset[real_train_size:train_size], batch_size=bs)
    test_loader = DataLoader(dataset[train_size:], batch_size=bs)

    print('batch size is: ' + str(bs))
    return dataset, dataset, dataset, train_loader, val_loader, test_loader 
Example #11
Source File: run_transductive_nc.py    From graph_star with MIT License
def load_data(dataset_name):
    path = osp.join(osp.dirname(osp.realpath(__file__)), '.', 'data', dataset_name)

    dataset = Planetoid(path, dataset_name, T.TargetIndegree())
    train_loader = DataLoader(dataset, batch_size=1)
    return dataset, train_loader 
Example #12
Source File: run_ppi.py    From graph_star with MIT License
def load_data():
    path = osp.join(osp.dirname(osp.realpath(__file__)), '.', 'data', 'PPI')

    train_dataset = PPI(path, split='train')
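    # note: both the validation and test sets are loaded from the 'test' split here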
    val_dataset = PPI(path, split='test')
    test_dataset = PPI(path, split='test')
    train_loader = DataLoader(train_dataset, batch_size=1, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=2, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=2, shuffle=False)

    return train_dataset, val_dataset, test_dataset, train_loader, val_loader, test_loader 
Example #13
Source File: test_dataloader.py    From pytorch_geometric with MIT License
def test_dataloader():
    x = torch.Tensor([[1], [1], [1]])
    edge_index = torch.tensor([[0, 1, 1, 2], [1, 0, 2, 1]])
    face = torch.tensor([[0], [1], [2]])
    y = 2.
    z = torch.tensor(0.)
    name = 'data'

    data = Data(x=x, edge_index=edge_index, y=y, z=z, name=name)
    assert data.__repr__() == (
        'Data(edge_index=[2, 4], name="data", x=[3, 1], y=2.0, z=0.0)')
    data.face = face

    loader = DataLoader([data, data], batch_size=2, shuffle=False)

    for batch in loader:
        assert len(batch) == 7
        assert batch.batch.tolist() == [0, 0, 0, 1, 1, 1]
        assert batch.x.tolist() == [[1], [1], [1], [1], [1], [1]]
        assert batch.edge_index.tolist() == [[0, 1, 1, 2, 3, 4, 4, 5],
                                             [1, 0, 2, 1, 4, 3, 5, 4]]
        assert batch.y.tolist() == [2.0, 2.0]
        assert batch.z.tolist() == [0.0, 0.0]
        assert batch.name == ['data', 'data']
        assert batch.face.tolist() == [[0, 3], [1, 4], [2, 5]]

    loader = DataLoader([data, data], batch_size=2, shuffle=False,
                        follow_batch=['edge_index'])
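    # following edge_index adds an edge_index_batch vector assigning each edge to its graph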

    for batch in loader:
        assert len(batch) == 8
        assert batch.edge_index_batch.tolist() == [0, 0, 0, 0, 1, 1, 1, 1] 
Example #14
Source File: train.py    From deep_gcns_torch with MIT License
def main():
    opt = OptInit().get_args()
    logging.info('===> Creating dataloader ...')
    train_dataset = GeoData.S3DIS(opt.data_dir, test_area=5, train=True, pre_transform=T.NormalizeScale())
    if opt.multi_gpus:
        train_loader = DataListLoader(train_dataset, batch_size=opt.batch_size, shuffle=True, num_workers=4)
    else:
        train_loader = DataLoader(train_dataset, batch_size=opt.batch_size, shuffle=True, num_workers=4)
    opt.n_classes = train_loader.dataset.num_classes

    logging.info('===> Loading the network ...')
    model = SparseDeepGCN(opt).to(opt.device)
    if opt.multi_gpus:
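        # note: this wraps a newly constructed SparseDeepGCN, replacing the instance built above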
        model = DataParallel(SparseDeepGCN(opt)).to(opt.device)
    logging.info('===> loading pre-trained ...')
    model, opt.best_value, opt.epoch = load_pretrained_models(model, opt.pretrained_model, opt.phase)
    logging.info(model)

    logging.info('===> Init the optimizer ...')
    criterion = torch.nn.CrossEntropyLoss().to(opt.device)
    optimizer = torch.optim.Adam(model.parameters(), lr=opt.lr)

    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, opt.lr_adjust_freq, opt.lr_decay_rate)
    optimizer, scheduler, opt.lr = load_pretrained_optimizer(opt.pretrained_model, optimizer, scheduler, opt.lr)

    logging.info('===> Init Metric ...')
    opt.losses = AverageMeter()
    # opt.test_metric = miou
    # opt.test_values = AverageMeter()
    opt.test_value = 0.

    logging.info('===> start training ...')
    for _ in range(opt.total_epochs):
        opt.epoch += 1
        train(model, train_loader, optimizer, scheduler, criterion, opt)
        # test_value = test(model, test_loader, test_metric, opt)
        scheduler.step()
    logging.info('Saving the final model. Finished!') 
Example #15
Source File: train_eval.py    From IGMC with MIT License
def test_once(test_dataset,
              model,
              batch_size,
              logger=None, 
              ensemble=False, 
              checkpoints=None):

    test_loader = DataLoader(test_dataset, batch_size, shuffle=False)
    model.to(device)
    t_start = time.perf_counter()
    if ensemble and checkpoints:
        rmse = eval_rmse_ensemble(model, checkpoints, test_loader, device, show_progress=True)
    else:
        rmse = eval_rmse(model, test_loader, device, show_progress=True)
    t_end = time.perf_counter()
    duration = t_end - t_start
    print('Test Once RMSE: {:.6f}, Duration: {:.6f}'.format(rmse, duration))
    epoch_info = 'test_once' if not ensemble else 'ensemble'
    eval_info = {
        'epoch': epoch_info,
        'train_loss': 0,
        'test_rmse': rmse,
        }
    if logger is not None:
        logger(eval_info, None, None)
    return rmse 
Example #16
Source File: test_enzymes.py    From pytorch_geometric with MIT License
def test_enzymes():
    root = osp.join('/', 'tmp', str(random.randrange(sys.maxsize)))
    dataset = TUDataset(root, 'ENZYMES')

    assert len(dataset) == 600
    assert dataset.num_features == 3
    assert dataset.num_classes == 6
    assert dataset.__repr__() == 'ENZYMES(600)'

    assert len(dataset[0]) == 3
    assert len(dataset.shuffle()) == 600
    assert len(dataset.shuffle(return_perm=True)) == 2
    assert len(dataset[:100]) == 100
    assert len(dataset[torch.arange(100, dtype=torch.long)]) == 100
    mask = torch.zeros(600, dtype=torch.bool)
    mask[:100] = 1
    assert len(dataset[mask]) == 100

    loader = DataLoader(dataset, batch_size=len(dataset))
    for data in loader:
        assert data.num_graphs == 600

        avg_num_nodes = data.num_nodes / data.num_graphs
        assert pytest.approx(avg_num_nodes, abs=1e-2) == 32.63

        avg_num_edges = data.num_edges / (2 * data.num_graphs)
        assert pytest.approx(avg_num_edges, abs=1e-2) == 62.14

        assert len(data) == 4
        assert list(data.x.size()) == [data.num_nodes, 3]
        assert list(data.y.size()) == [data.num_graphs]
        assert data.y.max() + 1 == 6
        assert list(data.batch.size()) == [data.num_nodes]

        assert data.contains_isolated_nodes()
        assert not data.contains_self_loops()
        assert data.is_undirected()

    loader = DataListLoader(dataset, batch_size=len(dataset))
    for data_list in loader:
        assert len(data_list) == 600

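    # ToDense pads every graph to a fixed 126 nodes so that DenseDataLoader
    # can stack the whole dataset into dense [600, 126, ...] tensors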
    dataset.transform = ToDense(num_nodes=126)
    loader = DenseDataLoader(dataset, batch_size=len(dataset))
    for data in loader:
        assert len(data) == 4
        assert list(data.x.size()) == [600, 126, 3]
        assert list(data.adj.size()) == [600, 126, 126]
        assert list(data.mask.size()) == [600, 126]
        assert list(data.y.size()) == [600, 1]

    dataset = TUDataset(root, 'ENZYMES', use_node_attr=True)
    assert dataset.num_node_features == 21
    assert dataset.num_features == 21
    assert dataset.num_edge_features == 0

    shutil.rmtree(root) 
Example #17
Source File: test_planetoid.py    From pytorch_geometric with MIT License
def test_citeseer():
    root = osp.join('/', 'tmp', str(random.randrange(sys.maxsize)))
    dataset = Planetoid(root, 'Citeseer')
    loader = DataLoader(dataset, batch_size=len(dataset))

    assert len(dataset) == 1
    assert dataset.__repr__() == 'Citeseer()'

    for data in loader:
        assert data.num_graphs == 1
        assert data.num_nodes == 3327
        assert data.num_edges / 2 == 4552

        assert len(data) == 7
        assert list(data.x.size()) == [data.num_nodes, 3703]
        assert list(data.y.size()) == [data.num_nodes]
        assert data.y.max() + 1 == 6
        assert data.train_mask.sum() == 6 * 20
        assert data.val_mask.sum() == 500
        assert data.test_mask.sum() == 1000
        assert (data.train_mask & data.val_mask & data.test_mask).sum() == 0
        assert list(data.batch.size()) == [data.num_nodes]

        assert data.contains_isolated_nodes()
        assert not data.contains_self_loops()
        assert data.is_undirected()

    dataset = Planetoid(root, 'Citeseer', split='full')
    data = dataset[0]
    assert data.val_mask.sum() == 500
    assert data.test_mask.sum() == 1000
    assert data.train_mask.sum() == data.num_nodes - 1500
    assert (data.train_mask & data.val_mask & data.test_mask).sum() == 0

    dataset = Planetoid(root, 'Citeseer', split='random',
                        num_train_per_class=11, num_val=29, num_test=41)
    data = dataset[0]
    assert data.train_mask.sum() == dataset.num_classes * 11
    assert data.val_mask.sum() == 29
    assert data.test_mask.sum() == 41
    assert (data.train_mask & data.val_mask & data.test_mask).sum() == 0

    shutil.rmtree(root) 
Example #18
Source File: test.py    From deep_gcns_torch with MIT License
def main():

    args = ArgsInit().args

    if args.use_gpu:
        device = torch.device("cuda:" + str(args.device)) if torch.cuda.is_available() else torch.device("cpu")
    else:
        device = torch.device('cpu')

    if args.not_extract_node_feature:
        dataset = PygGraphPropPredDataset(name=args.dataset,
                                          transform=add_zeros)
    else:
        extract_node_feature_func = partial(extract_node_feature, reduce=args.aggr)
        dataset = PygGraphPropPredDataset(name=args.dataset,
                                          transform=extract_node_feature_func)

    args.num_tasks = dataset.num_classes
    evaluator = Evaluator(args.dataset)

    split_idx = dataset.get_idx_split()

    train_loader = DataLoader(dataset[split_idx["train"]], batch_size=args.batch_size, shuffle=False,
                              num_workers=args.num_workers)
    valid_loader = DataLoader(dataset[split_idx["valid"]], batch_size=args.batch_size, shuffle=False,
                              num_workers=args.num_workers)
    test_loader = DataLoader(dataset[split_idx["test"]], batch_size=args.batch_size, shuffle=False,
                             num_workers=args.num_workers)

    print(args)

    model = DeeperGCN(args)
    model.load_state_dict(torch.load(args.model_load_path)['model_state_dict'])
    model.to(device)

    train_accuracy = eval(model, device, train_loader, evaluator)
    valid_accuracy = eval(model, device, valid_loader, evaluator)
    test_accuracy = eval(model, device, test_loader, evaluator)

    print({'Train': train_accuracy,
           'Validation': valid_accuracy,
           'Test': test_accuracy})
    model.print_params(final=True) 
Example #19
Source File: test.py    From deep_gcns_torch with MIT License 4 votes vote down vote up
def main():

    args = ArgsInit().args

    if args.use_gpu:
        device = torch.device("cuda:" + str(args.device)) if torch.cuda.is_available() else torch.device("cpu")
    else:
        device = torch.device('cpu')

    dataset = PygGraphPropPredDataset(name=args.dataset)
    args.num_tasks = dataset.num_tasks
    print(args)

    if args.feature == 'full':
        pass
    elif args.feature == 'simple':
        print('using simple feature')
        # only retain the top two node/edge features
        dataset.data.x = dataset.data.x[:, :2]
        dataset.data.edge_attr = dataset.data.edge_attr[:, :2]


    split_idx = dataset.get_idx_split()

    evaluator = Evaluator(args.dataset)

    train_loader = DataLoader(dataset[split_idx["train"]], batch_size=args.batch_size, shuffle=False,
                              num_workers=args.num_workers)
    valid_loader = DataLoader(dataset[split_idx["valid"]], batch_size=args.batch_size, shuffle=False,
                              num_workers=args.num_workers)
    test_loader = DataLoader(dataset[split_idx["test"]], batch_size=args.batch_size, shuffle=False,
                             num_workers=args.num_workers)

    model = DeeperGCN(args)

    model.load_state_dict(torch.load(args.model_load_path)['model_state_dict'])
    model.to(device)

    train_result = eval(model, device, train_loader, evaluator)[dataset.eval_metric]
    valid_result = eval(model, device, valid_loader, evaluator)[dataset.eval_metric]
    test_result = eval(model, device, test_loader, evaluator)[dataset.eval_metric]

    print({'Train': train_result,
           'Validation': valid_result,
           'Test': test_result})

    model.print_params(final=True)