Python torch.utils.data.TensorDataset() Examples

The following are 30 code examples of torch.utils.data.TensorDataset(), each taken from an open-source project; the source file and project are noted above each snippet. You may also want to check out the other available functions and classes of the torch.utils.data module.
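TensorDataset wraps one or more tensors that share the same first dimension and exposes them as an indexable dataset, which is what makes it convenient to feed into a DataLoader. A minimal sketch of the idea (the tensor shapes and names below are illustrative only, not taken from the projects that follow):

import torch
from torch.utils.data import TensorDataset, DataLoader

# Two tensors with the same number of rows: features and labels (illustrative data).
features = torch.randn(100, 8)
labels = torch.randint(0, 2, (100,))

dataset = TensorDataset(features, labels)
print(len(dataset))      # 100
x0, y0 = dataset[0]      # indexing returns one slice of every wrapped tensor

# Each DataLoader batch is a list of tensors in the same order they were wrapped.
loader = DataLoader(dataset, batch_size=16, shuffle=True)
for batch_x, batch_y in loader:
    pass  # forward pass, loss computation, etc.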
Example #1
Source File: base_trainer.py    From BiaffineDependencyParsing with MIT License
def _unpack_batch(self, batch: TensorDataset) -> Dict:
        """
        拆分batch,得到encoder的输入和word mask,sentence length,以及dep ids,以及其他输入信息
        eg:
            dataset = TensorDataset(all_input_ids, all_input_mask,
                        all_segment_ids, all_start_pos,
                        all_end_pos, all_dep_ids,
                        all_pos_ids)

        Args:
            batch: 输入的单个batch,类型为TensorDataset(或者torchtext.dataset),可用索引分别取值

        Returns:
            返回一个字典,[1]是inputs,类型为字典;[2]是word mask;[3]是sentence length,python 列表;[4]是dep ids,
            根据实际情况可能还包含其他输入信息
        """
        raise NotImplementedError('must implement in sub class') 
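For reference, a concrete subclass would typically unpack the batch purely by position, mirroring the tensor order shown in the docstring. The sketch below is hypothetical and not taken from BiaffineDependencyParsing; the dictionary keys and the way the word mask and sentence lengths are derived are assumptions only:

def unpack_batch(batch):
    # Hypothetical sketch: positions follow the TensorDataset order shown above.
    input_ids, input_mask, segment_ids, start_pos, end_pos, dep_ids = batch[:6]
    inputs = {'input_ids': input_ids,
              'attention_mask': input_mask,
              'token_type_ids': segment_ids,
              'start_pos': start_pos,
              'end_pos': end_pos}
    word_mask = input_mask                      # assumption: the input mask doubles as word mask
    sent_lens = input_mask.sum(dim=1).tolist()  # assumption: lengths derived from the mask
    result = {'inputs': inputs, 'word_mask': word_mask,
              'sent_lens': sent_lens, 'dep_ids': dep_ids}
    if len(batch) > 6:                          # pos_ids, if present
        result['pos_ids'] = batch[6]
    return result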
Example #2
Source File: bertology_loader.py    From BiaffineDependencyParsing with MIT License
def feature_to_dataset(features):
    all_input_ids = torch.tensor([f.input_ids for f in features], dtype=torch.long)
    all_input_mask = torch.tensor([f.input_mask for f in features], dtype=torch.long)
    all_segment_ids = torch.tensor([f.segment_ids for f in features], dtype=torch.long)
    all_start_pos = torch.tensor([f.start_pos for f in features], dtype=torch.long)
    # print([f.end_pos for f in features])
    all_end_pos = torch.tensor([f.end_pos for f in features], dtype=torch.long)
    all_dep_ids = torch.tensor([f.dep_ids for f in features], dtype=torch.long)
    tensors = [all_input_ids, all_input_mask, all_segment_ids, all_start_pos, all_end_pos, all_dep_ids]
    if hasattr(features[0], 'pos_ids'):
        all_pos_ids = torch.tensor([f.pos_ids for f in features], dtype=torch.long)
        tensors.append(all_pos_ids)
    dataset = TensorDataset(*tensors)
    # Input Tensors:
    #   all_input_ids,
    #   all_input_mask,
    #   all_segment_ids,
    #   all_start_pos,
    #   all_end_pos,
    #   all_dep_ids,
    #   all_pos_ids (if present)
    return dataset 
Example #3
Source File: serverstate.py    From gandissect with MIT License
def feature_maps(self, z_batch, intervention=None, layers=None,
            quantiles=True):
        feature_map = defaultdict(list)
        with torch.no_grad(), self.modellock:
            batch_size = 10
            self.apply_intervention(intervention)
            test_loader = DataLoader(
                TensorDataset(z_batch[:,:,None,None]),
                batch_size=batch_size,
                pin_memory=('cuda' == self.device.type
                    and z_batch.device.type == 'cpu'))
            processed = 0
            for batch_num, [batch_z] in enumerate(test_loader):
                batch_z = batch_z.to(self.device)
                # Run model but disregard output
                self.model(batch_z)
                processing = batch_z.shape[0]
                for layer, feature in self.model.retained_features().items():
                    for single_featuremap in feature:
                        if quantiles:
                            feature_map[layer].append(self.quantiles[layer]
                                    .normalize(single_featuremap))
                        else:
                            feature_map[layer].append(single_featuremap)
        return feature_map 
Example #4
Source File: serverstate.py    From gandissect with MIT License
def generate_images(self, z_batch, intervention=None):
        '''
        Generates images from z_batch, optionally applying the given intervention first.
        '''
        with torch.no_grad(), self.modellock:
            batch_size = 10
            self.apply_intervention(intervention)
            test_loader = DataLoader(TensorDataset(z_batch[:,:,None,None]),
                batch_size=batch_size,
                pin_memory=('cuda' == self.device.type
                            and z_batch.device.type == 'cpu'))
            result_img = torch.zeros(
                    *((len(z_batch), 3) + self.model.output_shape[2:]),
                    dtype=torch.uint8, device=self.device)
            for batch_num, [batch_z,] in enumerate(test_loader):
                batch_z = batch_z.to(self.device)
                out = self.model(batch_z)
                result_img[batch_num*batch_size:
                        batch_num*batch_size+len(batch_z)] = (
                                (((out + 1) / 2) * 255).clamp(0, 255).byte())
            return result_img 
Example #5
Source File: msda_preprocessed_amazon_dataset.py    From man with MIT License
def get_msda_amazon_datasets(data_file, domain, kfold, feature_num):
    print(f'Loading mSDA Preprocessed Multi-Domain Amazon data for {domain} Domain')
    dataset = pickle.load(open(data_file, 'rb'))[domain]

    lx, ly = dataset['labeled']
    if feature_num > 0:
        lx = lx[:, : feature_num]
    lx = torch.from_numpy(lx.toarray()).float().to(opt.device)
    ly = torch.from_numpy(ly).long().to(opt.device)
    print(f'{domain} Domain has {len(ly)} labeled instances.')
    # if opt.use_cuda:
    #     lx, ly = lx.cuda(), ly.cuda()
    labeled_set = FoldedDataset(TensorDataset, kfold, lx, ly)

    ux, uy = dataset['unlabeled']
    if feature_num > 0:
        ux = ux[:, : feature_num]
    ux = torch.from_numpy(ux.toarray()).float().to(opt.device)
    uy = torch.from_numpy(uy).long().to(opt.device)
    print(f'{domain} Domain has {len(uy)} unlabeled instances.')
    # if opt.use_cuda:
    #     ux, uy = ux.cuda(), uy.cuda()
    unlabeled_set = TensorDataset(ux, uy)

    return labeled_set, unlabeled_set 
Example #6
Source File: folded_dataset.py    From man with MIT License
def get_folds(self, folds):
        indices = np.hstack([self.folds[f] for f in folds]).reshape(-1)
        if self.__class__.__bases__[0].__name__ == 'TensorDataset':
            indices = torch.from_numpy(indices).to(opt.device)
            # if opt.use_cuda:
            #     indices = indices.cuda()
            X = torch.index_select(self.tensors[0], 0, indices)
            Y = torch.index_select(self.tensors[1], 0, indices)
            return TensorDataset(X, Y)
        else:
            X = [self.X[i] for i in indices]
            indices = torch.from_numpy(indices).to(opt.device)
            # if opt.use_cuda:
            #     indices = indices.cuda()
            Y = torch.index_select(self.Y, 0, indices)
        return AmazonDataset(X, Y, self.max_seq_len) 
Example #7
Source File: rnn_utils.py    From guacamol_baselines with MIT License
def get_tensor_dataset(numpy_array):
    """
    Takes a numpy array of indices, converts it into a torch tensor,
    splits it into inputs and targets (shifted by one position),
    and wraps them in a TensorDataset

    Args:
        numpy_array: to be converted

    Returns: a TensorDataset
    """

    tensor = torch.from_numpy(numpy_array).long()

    inp = tensor[:, :-1]
    target = tensor[:, 1:]

    return TensorDataset(inp, target) 
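The split above is the usual next-token setup for language-model training: the target sequence is the input shifted by one position. A small illustrative check (the index array is made up):

import numpy as np
import torch
from torch.utils.data import TensorDataset

seqs = np.array([[1, 2, 3, 4],
                 [5, 6, 7, 8]])

tensor = torch.from_numpy(seqs).long()
ds = TensorDataset(tensor[:, :-1], tensor[:, 1:])  # same split as get_tensor_dataset

inp, target = ds[0]
# inp    -> tensor([1, 2, 3])
# target -> tensor([2, 3, 4])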
Example #8
Source File: trial.py    From torchbearer with MIT License
def with_test_data(self, x, batch_size=1, num_workers=1, steps=None):
        """Use this trial with the given test data. Returns self so that methods can be chained for convenience.

        Example: ::

            # Simple trial that runs for 10 test iterations on some random data
            >>> from torchbearer import Trial
            >>> data = torch.rand(10, 1)
            >>> trial = Trial(None).with_test_data(data).for_test_steps(10).run(1)

        Args:
            x (torch.Tensor): The test x data to use during calls to :meth:`.predict`
            batch_size (int): The size of each batch to sample from the data
            num_workers (int): Number of worker threads to use in the data loader
            steps (int): The number of steps per epoch to take when using this data

        Returns:
            Trial: self
        """
        dataset = TensorDataset(x)
        dataloader = DataLoader(dataset, batch_size, num_workers=num_workers)
        self.with_test_generator(dataloader, steps=steps)

        return self 
Example #9
Source File: logistic.py    From holoclean with Apache License 2.0
def train(self, num_epochs=3, batch_size=32):
        """
        Trains the LR model.

        :param num_epochs: (int) number of epochs.
        :param batch_size: (int) size of each mini-batch.
        """
        batch_losses = []
        # We train only on cells that do not have their initial value as NULL.
        X_train, Y_train = self._X.index_select(0, self._train_idx), self._Y.index_select(0, self._train_idx)
        torch_ds = TensorDataset(X_train, Y_train)

        # Main training loop.
        for epoch_idx in range(1, num_epochs+1):
            logging.debug("Logistic: epoch %d", epoch_idx)
            batch_cnt = 0
            for batch_X, batch_Y in tqdm(DataLoader(torch_ds, batch_size=batch_size)):
                batch_pred = self.forward(batch_X)
                batch_loss = self._loss(batch_pred, batch_Y.reshape(-1,1))
                batch_losses.append(float(batch_loss))
                self.zero_grad()
                batch_loss.backward()
                self._optimizer.step()
                batch_cnt += 1
            logging.debug('Logistic: average batch loss: %f', sum(batch_losses[-1 * batch_cnt:]) / batch_cnt)
        return batch_losses 
Example #10
Source File: test_end_to_end.py    From torchbearer with MIT License
def test_callbacks(self):
        from torch.utils.data import TensorDataset
        traingen = TensorDataset(torch.rand(10, 1, 3), torch.rand(10, 1))
        valgen = TensorDataset(torch.rand(10, 1, 3), torch.rand(10, 1))
        testgen = TensorDataset(torch.rand(10, 1, 3), torch.rand(10, 1))

        model = torch.nn.Linear(3, 1)
        optim = torch.optim.SGD(model.parameters(), lr=0.01)
        cbs = []
        cbs.extend([c.EarlyStopping(), c.GradientClipping(10, model.parameters()), c.Best('test.pt'),
                    c.MostRecent('test.pt'), c.ReduceLROnPlateau(), c.CosineAnnealingLR(0.1, 0.01),
                    c.ExponentialLR(1), c.Interval('test.pt'), c.CSVLogger('test_csv.pt'),
                    c.L1WeightDecay(), c.L2WeightDecay(), c.TerminateOnNaN(monitor='fail_metric')])

        trial = torchbearer.Trial(model, optim, torch.nn.MSELoss(), metrics=['loss'], callbacks=cbs)
        trial = trial.with_generators(traingen, valgen, testgen)
        trial.run(2)
        trial.predict()
        trial.evaluate(data_key=torchbearer.TEST_DATA)
        trial.evaluate()

        import os
        os.remove('test.pt')
        os.remove('test_csv.pt') 
Example #11
Source File: run.py    From s2cnn with MIT License
def load_data(path, batch_size):

    with gzip.open(path, 'rb') as f:
        dataset = pickle.load(f)

    train_data = torch.from_numpy(
        dataset["train"]["images"][:, None, :, :].astype(np.float32))
    train_labels = torch.from_numpy(
        dataset["train"]["labels"].astype(np.int64))

    # TODO normalize dataset
    # mean = train_data.mean()
    # stdv = train_data.std()

    train_dataset = data_utils.TensorDataset(train_data, train_labels)
    train_loader = data_utils.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

    test_data = torch.from_numpy(
        dataset["test"]["images"][:, None, :, :].astype(np.float32))
    test_labels = torch.from_numpy(
        dataset["test"]["labels"].astype(np.int64))

    test_dataset = data_utils.TensorDataset(test_data, test_labels)
    test_loader = data_utils.DataLoader(test_dataset, batch_size=batch_size, shuffle=True)

    return train_loader, test_loader, train_dataset, test_dataset 
Example #12
Source File: aceoptimize.py    From gandissect with MIT License
def compute_feature_quantiles(args, corpus, cache_filename, model, full_sample):
    # Phase 1.6.  Figure the 99% and 99.9%ile of every feature.
    if all(k in corpus for k in ['feature_99', 'feature_999']):
        return
    progress = default_progress()
    with torch.no_grad():
        rq = RunningQuantile(resolution=10000) # 10x what's needed.
        for [zbatch] in progress(
                torch.utils.data.DataLoader(TensorDataset(full_sample),
                batch_size=args.inference_batch_size, num_workers=10,
                pin_memory=True),
                desc="Calculating 0.999 quantile"):
            zbatch = zbatch.cuda()
            tensor_image = model(zbatch)
            feat = model.retained_layer(args.layer)
            rq.add(feat.permute(0, 2, 3, 1
                ).contiguous().view(-1, feat.shape[1]))
        result = rq.quantiles([0.001, 0.01, 0.1, 0.5, 0.9, 0.99, 0.999])
        corpus.feature_001 = result[:, 0].cpu()
        corpus.feature_01 = result[:, 1].cpu()
        corpus.feature_10 = result[:, 2].cpu()
        corpus.feature_50 = result[:, 3].cpu()
        corpus.feature_90 = result[:, 4].cpu()
        corpus.feature_99 = result[:, 5].cpu()
        corpus.feature_999 = result[:, 6].cpu()
    numpy.savez(cache_filename, **corpus) 
Example #13
Source File: data_utils_torch.py    From CaptchaRecognition with MIT License
def load_dataset(batch_size,dir='data',n_workers=0,test_size=16384,total_size=None):
    print ("Loading data...")
    data = np.load(os.path.join(dir,'captcha.npz'))
    image = data['img'].astype(np.float32)/127.5-1
    text = data['text']
    print ("Loading dictionary...")
    vocab = pickle.load(open(os.path.join(dir,'captcha.vocab_dict'),'rb'),encoding='utf8')

    print ("Convert to tensor...")
    if total_size is None:
        image = torch.Tensor(image).permute(0,3,1,2)
        text = torch.LongTensor(text)
    else:
        image = torch.Tensor(image[:total_size]).permute(0,3,1,2)
        text = torch.LongTensor(text[:total_size])

    image_train = image[:-test_size]
    image_test = image[-test_size:]
    text_train = text[:-test_size]
    text_test = text[-test_size:]
    print ("Build dataset...")
    dataset_train = TensorDataset(image_train,text_train)
    dataset_test = TensorDataset(image_test,text_test)

    # Pin memory when a GPU is available so host-to-device copies are faster
    pm = torch.cuda.is_available()
    print ("Build dataloader...")
    dataloader_train = DataLoader(dataset_train,batch_size,True,num_workers=n_workers,pin_memory=pm)
    dataloader_test = DataLoader(dataset_test,batch_size,shuffle=False,pin_memory=pm)
    print ("data ready!")
    return dataloader_train,dataloader_test,vocab 
Example #14
Source File: load_data.py    From cnn-surrogate with MIT License
def load_data(data_dir, batch_size):
    """Return data loader

    Args:
        data_dir: path to the hdf5 file, e.g. `dir/to/kle4225_lhs256.hdf5`
        batch_size (int): mini-batch size for loading data

    Returns:
        (data_loader (torch.utils.data.DataLoader), stats)
    """

    with h5py.File(data_dir, 'r') as f:
        x_data = f['input'][()]
        y_data = f['output'][()]

    print("input data shape: {}".format(x_data.shape))
    print("output data shape: {}".format(y_data.shape))

    kwargs = {'num_workers': 4,
              'pin_memory': True} if torch.cuda.is_available() else {}

    dataset = TensorDataset(torch.tensor(x_data), torch.tensor(y_data))
    data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True, **kwargs)

    # simple statistics of output data
    y_data_mean = np.mean(y_data, 0)
    y_data_var = np.sum((y_data - y_data_mean) ** 2)
    stats = {}
    stats['y_mean'] = y_data_mean
    stats['y_var'] = y_data_var

    return data_loader, stats 
Example #15
Source File: layers_classify_pytorch.py    From mead-baseline with Apache License 2.0
def __init__(self, x, x_lengths, y):
        self.tensor_dataset = TensorDataset(x, x_lengths, y) 
Example #16
Source File: eager_lm_pytorch.py    From mead-baseline with Apache License 2.0
def __init__(self, x, y):
        self.tensor_dataset = TensorDataset(x, y) 
Example #17
Source File: runners.py    From bert_on_stilts with Apache License 2.0
def convert_to_dataset(features):
    full_batch = features_to_data(features)
    dataset_ls = [full_batch.input_ids, full_batch.input_mask,
                  full_batch.segment_ids, full_batch.lm_label_ids]
    if full_batch.is_next is not None:
        dataset_ls.append(full_batch.is_next)

    dataset = TensorDataset(*dataset_ls)
    return dataset, full_batch.tokens 
Example #18
Source File: burgerLoader.py    From ar-pde-cnn with MIT License
def createTestingLoader(self, data_dir, cases, t_start=0, dt=0.001, batch_size=1):
        '''
        Loads in testing data from the Fenics simulator
        Args:
            data_dir (string): directory of data
            cases (np.array): array of test cases, must be integers
            t_start (float): simulation time to use as the initial condition
            dt (float): time-step size of the simulation data
            batch_size (int): mini-batch size
        '''
        testing_data = []
        target_data = []

        # Index of the data time-step to use as the initial condition
        nidx = int(t_start/dt)

        for i, val in enumerate(cases):
            file_name = data_dir+"/u{:d}.npy".format(val)
            print("Reading file: {}".format(file_name))
            u = np.load(file_name)

            # Convert to tensor and unsqueeze channel dim
            uTensor = torch.Tensor(u[nidx, :-1]).unsqueeze(0).unsqueeze(1)
            testing_data.append(uTensor.repeat(1,20,1))
            # Remove last element due to periodic conditions between [0,1]
            target_data.append(torch.Tensor(u[::int(self.dt/dt),:-1]))

        data_tuple = (torch.cat(testing_data, dim=0), torch.stack(target_data, dim=0))
        testing_loader = DataLoader(TensorDataset(*data_tuple),
            batch_size=batch_size, shuffle=False, drop_last=False)

        return testing_loader 
Example #19
Source File: aceoptimize.py    From gandissect with MIT License
def compute_mean_present_features(args, corpus, cache_filename, model):
    # Phase 1.5.  Figure mean activations for every channel where there
    # is a doorway.
    if all(k in corpus for k in ['mean_present_feature']):
        return
    progress = default_progress()
    with torch.no_grad():
        total_present_feature = 0
        for [zbatch, featloc] in progress(
                torch.utils.data.DataLoader(TensorDataset(
                    corpus.object_present_sample,
                    corpus.object_present_location),
                batch_size=args.inference_batch_size, num_workers=10,
                pin_memory=True),
                desc="Mean activations"):
            zbatch = zbatch.cuda()
            featloc = featloc.cuda()
            tensor_image = model(zbatch)
            feat = model.retained_layer(args.layer)
            flatfeat = feat.view(feat.shape[0], feat.shape[1], -1)
            sum_feature_at_obj = flatfeat[
                    torch.arange(feat.shape[0]).to(feat.device), :, featloc
                    ].sum(0)
            total_present_feature = total_present_feature + sum_feature_at_obj
        corpus.mean_present_feature = (total_present_feature / len(
                corpus.object_present_sample)).cpu()
    if cache_filename:
        numpy.savez(cache_filename, **corpus) 
Example #20
Source File: zdataset.py    From gandissect with MIT License
def z_dataset_for_model(model, size=100, seed=1):
    return TensorDataset(z_sample_for_model(model, size, seed)) 
Example #21
Source File: run_classic.py    From s2cnn with MIT License
def load_data(path, batch_size):

    with gzip.open(path, 'rb') as f:
        dataset = pickle.load(f)

    train_data = torch.from_numpy(
        dataset["train"]["images"][:, None, :, :].astype(np.float32))
    train_labels = torch.from_numpy(
        dataset["train"]["labels"].astype(np.int64))

    # TODO normalize dataset
    # mean = train_data.mean()
    # stdv = train_data.std()

    train_dataset = data_utils.TensorDataset(train_data, train_labels)
    train_loader = data_utils.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

    test_data = torch.from_numpy(
        dataset["test"]["images"][:, None, :, :].astype(np.float32))
    test_labels = torch.from_numpy(
        dataset["test"]["labels"].astype(np.int64))

    test_dataset = data_utils.TensorDataset(test_data, test_labels)
    test_loader = data_utils.DataLoader(test_dataset, batch_size=batch_size, shuffle=True)

    return train_loader, test_loader, train_dataset, test_dataset 
Example #22
Source File: runners.py    From bert_on_stilts with Apache License 2.0
def convert_to_dataset(features, label_mode):
    full_batch = features_to_data(features, label_mode=label_mode)
    if full_batch.label_ids is None:
        dataset = TensorDataset(full_batch.input_ids, full_batch.input_mask,
                                full_batch.segment_ids)
    else:
        dataset = TensorDataset(full_batch.input_ids, full_batch.input_mask,
                                full_batch.segment_ids, full_batch.label_ids)
    return dataset, full_batch.tokens 
Example #23
Source File: main_dense.py    From BLINK with MIT License
def _process_crossencoder_dataloader(context_input, label_input, crossencoder_params):
    tensor_data = TensorDataset(context_input, label_input)
    sampler = SequentialSampler(tensor_data)
    dataloader = DataLoader(
        tensor_data, sampler=sampler, batch_size=crossencoder_params["eval_batch_size"]
    )
    return dataloader 
Example #24
Source File: absa_data_util.py    From BERT-for-RRC-ABSA with Apache License 2.0
def build_dataset(features):
    input_ids = torch.tensor([f.input_ids for f in features], dtype=torch.long)
    attention_mask = torch.tensor([f.attention_mask for f in features], dtype=torch.long)
    token_type_ids = torch.tensor([f.token_type_ids for f in features], dtype=torch.long)
    label = torch.tensor([f.label for f in features], dtype=torch.long)
    return TensorDataset(input_ids, attention_mask, token_type_ids, label) 
Example #25
Source File: run_asc.py    From BERT-for-RRC-ABSA with Apache License 2.0
def test(args):  # Load a trained model that you have fine-tuned and evaluate it on the GPU
    processor = data_utils.AscProcessor()
    label_list = processor.get_labels()
    tokenizer = BertTokenizer.from_pretrained(modelconfig.MODEL_ARCHIVE_MAP[args.bert_model])
    eval_examples = processor.get_test_examples(args.data_dir)
    eval_features = data_utils.convert_examples_to_features(eval_examples, label_list, args.max_seq_length, tokenizer, "asc")

    logger.info("***** Running evaluation *****")
    logger.info("  Num examples = %d", len(eval_examples))
    logger.info("  Batch size = %d", args.eval_batch_size)
    all_input_ids = torch.tensor([f.input_ids for f in eval_features], dtype=torch.long)
    all_segment_ids = torch.tensor([f.segment_ids for f in eval_features], dtype=torch.long)
    all_input_mask = torch.tensor([f.input_mask for f in eval_features], dtype=torch.long)
    all_label_ids = torch.tensor([f.label_id for f in eval_features], dtype=torch.long)
    eval_data = TensorDataset(all_input_ids, all_segment_ids, all_input_mask, all_label_ids)
    # Run prediction for full data
    eval_sampler = SequentialSampler(eval_data)
    eval_dataloader = DataLoader(eval_data, sampler=eval_sampler, batch_size=args.eval_batch_size)

    model = torch.load(os.path.join(args.output_dir, "model.pt") )
    model.cuda()
    model.eval()
    
    full_logits=[]
    full_label_ids=[]
    for step, batch in enumerate(eval_dataloader):
        batch = tuple(t.cuda() for t in batch)
        input_ids, segment_ids, input_mask, label_ids = batch
        
        with torch.no_grad():
            logits = model(input_ids, segment_ids, input_mask)

        logits = logits.detach().cpu().numpy()
        label_ids = label_ids.cpu().numpy()

        full_logits.extend(logits.tolist() )
        full_label_ids.extend(label_ids.tolist() )

    output_eval_json = os.path.join(args.output_dir, "predictions.json") 
    with open(output_eval_json, "w") as fw:
        json.dump({"logits": full_logits, "label_ids": full_label_ids}, fw) 
Example #26
Source File: rnn_utils.py    From guacamol_baselines with MIT License
def get_tensor_dataset_on_device(numpy_array, device):
    """
    Get tensor dataset and send it to a device
    Args:
        numpy_array: to be converted
        device: cuda | cpu

    Returns:
        a TensorDataset on the required device
    """

    dataset = get_tensor_dataset(numpy_array)
    dataset.tensors = tuple(t.to(device) for t in dataset.tensors)
    return dataset 
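The trick above works because TensorDataset keeps its data in a plain `.tensors` tuple, so the tuple can simply be rebuilt on the target device. A minimal sketch (the tensors and device choice here are illustrative):

import torch
from torch.utils.data import TensorDataset

dataset = TensorDataset(torch.arange(12).view(4, 3), torch.zeros(4))
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Rebuild the internal tuple on the target device, as get_tensor_dataset_on_device does.
dataset.tensors = tuple(t.to(device) for t in dataset.tensors)
print(dataset.tensors[0].device)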
Example #27
Source File: burgerLoader2D.py    From ar-pde-cnn with MIT License
def createTestingLoader(self, data_dir, cases, tMax=1.0, simdt=0.001, save_every=2, batch_size=1):
        '''
        Loads in testing data from the Fenics simulator; assumes the simulator has saved
        each time-step at the specified delta t
        Args:
            data_dir (string): directory of data
            cases (np.array): array of test cases, must be integers
            tMax (float): maximum time value to load simulator data up to
            simdt (float): time-step size used in the simulation
            save_every (int): interval at which to load the simulation data (default is 2 to match the FEM simulator)
            batch_size (int): mini-batch size
        Returns:
            test_loader (Pytorch DataLoader): Returns testing loader
        '''
        testing_data = []
        target_data = []

        # Loop through test cases
        for i, val in enumerate(cases):
            case_dir = os.path.join(data_dir, "run{:d}".format(val))
            print("Reading test case: {}".format(case_dir))
            seq = []
            for j in range(0, int(tMax/simdt)+1, save_every):
                file_dir = os.path.join(case_dir, "u{:d}.npy".format(j))
                u0 = np.load(file_dir)
                # Remove the periodic nodes
                seq.append(u0[:,:,:])

            file_dir = os.path.join(case_dir, "u0.npy")
            uInit = np.load(file_dir)
            uTarget = np.stack(seq, axis=0)

            # Remove the periodic nodes and unsqueeze first dim
            testing_data.append(torch.Tensor(uInit[:,:,:]).unsqueeze(0))
            target_data.append(torch.Tensor(uTarget))
        # Create data loader
        data_tuple = (torch.cat(testing_data, dim=0), torch.stack(target_data, dim=0))
        testing_loader = DataLoader(TensorDataset(*data_tuple),
            batch_size=batch_size, shuffle=False, drop_last=False)

        return testing_loader 
Example #28
Source File: 53_machine_translation.py    From deep-learning-note with MIT License
def read_data(max_seq_len):
    # 'in' and 'out' are short for input and output, respectively
    in_tokens, out_tokens, in_seqs, out_seqs = [], [], [], []
    with io.open('./data/translation/fr-en-small.txt') as f:
        lines = f.readlines()
    for line in lines:
        in_seq, out_seq = line.rstrip().split('\t')
        in_seq_tokens, out_seq_tokens = in_seq.split(' '), out_seq.split(' ')
        if max(len(in_seq_tokens), len(out_seq_tokens)) > max_seq_len - 1:
            continue  # skip this sample if it would be longer than max_seq_len once EOS is added
        process_one_seq(in_seq_tokens, in_tokens, in_seqs, max_seq_len)
        process_one_seq(out_seq_tokens, out_tokens, out_seqs, max_seq_len)
    in_vocab, in_data = build_data(in_tokens, in_seqs)
    out_vocab, out_data = build_data(out_tokens, out_seqs)
    return in_vocab, out_vocab, Data.TensorDataset(in_data, out_data) 
Example #29
Source File: ksLoader.py    From ar-pde-cnn with MIT License
def createTestingLoader(self, data_dir, cases, dt = 0.1, tmax=1000, batch_size=64):
        '''
        Loads in testing data from matlab simulator; includes target values in dataloader
        Args:
            data_dir (string): directory of data
            cases (np.array): array of test cases, must be integers
            dt (float): time-step size of the simulation data
            tmax (int): number of target time-steps to load for each case
            batch_size (int): mini-batch size
        '''
        testing_data = []
        target_data = []
        for i, val in enumerate(cases):
            file_name = data_dir+"/ks_data_{:d}.dat".format(val)
            print("Reading file: {}".format(file_name))
            u = np.loadtxt(file_name, delimiter=',')
            u = u[:,:-1]
            # Initial state
            uTensor = torch.Tensor(u[int(100/dt), :]).unsqueeze(0).unsqueeze(0)
            testing_data.append(uTensor.repeat(1,5,1))
            # Full target field
            target_data.append(torch.Tensor(u[int(100/dt):int(100/dt)+tmax+1, :]))

        data_tuple = (torch.cat(testing_data, dim=0), torch.stack(target_data, dim=0))

        testing_data = DataLoader(TensorDataset(*data_tuple),
            batch_size=batch_size, shuffle=False)

        return testing_data 
Example #30
Source File: train.py    From squash-generation with MIT License
def get_data_loaders(args, tokenizer):
    """ Prepare the dataset for training and evaluation """
    datasets_raw = {}
    logger.info("Loading training data")
    datasets_raw['train'] = get_dataset(tokenizer, args.dataset_cache, args.dataset_path, 'train')
    logger.info("Loading validation data")
    datasets_raw['valid'] = get_dataset(tokenizer, args.dataset_cache, args.dataset_path, 'dev')

    logger.info("Build inputs and labels")
    datasets = {
        "train": defaultdict(list),
        "valid": defaultdict(list)
    }

    for dataset_name, dataset in datasets_raw.items():
        for data_point in dataset:
            instance, _ = build_input_from_segments(data_point, tokenizer)
            for input_name, input_array in instance.items():
                datasets[dataset_name][input_name].append(input_array)

    logger.info("Pad inputs and convert to Tensor")
    tensor_datasets = {"train": [], "valid": []}
    for dataset_name, dataset in datasets.items():
        dataset = pad_dataset(dataset, padding=tokenizer.convert_tokens_to_ids(SPECIAL_TOKENS[-1]))
        for input_name in MODEL_INPUTS:
            tensor = torch.tensor(dataset[input_name])
            tensor_datasets[dataset_name].append(tensor)

    logger.info("Build train and validation dataloaders")
    train_dataset, valid_dataset = TensorDataset(*tensor_datasets["train"]), TensorDataset(*tensor_datasets["valid"])
    train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset) if args.distributed else None
    valid_sampler = torch.utils.data.distributed.DistributedSampler(valid_dataset) if args.distributed else None
    train_loader = DataLoader(train_dataset, sampler=train_sampler, batch_size=args.train_batch_size, shuffle=(not args.distributed))
    valid_loader = DataLoader(valid_dataset, sampler=valid_sampler, batch_size=args.valid_batch_size, shuffle=False)

    logger.info("Train dataset (Batch, Candidates, Seq length): {}".format(train_dataset.tensors[0].shape))
    logger.info("Valid dataset (Batch, Candidates, Seq length): {}".format(valid_dataset.tensors[0].shape))
    return train_loader, valid_loader, train_sampler, valid_sampler