Python torch.utils.data.ConcatDataset() Examples

The following are 30 code examples of torch.utils.data.ConcatDataset(), drawn from open-source projects; the originating project and source file are noted above each example. You may also want to check out all available functions and classes of the module torch.utils.data, or try the search function.
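Before the examples, a minimal self-contained sketch (toy TensorDatasets, names our own) of what ConcatDataset does: it chains several datasets into one, indexes transparently across their boundaries, and records those boundaries in cumulative_sizes.

import torch
from torch.utils.data import ConcatDataset, TensorDataset

ds_a = TensorDataset(torch.arange(3).float().unsqueeze(1))  # 3 samples
ds_b = TensorDataset(torch.arange(2).float().unsqueeze(1))  # 2 samples

combined = ConcatDataset([ds_a, ds_b])
print(len(combined))              # 5
print(combined[3])                # first sample of ds_b
print(combined.cumulative_sizes)  # [3, 5]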
Example #1
Source File: run_hnn.py    From mt-dnn with MIT License
def build_training_data(args, tokenizer, tasks):
  dprd_task = DPRDTask(tokenizer)
  if args.wiki_data:
    wiki_task = WikiWSCRTask(tokenizer)
    train_data = wiki_task.get_train_dataset(args.wiki_data, args.max_seq_length, input_type=tasks)
  else:
    train_data = dprd_task.get_train_dataset(args.data_dir, args.max_seq_length, input_type=tasks)
    if args.dev_train:
      _data = dprd_task.get_dev_dataset(args.data_dir, args.max_seq_length, input_type=tasks)
      _data = [e.data for e in _data if e.name=='DPRD-test'][0]
      train_data = ConcatDataset([train_data, _data])
    if args.gap_data:
      # gap_task is a GAP task instance defined elsewhere in the original source file.
      gap_data = gap_task.get_train_dataset(args.gap_data, args.max_seq_length, input_type=tasks)
      train_data = ConcatDataset([train_data, gap_data])
      if args.dev_train:
        gap_data = [e.data for e in gap_task.get_dev_dataset(args.gap_data, args.max_seq_length, input_type=tasks)]
        train_data = ConcatDataset(gap_data + [train_data])
  return train_data 
Example #2
Source File: data.py    From torch-kalman with MIT License
def from_dataframe(cls,
                       dataframe: 'DataFrame',
                       group_colname: str,
                       time_colname: str,
                       dt_unit: Optional[str],
                       measure_colnames: Optional[Sequence[str]] = None,
                       X_colnames: Optional[Sequence[str]] = None,
                       y_colnames: Optional[Sequence[str]] = None,
                       **kwargs) -> 'TimeSeriesDataLoader':
        dataset = ConcatDataset(
            datasets=[
                TimeSeriesDataset.from_dataframe(
                    dataframe=df,
                    group_colname=group_colname,
                    time_colname=time_colname,
                    measure_colnames=measure_colnames,
                    X_colnames=X_colnames,
                    y_colnames=y_colnames,
                    dt_unit=dt_unit
                )
                for g, df in dataframe.groupby(group_colname)
            ]
        )
        return cls(dataset=dataset, **kwargs) 
Example #3
Source File: data.py    From IIC with MIT License
def _create_mapping_loader(config, dataset_class, partitions):
  imgs_list = []
  for partition in partitions:
    imgs_curr = dataset_class(
      **{"config": config,
         "split": partition,
         "purpose": "test"}  # return testing tuples, image and label
    )
    if config.use_doersch_datasets:
      imgs_curr = DoerschDataset(config, imgs_curr)
    imgs_list.append(imgs_curr)

  imgs = ConcatDataset(imgs_list)
  dataloader = torch.utils.data.DataLoader(imgs,
                                           batch_size=config.batch_sz,
                                           # full batch
                                           shuffle=False,
                                           # no point since not trained on
                                           num_workers=0,
                                           drop_last=False)
  return dataloader 
Example #4
Source File: test_its_journal_2019.py    From ehpi_action_recognition with MIT License
def get_test_set_lab(dataset_path: str, image_size: ImageSize):
    num_joints = 15
    datasets = [
        EhpiLSTMDataset(os.path.join(dataset_path, "JOURNAL_2019_03_TEST_VUE01_30FPS"),
                        transform=transforms.Compose([
                            RemoveJointsOutsideImgEhpi(image_size),
                            NormalizeEhpi(image_size)
                        ]), num_joints=num_joints, dataset_part=DatasetPart.TEST),
        EhpiLSTMDataset(os.path.join(dataset_path, "JOURNAL_2019_03_TEST_VUE02_30FPS"),
                        transform=transforms.Compose([
                            RemoveJointsOutsideImgEhpi(image_size),
                            NormalizeEhpi(image_size)
                        ]), num_joints=num_joints, dataset_part=DatasetPart.TEST),
    ]
    for dataset in datasets:
        dataset.print_label_statistics()
    return ConcatDataset(datasets) 
Example #5
Source File: full_omniglot.py    From learn2learn with MIT License
def __init__(self, root, transform=None, target_transform=None, download=False):
        self.root = os.path.expanduser(root)
        self.transform = transform
        self.target_transform = target_transform

        # Set up both the background and eval dataset
        omni_background = Omniglot(self.root, background=True, download=download)
        # Eval labels also start from 0.
        # It's important to add 964 to label values in eval so they don't overwrite background dataset.
        omni_evaluation = Omniglot(self.root,
                                   background=False,
                                   download=download,
                                   target_transform=lambda x: x + len(omni_background._characters))

        self.dataset = ConcatDataset((omni_background, omni_evaluation))
        self._bookkeeping_path = os.path.join(self.root, 'omniglot-bookkeeping.pkl') 
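The target_transform offset above is a general trick for concatenating labeled datasets whose class indices both start at 0. A rough sketch of the same pattern with a generic wrapper (OffsetTargets and the ds_a/ds_b/num_classes_a names are our own, not from the source):

from torch.utils.data import ConcatDataset, Dataset

class OffsetTargets(Dataset):
    """Wrap a labeled dataset and add a constant offset to its integer targets."""
    def __init__(self, dataset, offset):
        self.dataset = dataset
        self.offset = offset

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, idx):
        x, y = self.dataset[idx]
        return x, y + self.offset

# merged = ConcatDataset([ds_a, OffsetTargets(ds_b, num_classes_a)])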
Example #6
Source File: __init__.py    From SSD with MIT License
def build_dataset(dataset_list, transform=None, target_transform=None, is_train=True):
    assert len(dataset_list) > 0
    datasets = []
    for dataset_name in dataset_list:
        data = DatasetCatalog.get(dataset_name)
        args = data['args']
        factory = _DATASETS[data['factory']]
        args['transform'] = transform
        args['target_transform'] = target_transform
        if factory == VOCDataset:
            args['keep_difficult'] = not is_train
        elif factory == COCODataset:
            args['remove_empty'] = is_train
        dataset = factory(**args)
        datasets.append(dataset)
    # for testing, return a list of datasets
    if not is_train:
        return datasets
    dataset = datasets[0]
    if len(datasets) > 1:
        dataset = ConcatDataset(datasets)

    return [dataset] 
Example #7
Source File: dataset_enum.py    From BatchBALD with GNU General Public License v3.0
def get_targets(dataset):
    """Get the targets of a dataset without any target target transforms(!)."""
    if isinstance(dataset, TransformedDataset):
        return get_targets(dataset.dataset)
    if isinstance(dataset, data.Subset):
        targets = get_targets(dataset.dataset)
        return torch.as_tensor(targets)[dataset.indices]
    if isinstance(dataset, data.ConcatDataset):
        return torch.cat([get_targets(sub_dataset) for sub_dataset in dataset.datasets])

    if isinstance(
            dataset, (datasets.MNIST, datasets.ImageFolder,)
    ):
        return torch.as_tensor(dataset.targets)
    if isinstance(dataset, datasets.SVHN):
        return dataset.labels

    raise NotImplementedError(f"Unknown dataset {dataset}!") 
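As a usage illustration (a hypothetical composition; the nesting is our own), the recursion above unwraps wrappers layer by layer before reading the underlying targets:

from torch.utils.data import ConcatDataset, Subset
from torchvision import datasets

mnist = datasets.MNIST("data", train=True, download=True)
nested = ConcatDataset([Subset(mnist, list(range(100))), mnist])
targets = get_targets(nested)  # tensor of 100 + 60000 labels, assembled recursively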
Example #8
Source File: dataset_enum.py    From BatchBALD with GNU General Public License v3.0
def get_CINIC10(root="./"):
    cinic_directory = root + "data/CINIC-10"
    cinic_mean = [0.47889522, 0.47227842, 0.43047404]
    cinic_std = [0.24205776, 0.23828046, 0.25874835]

    train_transform = transforms.Compose([transforms.RandomCrop(32, padding=4), transforms.RandomHorizontalFlip()])
    shared_transform = transforms.Compose([transforms.ToTensor(),
                                           transforms.Normalize(mean=cinic_mean,
                                                                std=cinic_std)])

    train_dataset = datasets.ImageFolder(cinic_directory + '/train')
    validation_dataset = datasets.ImageFolder(cinic_directory + '/valid')

    # Concatenate train and validation set to have more samples.
    merged_train_dataset = torch.utils.data.ConcatDataset([train_dataset, validation_dataset])

    test_dataset = datasets.ImageFolder(cinic_directory + '/test')

    return DataSource(
        train_dataset=merged_train_dataset,
        test_dataset=test_dataset,
        shared_transform=shared_transform,
        train_transform=train_transform,
    ) 
Example #9
Source File: train_its_journal_2019.py    From ehpi_action_recognition with MIT License
def get_training_set_gt(dataset_path: str, image_size: ImageSize):
    num_joints = 15
    left_indexes: List[int] = [3, 4, 5, 9, 10, 11]
    right_indexes: List[int] = [6, 7, 8, 12, 13, 14]

    datasets: List[EhpiLSTMDataset] = [
        EhpiLSTMDataset(os.path.join(dataset_path, "JOURNAL_2019_03_GT_30fps"),
                        transform=transforms.Compose([
                            RemoveJointsOutsideImgEhpi(image_size),
                            ScaleEhpi(image_size),
                            TranslateEhpi(image_size),
                            FlipEhpi(left_indexes=left_indexes, right_indexes=right_indexes),
                            NormalizeEhpi(image_size)
                        ]), num_joints=num_joints),
    ]
    for dataset in datasets:
        dataset.print_label_statistics()

    return ConcatDataset(datasets) 
Example #10
Source File: marcuhmot.py    From tracking_wo_bnw with GNU General Public License v3.0
def __init__(self, split, dataloader):
		print("[*] Loading Market1501")
		market = Market1501('gt_bbox', **dataloader)
		print("[*] Loading CUHK03")
		cuhk = CUHK03('labeled', **dataloader)
		print("[*] Loading MOT")
		mot = MOTreIDWrapper(split, dataloader)

		self.dataset = ConcatDataset([market, cuhk, mot]) 
Example #11
Source File: data_silo.py    From FARM with Apache License 2.0
def random_split_ConcatDataset(self, ds, lengths):
        """
        Roughly split a ConcatDataset into non-overlapping new datasets of the given lengths.
        Samples inside the ConcatDataset should already be shuffled.

        :param ds: Dataset to be split
        :type ds: Dataset
        :param lengths: lengths of splits to be produced
        :type lengths: list
        """
        if sum(lengths) != len(ds):
            raise ValueError("Sum of input lengths does not equal the length of the input dataset!")

        try:
            idx_dataset = np.where(np.array(ds.cumulative_sizes) > lengths[0])[0][0]
        except IndexError:
            raise Exception("All dataset chunks are being assigned to train set leaving no samples for dev set. "
                            "Either consider increasing dev_split or setting it to 0.0\n"
                            f"Cumulative chunk sizes: {ds.cumulative_sizes}\n"
                            f"train/dev split: {lengths}")

        assert idx_dataset >= 1, "Dev_split ratio is too large, there is no data in train set. " \
                             f"Please lower dev_split = {self.processor.dev_split}"

        train = ConcatDataset(ds.datasets[:idx_dataset])
        test = ConcatDataset(ds.datasets[idx_dataset:])
        return train, test 
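The boundary lookup above relies on ConcatDataset.cumulative_sizes, the running sum of the chunk lengths. A small sketch (toy chunk sizes assumed) of how the np.where line picks the first chunk index past the requested train length:

import numpy as np
import torch
from torch.utils.data import ConcatDataset, TensorDataset

chunks = [TensorDataset(torch.zeros(n, 1)) for n in (4, 3, 5)]
ds = ConcatDataset(chunks)
print(ds.cumulative_sizes)  # [4, 7, 12]

lengths = [6, 6]  # requested train/dev lengths
# First chunk whose cumulative size exceeds the train length,
# mirroring the np.where(...) lookup in random_split_ConcatDataset.
idx_dataset = np.where(np.array(ds.cumulative_sizes) > lengths[0])[0][0]
print(idx_dataset)  # 1 -> train gets chunks[:1], dev gets chunks[1:]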
Example #12
Source File: base.py    From DSD-SATN with Apache License 2.0
def _create_data_loader(self,train_flag=True,hard_minging=False):
        print('gathering datasets')
        if self.internet:
            datasets = Internet(train_flag = train_flag,high_resolution = self.high_resolution, spawn = self.receptive_field,video=self.video)
        elif self.test_single:
            datasets = Demo_Loader(train_flag = train_flag,high_resolution = self.high_resolution)
        elif self.eval_pw3d:
            datasets = PW3D(train_flag = train_flag,high_resolution = self.high_resolution, spawn = self.receptive_field,video=self.video,kps_alpha_format=self.alpha_format)
        else:
            datasets_list = []
            if self.with_h36m:
                h36m = hum36m_dataloader(scale_range = [1.0, 1.6],train_flag=train_flag)#[1.4, 1.6],
                datasets_list = [h36m]
            if self.with_up:
                updataset = UP(train_flag=train_flag,high_resolution=self.high_resolution)
                datasets_list.append(updataset)
            if self.with_mpii:
                mpii = MPIIDataset(train_flag=train_flag,high_resolution=self.high_resolution,)
                datasets_list.append(mpii)
            if self.with_aich:
                aich = AICH(train_flag=train_flag,high_resolution=self.high_resolution,)
                datasets_list.append(aich)
            if self.with_pa:
                pa = Penn_Action(train_flag = train_flag,high_resolution = self.high_resolution,kps_alpha_format=self.alpha_format,spawn = self.receptive_field,video=self.video,receptive_field = self.receptive_field,)
                datasets_list.append(pa)

            datasets = torch.utils.data.ConcatDataset(datasets_list)
        print('gathered datasets')

        return DataLoader(dataset=datasets, batch_size=self.batch_size if train_flag else self.val_batch_size,
                          shuffle=True, drop_last=False, pin_memory=True, num_workers=self.nw)
Example #13
Source File: base.py    From DSD-SATN with Apache License 2.0
def _create_adv_data_loader(self, data_adv_set):
        data_set = []
        for data_set_name in data_adv_set:
            data_set_path = config.data_set_path[data_set_name]
            if data_set_name == 'mosh':
                mosh = Mosh(data_set_path = data_set_path,)
                data_set.append(mosh)
            else:
                msg = 'invalid adv dataset'
                sys.exit(msg)

        con_adv_dataset = ConcatDataset(data_set)
        return DataLoader(dataset=con_adv_dataset, batch_size=self.batch_size, shuffle=True, drop_last=True, pin_memory=True)
Example #14
Source File: dataset_enum.py    From BatchBALD with GNU General Public License v3.0
def get_RepeatedMNIST():
    # num_classes = 10, input_size = 28
    transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))])
    org_train_dataset = datasets.MNIST("data", train=True, download=True, transform=transform)
    train_dataset = data.ConcatDataset([org_train_dataset] * 3)

    test_dataset = datasets.MNIST("data", train=False, transform=transform)
    return DataSource(train_dataset=train_dataset, test_dataset=test_dataset) 
Example #15
Source File: data.py    From IIC with MIT License
def _create_dataloaders(config, dataset_class):
  # unlike in clustering, each dataloader here returns pairs of images - we
  # need the matrix relation between them
  dataloaders = []
  do_shuffle = (config.num_dataloaders == 1)
  for d_i in range(config.num_dataloaders):  # xrange in the original Python 2 source
    print("Creating paired dataloader %d out of %d time %s" %
          (d_i, config.num_dataloaders, datetime.now()))
    sys.stdout.flush()

    train_imgs_list = []
    for train_partition in config.train_partitions:
      train_imgs_curr = dataset_class(
        **{"config": config,
           "split": train_partition,
           "purpose": "train"}  # return training tuples, not including labels
      )
      if config.use_doersch_datasets:
        train_imgs_curr = DoerschDataset(config, train_imgs_curr)

      train_imgs_list.append(train_imgs_curr)

    train_imgs = ConcatDataset(train_imgs_list)

    train_dataloader = torch.utils.data.DataLoader(train_imgs,
                                                   batch_size=config.dataloader_batch_sz,
                                                   shuffle=do_shuffle,
                                                   num_workers=0,
                                                   drop_last=False)

    if d_i > 0:
      assert (len(train_dataloader) == len(dataloaders[d_i - 1]))

    dataloaders.append(train_dataloader)

  num_train_batches = len(dataloaders[0])
  print("Length of paired datasets vector %d" % len(dataloaders))
  print("Number of batches per epoch: %d" % num_train_batches)
  sys.stdout.flush()

  return dataloaders 
Example #16
Source File: build.py    From afm_cvpr2019 with MIT License
def build_train_dataset(config):
    
    root_list = [osp.abspath(osp.join(osp.dirname(__file__),'..','data', f)) for f in config.DATASETS.TRAIN]

    IN_RES = [config.INPUT.IN_RES]*2
    OUT_RES= [config.INPUT.OUT_RES]*2

    get_dataset = lambda path: AFMTrainDataset(path, img_res=IN_RES, afm_res=OUT_RES)
    
    dataset = data.ConcatDataset([get_dataset(path) for path in root_list])

    loader = data.DataLoader(dataset, batch_size=config.SOLVER.BATCH_SIZE, shuffle=True,
                             num_workers=config.DATALOADER.NUM_WORKERS, pin_memory=True)

    return loader
Example #17
Source File: run_hnn.py    From mt-dnn with MIT License
def build_training_data_mt(args, tokenizer):
  if args.group_tasks:
    return build_training_data(args, tokenizer, args.tasks)
  else:
    data = []
    for t in args.tasks:
      data.append(build_training_data(args, tokenizer, [t]))
    return ConcatDataset(data) 
Example #18
Source File: data.py    From continual-learning with MIT License
def get_dataset(name, type='train', download=True, capacity=None, permutation=None, dir='./datasets',
                verbose=False, target_transform=None):
    '''Create [train|valid|test]-dataset.'''

    data_name = 'mnist' if name=='mnist28' else name
    dataset_class = AVAILABLE_DATASETS[data_name]

    # specify image-transformations to be applied
    dataset_transform = transforms.Compose([
        *AVAILABLE_TRANSFORMS[name],
        transforms.Lambda(lambda x: _permutate_image_pixels(x, permutation)),
    ])

    # load data-set
    dataset = dataset_class('{dir}/{name}'.format(dir=dir, name=data_name), train=False if type=='test' else True,
                            download=download, transform=dataset_transform, target_transform=target_transform)

    # print information about dataset on the screen
    if verbose:
        print("  --> {}: '{}'-dataset consisting of {} samples".format(name, type, len(dataset)))

    # if dataset is (possibly) not large enough, create copies until it is.
    if capacity is not None and len(dataset) < capacity:
        dataset = ConcatDataset([copy.deepcopy(dataset) for _ in range(int(np.ceil(capacity / len(dataset))))])

    return dataset


#----------------------------------------------------------------------------------------------------------# 
Example #19
Source File: data.py    From pytorch-deep-generative-replay with MIT License
def get_dataset(name, train=True, permutation=None, capacity=None):
    dataset = (TRAIN_DATASETS[name] if train else TEST_DATASETS[name])()
    dataset.transform = transforms.Compose([
        dataset.transform,
        transforms.Lambda(lambda x: _permutate_image_pixels(x, permutation)),
    ])

    if capacity is not None and len(dataset) < capacity:
        return ConcatDataset([
            copy.deepcopy(dataset) for _ in
            range(math.ceil(capacity / len(dataset)))
        ])
    else:
        return dataset 
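A quick sanity check of the padding arithmetic above (toy sizes assumed): a 10-sample dataset padded to capacity 25 needs ceil(25/10) = 3 copies, yielding 30 samples.

import copy
import math

import torch
from torch.utils.data import ConcatDataset, TensorDataset

dataset = TensorDataset(torch.zeros(10, 1))
capacity = 25
padded = ConcatDataset([copy.deepcopy(dataset)
                        for _ in range(math.ceil(capacity / len(dataset)))])
print(len(padded))  # 30 >= capacity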
Example #20
Source File: core.py    From texture_fields with MIT License
def __init__(self, dataset_folder, fields, split=None,
                 classes=None, no_except=True, transform=None):
        # Read metadata file
        metadata_file = os.path.join(dataset_folder, 'metadata.yaml')
        if os.path.exists(metadata_file):
            with open(metadata_file, 'r') as f:
                metadata = yaml.load(f, Loader=yaml.FullLoader)  # modern PyYAML requires an explicit Loader
        else:
            metadata = {}

        # If classes is None, use all subfolders
        if classes is None:
            classes = os.listdir(dataset_folder)
            classes = [c for c in classes
                       if os.path.isdir(os.path.join(dataset_folder, c))]

        # Get all sub-datasets
        self.datasets_classes = []
        for c in classes:
            subpath = os.path.join(dataset_folder, c)
            if not os.path.isdir(subpath):
                logger.warning('Class %s does not exist in dataset.' % c)

            metadata_c = metadata.get(c, {'id': c, 'name': 'n/a'})
            dataset = Shapes3dClassDataset(subpath, fields, split,
                                           metadata_c, no_except,
                                           transform=transform)
            self.datasets_classes.append(dataset)

        self._concat_dataset = data.ConcatDataset(self.datasets_classes) 
Example #21
Source File: train_ehpi.py    From ehpi_action_recognition with MIT License
def get_train_set(dataset_path: str, image_size: ImageSize):
    num_joints = 15
    left_indexes: List[int] = [3, 4, 5, 9, 10, 11]
    right_indexes: List[int] = [6, 7, 8, 12, 13, 14]

    datasets: List[EhpiDataset] = [
        # Set 1
        EhpiDataset(os.path.join(dataset_path, "ofp_record_2019_03_11_HSRT_30FPS"),
                    transform=transforms.Compose([
                        RemoveJointsOutsideImgEhpi(image_size),
                        ScaleEhpi(image_size),
                        TranslateEhpi(image_size),
                        FlipEhpi(left_indexes=left_indexes, right_indexes=right_indexes),
                        NormalizeEhpi(image_size)
                    ]), num_joints=num_joints, dataset_part=DatasetPart.TEST),
        # Set 2
        EhpiDataset(os.path.join(dataset_path, "2019_03_13_Freilichtmuseum_30FPS"),
                    transform=transforms.Compose([
                        RemoveJointsOutsideImgEhpi(image_size),
                        ScaleEhpi(image_size),
                        TranslateEhpi(image_size),
                        FlipEhpi(left_indexes=left_indexes, right_indexes=right_indexes),
                        NormalizeEhpi(image_size)
                    ]), num_joints=num_joints, dataset_part=DatasetPart.TRAIN),
    ]
    for dataset in datasets:
        dataset.print_label_statistics()

    return ConcatDataset(datasets) 
Example #22
Source File: train_its_journal_2019.py    From ehpi_action_recognition with MIT License
def get_training_set_both(dataset_path: str, image_size: ImageSize):
    num_joints = 15
    left_indexes: List[int] = [3, 4, 5, 9, 10, 11]
    right_indexes: List[int] = [6, 7, 8, 12, 13, 14]

    datasets: List[EhpiLSTMDataset] = [
        EhpiLSTMDataset(os.path.join(dataset_path, "JOURNAL_2019_03_POSEALGO_30fps"),
                        transform=transforms.Compose([
                            RemoveJointsOutsideImgEhpi(image_size),
                            ScaleEhpi(image_size),
                            TranslateEhpi(image_size),
                            FlipEhpi(left_indexes=left_indexes, right_indexes=right_indexes),
                            NormalizeEhpi(image_size)
                        ]), num_joints=num_joints),
        EhpiLSTMDataset(os.path.join(dataset_path, "JOURNAL_2019_03_GT_30fps"),
                        transform=transforms.Compose([
                            RemoveJointsOutsideImgEhpi(image_size),
                            ScaleEhpi(image_size),
                            TranslateEhpi(image_size),
                            FlipEhpi(left_indexes=left_indexes, right_indexes=right_indexes),
                            NormalizeEhpi(image_size)
                        ]), num_joints=num_joints),
    ]
    for dataset in datasets:
        dataset.print_label_statistics()

    return ConcatDataset(datasets) 
Example #23
Source File: train_ehpi_itsc_2019_ofp.py    From ehpi_action_recognition with MIT License
def get_sim_gt_only(dataset_path: str, image_size: ImageSize):
    num_joints = 15
    left_indexes: List[int] = [3, 4, 5, 9, 10, 11]
    right_indexes: List[int] = [6, 7, 8, 12, 13, 14]

    datasets: List[EhpiDataset] = [
        EhpiDataset(os.path.join(dataset_path, "ofp_sim_gt_equal_30fps"),
                    transform=transforms.Compose([
                        RemoveJointsOutsideImgEhpi(image_size),
                        RemoveJointsEhpi(indexes_to_remove=foot_indexes, indexes_to_remove_2=knee_indexes,
                                         probability=0.25),
                        ScaleEhpi(image_size),
                        TranslateEhpi(image_size),
                        FlipEhpi(left_indexes=left_indexes, right_indexes=right_indexes),
                        NormalizeEhpi(image_size)
                    ]), num_joints=num_joints),
        EhpiDataset(os.path.join(dataset_path, "ofp_from_mocap_gt_30fps"),
                    transform=transforms.Compose([
                        RemoveJointsOutsideImgEhpi(image_size),
                        RemoveJointsEhpi(indexes_to_remove=foot_indexes, indexes_to_remove_2=knee_indexes,
                                         probability=0.25),
                        ScaleEhpi(image_size),
                        TranslateEhpi(image_size),
                        FlipEhpi(left_indexes=left_indexes, right_indexes=right_indexes),
                        NormalizeEhpi(image_size)
                    ]), num_joints=num_joints),
    ]
    for dataset in datasets:
        dataset.print_label_statistics()

    return ConcatDataset(datasets) 
Example #24
Source File: train_ehpi_itsc_2019_ofp.py    From ehpi_action_recognition with MIT License
def get_sim_pose_algo_only(dataset_path: str, image_size: ImageSize):
    num_joints = 15
    left_indexes: List[int] = [3, 4, 5, 9, 10, 11]
    right_indexes: List[int] = [6, 7, 8, 12, 13, 14]

    datasets: List[EhpiDataset] = [
        EhpiDataset(os.path.join(dataset_path, "ofp_sim_pose_algo_equal_30fps"),
                    transform=transforms.Compose([
                        RemoveJointsOutsideImgEhpi(image_size),
                        RemoveJointsEhpi(indexes_to_remove=foot_indexes, indexes_to_remove_2=knee_indexes,
                                         probability=0.25),
                        ScaleEhpi(image_size),
                        TranslateEhpi(image_size),
                        FlipEhpi(left_indexes=left_indexes, right_indexes=right_indexes),
                        NormalizeEhpi(image_size)
                    ]), num_joints=num_joints),
        EhpiDataset(os.path.join(dataset_path, "ofp_from_mocap_pose_algo_30fps"),
                    transform=transforms.Compose([
                        RemoveJointsOutsideImgEhpi(image_size),
                        RemoveJointsEhpi(indexes_to_remove=foot_indexes, indexes_to_remove_2=knee_indexes,
                                         probability=0.25),
                        ScaleEhpi(image_size),
                        TranslateEhpi(image_size),
                        FlipEhpi(left_indexes=left_indexes, right_indexes=right_indexes),
                        NormalizeEhpi(image_size)
                    ]), num_joints=num_joints),
    ]
    for dataset in datasets:
        dataset.print_label_statistics()

    return ConcatDataset(datasets) 
Example #25
Source File: dataset.py    From source_separation with Apache License 2.0
def get_concated_datasets(meta_dir_list: List[str], batch_size: int, num_workers: int,
                          meta_cls_list: List[MetaFrame],
                          fix_len: int = 0, skip_audio: bool = False, sample_rate: int = 44100,
                          audio_mask: bool = False) -> Tuple[SpeechDataLoader, SpeechDataLoader]:

    assert all(os.path.isdir(x) for x in meta_dir_list), 'There are invalid directory paths!'
    assert len(meta_dir_list) == len(meta_cls_list), 'meta_dir_list and meta_cls_list must have the same length!'

    # datasets
    train_datasets = []
    valid_datasets = []

    for meta_cls, meta_dir in zip(meta_cls_list, meta_dir_list):
        train_file, valid_file = meta_cls.frame_file_names[1:]

        # load meta file
        train_meta = meta_cls(os.path.join(meta_dir, train_file), sr=sample_rate)
        valid_meta = meta_cls(os.path.join(meta_dir, valid_file), sr=sample_rate)

        # create dataset
        train_dataset = AugmentSpeechDataset(train_meta, fix_len=fix_len, skip_audio=skip_audio, audio_mask=audio_mask)
        valid_dataset = AugmentSpeechDataset(valid_meta, fix_len=fix_len, skip_audio=skip_audio, audio_mask=audio_mask)

        train_datasets.append(train_dataset)
        valid_datasets.append(valid_dataset)

    # make concat dataset
    train_conc_dataset = ConcatDataset(train_datasets)
    valid_conc_dataset = ConcatDataset(valid_datasets)

    # create data loader
    train_loader = SpeechDataLoader(train_conc_dataset, batch_size=batch_size, is_bucket=False,
                                    num_workers=num_workers, skip_last_bucket=False)
    valid_loader = SpeechDataLoader(valid_conc_dataset, batch_size=batch_size, is_bucket=False,
                                    num_workers=num_workers, skip_last_bucket=False)

    return train_loader, valid_loader 
Example #26
Source File: language_modeling.py    From training_results_v0.5 with Apache License 2.0
def load_dataset(self, split, combine=False):
        """Load a dataset split."""

        loaded_datasets = []

        for k in itertools.count():
            split_k = split + (str(k) if k > 0 else '')
            path = os.path.join(self.args.data, split_k)

            if self.args.raw_text and IndexedRawTextDataset.exists(path):
                ds = IndexedRawTextDataset(path, self.dictionary)
                tokens = [t for l in ds.tokens_list for t in l]
            elif not self.args.raw_text and IndexedInMemoryDataset.exists(path):
                ds = IndexedInMemoryDataset(path, fix_lua_indexing=True)
                tokens = ds.buffer
            else:
                if k > 0:
                    break
                else:
                    raise FileNotFoundError('Dataset not found: {} ({})'.format(split, self.args.data))

            loaded_datasets.append(
                TokenBlockDataset(
                    tokens, ds.sizes, self.args.tokens_per_sample, self.args.sample_break_mode,
                    include_targets=True
                ))

            print('| {} {} {} examples'.format(self.args.data, split_k, len(loaded_datasets[-1])))

            if not combine:
                break

        if len(loaded_datasets) == 1:
            dataset = loaded_datasets[0]
            sizes = dataset.sizes
        else:
            dataset = ConcatDataset(loaded_datasets)
            sizes = np.concatenate([ds.sizes for ds in loaded_datasets])

        self.datasets[split] = MonolingualDataset(dataset, sizes, self.dictionary, shuffle=False) 
Example #27
Source File: slides.py    From torchsupport with MIT License
def MultiSlideData(self, paths, size=(224, 224), level=0, transform=lambda x: x):
  datasets = []
  for path in paths:
    datasets.append(SingleSlideData(path, size=size, level=level, transform=transform))
  return ConcatDataset(datasets) 
Example #28
Source File: dataloader.py    From OpenLongTailRecognition-OLTR with BSD 3-Clause "New" or "Revised" License
def load_data(data_root, dataset, phase, batch_size, sampler_dic=None, num_workers=4, test_open=False, shuffle=True):
    
    txt = './data/%s/%s_%s.txt'%(dataset, dataset, (phase if phase != 'train_plain' else 'train'))

    print('Loading data from %s' % (txt))

    if phase not in ['train', 'val']:
        transform = data_transforms['test']
    else:
        transform = data_transforms[phase]

    print('Use data transformation:', transform)

    set_ = LT_Dataset(data_root, txt, transform)

    if phase == 'test' and test_open:
        open_txt = './data/%s/%s_open.txt'%(dataset, dataset)
        print('Testing with opensets from %s'%(open_txt))
        open_set_ = LT_Dataset('./data/%s/%s_open'%(dataset, dataset), open_txt, transform)
        set_ = ConcatDataset([set_, open_set_])

    if sampler_dic and phase == 'train':
        print('Using sampler.')
        print('Sample %s samples per-class.' % sampler_dic['num_samples_cls'])
        return DataLoader(dataset=set_, batch_size=batch_size, shuffle=False,
                           sampler=sampler_dic['sampler'](set_, sampler_dic['num_samples_cls']),
                           num_workers=num_workers)
    else:
        print('No sampler.')
        print('Shuffle is %s.' % (shuffle))
        return DataLoader(dataset=set_, batch_size=batch_size,
                          shuffle=shuffle, num_workers=num_workers) 
Example #29
Source File: video_datasets.py    From RCRNet-Pytorch with MIT License
def get_datasets(name_list, split_list, config_path, root, training, transforms,
                    read_clip=False, random_reverse_clip=False, label_interval=1, frame_between_label_num=0, clip_len=4):
    """
        return type of data.ConcatDataset or single dataset data.Dataset
    """
    if not isinstance(name_list, list):
        name_list = [name_list]
    if not isinstance(split_list, list):
        split_list = [split_list]
    if len(name_list) != len(split_list):
        raise ValueError("Dataset numbers must match split numbers")
    # read dataset config
    with open(config_path) as f:
        datasets_config = yaml.load(f, Loader=yaml.FullLoader)  # explicit Loader for modern PyYAML
    # get datasets
    dataset_list = []
    for name, split in zip(name_list, split_list):
        if name not in datasets_config.keys():
            raise ValueError("Error dataset name {}".format(name))

        dataset_config = datasets_config[name]
        dataset_config['name'] = name
        dataset_config['root'] = root
        dataset_config['split'] = split
        dataset_config['training'] = training
        dataset_config['transforms'] = transforms

        if "video_split" in dataset_config:
            dataset_config['label_interval'] = label_interval
            dataset_config['frame_between_label_num'] = frame_between_label_num
            if read_clip:
                dataset = VideoClipDataset(clip_len=clip_len,
                                        random_reverse_clip=random_reverse_clip,
                                        **dataset_config)
            else:
                dataset = VideoImageDataset(**dataset_config)
        else:
            dataset = ImageDataset(**dataset_config)

        dataset_list.append(dataset)

    if len(dataset_list) == 1:
        return dataset_list[0]
    else:
        return data.ConcatDataset(dataset_list) 
Example #30
Source File: train_ehpi_itsc_2019_ofp.py    From ehpi_action_recognition with MIT License
def get_set_wo_sim(dataset_path: str, image_size: ImageSize):
    num_joints = 15
    left_indexes: List[int] = [3, 4, 5, 9, 10, 11]
    right_indexes: List[int] = [6, 7, 8, 12, 13, 14]

    datasets: List[EhpiDataset] = [
        EhpiDataset(os.path.join(dataset_path, "ofp_webcam"),
                    transform=transforms.Compose([
                        RemoveJointsOutsideImgEhpi(image_size),
                        ScaleEhpi(image_size),
                        TranslateEhpi(image_size),
                        FlipEhpi(left_indexes=left_indexes, right_indexes=right_indexes),
                        NormalizeEhpi(image_size)
                    ]), num_joints=num_joints),
        EhpiDataset(os.path.join(dataset_path, "ofp_record_2019_03_11_30FPS"),
                    transform=transforms.Compose([
                        RemoveJointsOutsideImgEhpi(image_size),
                        ScaleEhpi(image_size),
                        TranslateEhpi(image_size),
                        FlipEhpi(left_indexes=left_indexes, right_indexes=right_indexes),
                        NormalizeEhpi(image_size)
                    ]), num_joints=num_joints),
        EhpiDataset(os.path.join(dataset_path, "ofp_record_2019_03_11_HSRT_30FPS"),
                    transform=transforms.Compose([
                        RemoveJointsOutsideImgEhpi(image_size),
                        ScaleEhpi(image_size),
                        TranslateEhpi(image_size),
                        FlipEhpi(left_indexes=left_indexes, right_indexes=right_indexes),
                        NormalizeEhpi(image_size)
                    ]), num_joints=num_joints, dataset_part=DatasetPart.TEST),
        EhpiDataset(os.path.join(dataset_path, "ofp_record_2019_03_11_HELLA_30FPS"),
                    transform=transforms.Compose([
                        RemoveJointsOutsideImgEhpi(image_size),
                        ScaleEhpi(image_size),
                        TranslateEhpi(image_size),
                        FlipEhpi(left_indexes=left_indexes, right_indexes=right_indexes),
                        NormalizeEhpi(image_size)
                    ]), num_joints=num_joints, dataset_part=DatasetPart.TRAIN),
        # Freilichtmuseum
        EhpiDataset(os.path.join(dataset_path, "2019_03_13_Freilichtmuseum_30FPS"),
                    transform=transforms.Compose([
                        RemoveJointsOutsideImgEhpi(image_size),
                        ScaleEhpi(image_size),
                        TranslateEhpi(image_size),
                        FlipEhpi(left_indexes=left_indexes, right_indexes=right_indexes),
                        NormalizeEhpi(image_size)
                    ]), num_joints=num_joints, dataset_part=DatasetPart.TRAIN),
    ]
    for dataset in datasets:
        dataset.print_label_statistics()

    return ConcatDataset(datasets)