Python random.shuffle() Examples

The following are 30 code examples of random.shuffle(), drawn from open-source projects. You can go to the original project or source file by following the links above each example. You may also want to check out all available functions and classes of the random module, or try the search function.
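As a quick refresher before the project examples: random.shuffle() reorders a mutable sequence in place and returns None, so writing items = random.shuffle(items) leaves items as None. A minimal sketch:

import random

items = list(range(10))
random.seed(42)        # seeding makes the resulting order reproducible
random.shuffle(items)  # shuffles in place; the return value is None
print(items)           # a reproducible permutation of 0..9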
Example #1
Source File: DataLoader_NER.py    From pytorch_NER_BiLSTM_CNN_CRF with Apache License 2.0
def __init__(self, path, shuffle, config):
        """
        :param path: data path list
        :param shuffle:  shuffle bool
        :param config:  config
        """
        #
        print("Loading Data......")
        self.data_list = []
        self.max_count = config.max_count
        self.path = path
        self.shuffle = shuffle
        # char feature
        self.pad_char = [char_pad, char_pad]
        # self.pad_char = []
        self.max_char_len = config.max_char_len 
Example #2
Source File: estimator_utils.py    From EDeN with MIT License
def make_train_test_sets(pos_graphs, neg_graphs,
                         test_proportion=.3, random_state=2):
    """make_train_test_sets."""
    random.seed(random_state)
    random.shuffle(pos_graphs)
    random.shuffle(neg_graphs)
    pos_dim = len(pos_graphs)
    neg_dim = len(neg_graphs)
    tr_pos_graphs = pos_graphs[:-int(pos_dim * test_proportion)]
    te_pos_graphs = pos_graphs[-int(pos_dim * test_proportion):]
    tr_neg_graphs = neg_graphs[:-int(neg_dim * test_proportion)]
    te_neg_graphs = neg_graphs[-int(neg_dim * test_proportion):]
    tr_graphs = tr_pos_graphs + tr_neg_graphs
    te_graphs = te_pos_graphs + te_neg_graphs
    tr_targets = [1] * len(tr_pos_graphs) + [0] * len(tr_neg_graphs)
    te_targets = [1] * len(te_pos_graphs) + [0] * len(te_neg_graphs)
    tr_graphs, tr_targets = paired_shuffle(tr_graphs, tr_targets)
    te_graphs, te_targets = paired_shuffle(te_graphs, te_targets)
    return (tr_graphs, np.array(tr_targets)), (te_graphs, np.array(te_targets)) 
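Example #2 calls a paired_shuffle helper defined elsewhere in EDeN. A minimal sketch of the idea — not EDeN's actual implementation — that shuffles two equal-length lists with one shared permutation:

import random

def paired_shuffle(list_a, list_b):
    # zip into pairs, shuffle the pairs, then split back into two aligned lists
    pairs = list(zip(list_a, list_b))
    random.shuffle(pairs)
    if not pairs:
        return [], []
    list_a, list_b = zip(*pairs)
    return list(list_a), list(list_b)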
Example #3
Source File: utils.py    From deep-learning-note with MIT License
def data_iter_random(corpus_indices, batch_size, num_steps, device=None):
    # subtract 1 because the output index x equals the corresponding input index y plus 1
    num_examples = (len(corpus_indices) - 1) // num_steps
    epoch_size = num_examples // batch_size
    example_indices = list(range(num_examples))
    random.shuffle(example_indices)

    # return a sequence of length num_steps starting at pos
    def _data(pos):
        return corpus_indices[pos: pos + num_steps]

    if device is None:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    for i in range(epoch_size):
        # read batch_size random examples each time
        i = i * batch_size
        batch_indices = example_indices[i: i + batch_size]
        X = [_data(j * num_steps) for j in batch_indices]
        Y = [_data(j * num_steps + 1) for j in batch_indices]
        yield torch.tensor(X, dtype=torch.float32, device=device), torch.tensor(Y, dtype=torch.float32, device=device) 
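A usage sketch for the iterator above (assumes torch is available; the toy corpus is hypothetical):

corpus = list(range(30))  # a toy corpus of 30 token indices
for X, Y in data_iter_random(corpus, batch_size=2, num_steps=6):
    print(X.shape, Y.shape)  # torch.Size([2, 6]) each; Y is X shifted one step forward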
Example #4
Source File: detection.py    From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0
def __init__(self, batch_size, data_shape,
                 path_imgrec=None, path_imglist=None, path_root=None, path_imgidx=None,
                 shuffle=False, part_index=0, num_parts=1, aug_list=None, imglist=None,
                 data_name='data', label_name='label', **kwargs):
        super(ImageDetIter, self).__init__(batch_size=batch_size, data_shape=data_shape,
                                           path_imgrec=path_imgrec, path_imglist=path_imglist,
                                           path_root=path_root, path_imgidx=path_imgidx,
                                           shuffle=shuffle, part_index=part_index,
                                           num_parts=num_parts, aug_list=[], imglist=imglist,
                                           data_name=data_name, label_name=label_name)

        if aug_list is None:
            self.auglist = CreateDetAugmenter(data_shape, **kwargs)
        else:
            self.auglist = aug_list

        # go through all labels to get the proper label shape
        label_shape = self._estimate_label_shape()
        self.provide_label = [(label_name, (self.batch_size, label_shape[0], label_shape[1]))]
        self.label_shape = label_shape 
Example #5
Source File: test_recordio.py    From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0
def test_indexed_recordio():
    fidx = tempfile.mktemp()
    frec = tempfile.mktemp()
    N = 255

    writer = mx.recordio.MXIndexedRecordIO(fidx, frec, 'w')
    for i in range(N):
        if sys.version_info[0] < 3:
            writer.write_idx(i, str(chr(i)))
        else:
            writer.write_idx(i, bytes(str(chr(i)), 'utf-8'))
    del writer

    reader = mx.recordio.MXIndexedRecordIO(fidx, frec, 'r')
    keys = reader.keys
    assert sorted(keys) == [i for i in range(N)]
    random.shuffle(keys)
    for i in keys:
        res = reader.read_idx(i)
        if sys.version_info[0] < 3:
            assert res == str(chr(i))
        else:
            assert res == bytes(str(chr(i)), 'utf-8') 
Example #6
Source File: BasePythonDataLayer.py    From Caffe-Python-Data-Layer with BSD 2-Clause "Simplified" License
def preload_db(self):
        """Read all images in and all labels

        Implementation relies on DataManager classes
        """
        print("Preloading Data...")
        if self._source_type == 'BCF':
            self._data_manager = BCFDataManager(self._layer_params)
        elif self._source_type == 'CSV':
            self._data_manager = CSVDataManager(self._layer_params)
        elif self._source_type == 'LMDB':
            self._data_manager = LMDBDataManager(self._layer_params)
        # read all data
        self._data, self._label = self._data_manager.load_all()
        self._sample_count = len(self._data)
        if self._shuffle:
            self.shuffle() 
Example #7
Source File: BasePythonDataLayer.py    From Caffe-Python-Data-Layer with BSD 2-Clause "Simplified" License
def setup(self, bottom, top):
        layer_params = yaml.safe_load(self.param_str)  # safe_load avoids constructing arbitrary Python objects
        self._layer_params = layer_params
        # default batch_size = 256
        self._batch_size = int(layer_params.get('batch_size', 256))
        self._resize = layer_params.get('resize', -1)
        self._mean_file = layer_params.get('mean_file', None)
        self._source_type = layer_params.get('source_type', 'CSV')
        self._shuffle = layer_params.get('shuffle', False)
        # read image_mean from file and preload all data into memory
        # will read either file or array into self._mean
        self.set_mean()
        self.preload_db()
        self._compressed = self._layer_params.get('compressed', True)
        if not self._compressed:
            self.decompress_data() 
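For context, the param_str parsed above arrives from the network definition as a YAML string; a hypothetical value consistent with the keys read in setup():

# hypothetical layer configuration; yaml.safe_load parses it into a dict
param_str = "{batch_size: 128, source_type: CSV, shuffle: true, resize: -1, compressed: false}"
# -> {'batch_size': 128, 'source_type': 'CSV', 'shuffle': True, 'resize': -1, 'compressed': False}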
Example #8
Source File: 30_series_sampling.py    From deep-learning-note with MIT License
def data_iter_random(corpus_indices, batch_size, num_steps, device=None):
    # subtract 1 because the output index x equals the corresponding input index y plus 1
    num_examples = (len(corpus_indices) - 1) // num_steps
    epoch_size = num_examples // batch_size
    example_indices = list(range(num_examples))
    random.shuffle(example_indices)

    # return a sequence of length num_steps starting at pos
    def _data(pos):
        return corpus_indices[pos: pos + num_steps]
    if device is None:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    for i in range(epoch_size):
        # read batch_size random examples each time
        i = i * batch_size
        batch_indices = example_indices[i: i + batch_size]
        X = [_data(j * num_steps) for j in batch_indices]
        Y = [_data(j * num_steps + 1) for j in batch_indices]
        yield torch.tensor(X, dtype=torch.float32, device=device), torch.tensor(Y, dtype=torch.float32, device=device) 
Example #9
Source File: utils.py    From deep-learning-note with MIT License
def load_data_fashion_mnist(batch_size, resize=None, root='./data'):
    """Download the fashion mnist dataset and then load into memory."""
    trans = []
    if resize:
        trans.append(torchvision.transforms.Resize(size=resize))
    trans.append(torchvision.transforms.ToTensor())

    transform = torchvision.transforms.Compose(trans)
    mnist_train = torchvision.datasets.FashionMNIST(root=root, train=True, download=True, transform=transform)
    mnist_test = torchvision.datasets.FashionMNIST(root=root, train=False, download=True, transform=transform)
    if sys.platform.startswith('win'):
        num_workers = 0  # 0 means no extra worker processes are used to speed up data loading
    else:
        num_workers = 4
    train_iter = torch.utils.data.DataLoader(mnist_train, batch_size=batch_size, shuffle=True, num_workers=num_workers)
    test_iter = torch.utils.data.DataLoader(mnist_test, batch_size=batch_size, shuffle=False, num_workers=num_workers)

    return train_iter, test_iter 
Example #10
Source File: data.py    From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0
def get_caltech101_iterator(batch_size, num_workers, dtype):
    def transform(image, label):
        # resize the shorter edge to 224, the longer edge will be greater or equal to 224
        resized = mx.image.resize_short(image, 224)
        # center and crop an area of size (224,224)
        cropped, crop_info = mx.image.center_crop(resized, (224, 224))
        # transpose the channels to be (3,224,224)
        transposed = mx.nd.transpose(cropped, (2, 0, 1))
        return transposed, label

    training_path, testing_path = get_caltech101_data()
    dataset_train = ImageFolderDataset(root=training_path, transform=transform)
    dataset_test = ImageFolderDataset(root=testing_path, transform=transform)

    train_data = DataLoader(dataset_train, batch_size, shuffle=True, num_workers=num_workers)
    test_data = DataLoader(dataset_test, batch_size, shuffle=False, num_workers=num_workers)
    return DataLoaderIter(train_data), DataLoaderIter(test_data) 
Example #11
Source File: atomic.py    From comet-commonsense with Apache License 2.0
def shuffle_sequences(self, split="train", keys=None):
        if keys is None:
            # print(type(self.data))
            # print(type(self.data.keys()))
            keys = self.data[split].keys()

        for key in keys:
            idxs = list(range(len(self.data[split][key])))

            random.shuffle(idxs)

            self.sequences[split][key] = \
                self.sequences[split][key].index_select(
                    0, torch.LongTensor(idxs))

            temp = [self.data[split][key][i] for i in idxs]
            self.data[split][key] = temp
            temp = [self.masks[split][key][i] for i in idxs]
            self.masks[split][key] = temp 
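The pattern above — drawing one permutation and applying it everywhere — keeps the tensors in self.sequences aligned with the Python lists in self.data and self.masks. A stripped-down sketch of the same idea (hypothetical data, assumes torch):

import random
import torch

idxs = list(range(5))
random.shuffle(idxs)  # one permutation shared by every parallel structure

tensor_rows = torch.arange(5)
list_rows = ['a', 'b', 'c', 'd', 'e']

shuffled_tensor = tensor_rows.index_select(0, torch.LongTensor(idxs))
shuffled_list = [list_rows[i] for i in idxs]  # stays aligned with the tensor rows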
Example #12
Source File: conceptnet.py    From comet-commonsense with Apache License 2.0
def shuffle_sequences(self, split="train", keys=None):
        if keys is None:
            # print(type(self.data))
            # print(type(self.data.keys()))
            keys = self.data[split].keys()

        for key in keys:
            if key in ["positive", "negative"]:
                continue
            idxs = list(range(len(self.data[split][key])))

            random.shuffle(idxs)

            self.sequences[split][key] = \
                self.sequences[split][key].index_select(
                    0, torch.LongTensor(idxs))

            temp = [self.data[split][key][i] for i in idxs]
            self.data[split][key] = temp

            temp = [self.masks[split][key][i] for i in idxs]
            self.masks[split][key] = temp 
Example #13
Source File: DataLoader_NER.py    From pytorch_NER_BiLSTM_CNN_CRF with Apache License 2.0
def dataLoader(self):
        """
        :return:
        """
        path = self.path
        shuffle = self.shuffle
        assert isinstance(path, list), "path must be a list"
        print("Data Path {}".format(path))
        for id_data in range(len(path)):
            print("Loading Data Form {}".format(path[id_data]))
            insts = self._Load_Each_Data(path=path[id_data], shuffle=shuffle)
            random.shuffle(insts)
            self._write_shuffle_inst_to_file(insts, path=path[id_data])
            self.data_list.append(insts)
        # return train/dev/test data
        if len(self.data_list) == 3:
            return self.data_list[0], self.data_list[1], self.data_list[2]
        elif len(self.data_list) == 2:
            return self.data_list[0], self.data_list[1] 
Example #14
Source File: data.py    From VSE-C with MIT License
def get_loader_single(data_name, split, root, json, vocab, transform,
                      batch_size=100, shuffle=True,
                      num_workers=2, ids=None, collate_fn=collate_fn):
    """Returns torch.utils.data.DataLoader for custom coco dataset."""
    if 'coco' in data_name:
        # COCO custom dataset
        dataset = CocoDataset(root=root,
                              json=json,
                              vocab=vocab,
                              transform=transform, ids=ids)
    elif 'f8k' in data_name or 'f30k' in data_name:
        dataset = FlickrDataset(root=root,
                                split=split,
                                json=json,
                                vocab=vocab,
                                transform=transform)

    # Data loader
    data_loader = torch.utils.data.DataLoader(dataset=dataset,
                                              batch_size=batch_size,
                                              shuffle=shuffle,
                                              pin_memory=True,
                                              num_workers=num_workers,
                                              collate_fn=collate_fn)
    return data_loader 
Example #15
Source File: chainer_alex.py    From mlimages with MIT License
def show(limit, shuffle=True):
    td = TrainingData(LABEL_FILE, img_root=IMAGES_ROOT, mean_image_file=MEAN_IMAGE_FILE, image_property=IMAGE_PROP)
    _limit = limit if limit > 0 else 5
    iterator = td.generate()
    if shuffle:
        import random
        shuffled = list(iterator)
        random.shuffle(shuffled)
        iterator = iter(shuffled)

    i = 0
    for arr, im in iterator:
        restored = td.data_to_image(arr, im.label, raw=True)
        print(im.path)
        restored.image.show()
        i += 1
        if i >= _limit:
            break 
Example #16
Source File: concat_db.py    From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0
def _load_image_set_index(self, shuffle):
        """
        get total number of images, init indices

        Parameters
        ----------
        shuffle : bool
            whether to shuffle the initial indices
        """
        self.num_images = 0
        for db in self.imdbs:
            self.num_images += db.num_images
        indices = list(range(self.num_images))
        if shuffle:
            random.shuffle(indices)
        return indices 
Example #17
Source File: iterators.py    From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0
def reset(self):
        """Resets the iterator to the beginning of the data."""
        self.curr_idx = 0
        # shuffle data in each bucket
        random.shuffle(self.idx)
        for i, buck in enumerate(self.sentences):
            self.indices[i], self.sentences[i], self.characters[i], self.label[i] = shuffle(self.indices[i],
                                                                                            self.sentences[i],
                                                                                            self.characters[i],
                                                                                            self.label[i])

        self.ndindex = []
        self.ndsent = []
        self.ndchar = []
        self.ndlabel = []

        # for each bucket of data
        for i, buck in enumerate(self.sentences):
            # append each bucket's lists as arrays
            self.ndindex.append(ndarray.array(self.indices[i], dtype=self.dtype))
            self.ndsent.append(ndarray.array(self.sentences[i], dtype=self.dtype))
            self.ndchar.append(ndarray.array(self.characters[i], dtype=self.dtype))
            self.ndlabel.append(ndarray.array(self.label[i], dtype=self.dtype)) 
Example #18
Source File: turing.py    From gated-graph-transformer-network with MIT License
def encode_turing_machine_rules(rules, starting_state=None, story=None):
    if story is None:
        story = graph_tools.Story()
    graph = story.graph
    if starting_state is None:
        starting_state = random.randrange(len(rules))  # random.choice() needs a sequence; randrange() draws from range(len(rules))
    the_edges = [(cstate, read, write, nstate, direc)
                    for (cstate, stuff) in enumerate(rules)
                    for (read, (write, nstate, direc)) in enumerate(stuff)]
    random.shuffle(the_edges)
    for cstate, read, write, nstate, direc in the_edges:
        source = graph.make_unique('state_{}'.format(cstate))
        dest = graph.make_unique('state_{}'.format(nstate))
        edge_type = "rule_{}_{}_{}".format(read,write,direc)
        source[edge_type] = dest
        story.add_line("rule {} {} {} {} {}".format(source.type, read, write, dest.type, direc))
    head = graph.make_unique('head')

    head.state = graph.make_unique('state_{}'.format(starting_state))
    story.add_line("start {}".format(head.state.type))
    return story 
Example #19
Source File: MoveGenerator.py    From fullrmc with GNU Affero General Public License v3.0
def move(self, coordinates):
        """
        Move coordinates.

        :Parameters:
            #. coordinates (np.ndarray): The coordinates on which to apply
               the transformation.

        :Returns:
            #. coordinates (np.ndarray): The new coordinates after applying
               the transformation.
        """
        # list() is needed so shuffle() can reorder in place (range objects are immutable in Python 3)
        indexes = list(range(len(self.__combination)))
        if self.__shuffle:
            shuffle(indexes)
        # create the move combination
        for idx in indexes:
            coordinates = self.__combination[idx].move(coordinates)
        return coordinates 
Example #20
Source File: data.py    From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0
def get_imagenet_iterator(root, batch_size, num_workers, data_shape=224, dtype='float32'):
    """Dataset loader with preprocessing."""
    train_dir = os.path.join(root, 'train')
    train_transform, val_transform = get_imagenet_transforms(data_shape, dtype)
    logging.info("Loading image folder %s, this may take a bit long...", train_dir)
    train_dataset = ImageFolderDataset(train_dir, transform=train_transform)
    train_data = DataLoader(train_dataset, batch_size, shuffle=True,
                            last_batch='discard', num_workers=num_workers)
    val_dir = os.path.join(root, 'val')
    if not os.path.isdir(os.path.expanduser(os.path.join(root, 'val', 'n01440764'))):
        user_warning = 'Make sure validation images are stored in one subdir per category, a helper script is available at https://git.io/vNQv1'
        raise ValueError(user_warning)
    logging.info("Loading image folder %s, this may take a bit long...", val_dir)
    val_dataset = ImageFolderDataset(val_dir, transform=val_transform)
    val_data = DataLoader(val_dataset, batch_size, last_batch='keep', num_workers=num_workers)
    return DataLoaderIter(train_data, dtype), DataLoaderIter(val_data, dtype) 
Example #21
Source File: MoveGenerator.py    From fullrmc with GNU Affero General Public License v3.0
def _codify__(self, name='generator', group=None, addDependencies=True):
        assert isinstance(name, basestring), LOGGER.error("name must be a string")
        assert re.match('[a-zA-Z_][a-zA-Z0-9_]*$', name) is not None, LOGGER.error("given name '%s' can't be used as a variable name"%name)
        dependencies = collections.OrderedDict()
        dependencies['from fullrmc.Core import MoveGenerator'] = True
        code         = []
        combination  = []
        # codify generators
        for idx, gen in enumerate(self.__combination):
            nm      = '%s_%i'%(name,idx)
            dep, cd = gen._codify__(group=None, name=nm, addDependencies=True)
            code.append(cd)
            combination.append(nm)
            for d in dep:
                _ = dependencies.setdefault(d,True)
        # codify combinator
        code.append("{name} = MoveGenerator.MoveGeneratorCombinator\
(group={group}, combination=[{combination}], shuffle={shuffle})"
.format(name=name, group=group, combination=', '.join(combination), shuffle=self.shuffle))
        # set dependencies
        dependencies = list(dependencies)
        # add dependencies
        if addDependencies:
            code = dependencies + [''] + code
        # return
        return dependencies, '\n'.join(code) 
Example #22
Source File: MoveGenerator.py    From fullrmc with GNU Affero General Public License v3.0
def shuffle(self):
        """ Shuffle flag."""
        return self.__shuffle 
Example #23
Source File: data.py    From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0
def iterate_forever(self, batch_size, num_steps):
        def file_stream():
            while True:
                file_patterns = glob.glob(self._file_pattern)
                if self._shuffle:  # shuffle file order only when requested
                    random.shuffle(file_patterns)
                for file_name in file_patterns:
                    yield file_name
        for value in self._iterate(self._sentence_stream(file_stream()), batch_size, num_steps):
            yield value 
Example #24
Source File: data.py    From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0
def iterate_once(self, batch_size, num_steps):
        def file_stream():
            file_patterns = glob.glob(self._file_pattern)
            if self._shuffle:  # shuffle file order only when requested
                random.shuffle(file_patterns)
            for file_name in file_patterns:
                yield file_name
        for value in self._iterate(self._sentence_stream(file_stream()), batch_size, num_steps):
            yield value 
Example #25
Source File: imdb.py    From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0
def save_imglist(self, fname=None, root=None, shuffle=False):
        """
        save imglist to disk

        Parameters
        ----------
        fname : str
            saved filename
        root : str
            root directory; saved image paths are made relative to it
        shuffle : bool
            whether to shuffle the image list before saving
        """
        def progress_bar(count, total, suffix=''):
            import sys
            bar_len = 24
            filled_len = int(round(bar_len * count / float(total)))

            percents = round(100.0 * count / float(total), 1)
            bar = '=' * filled_len + '-' * (bar_len - filled_len)
            sys.stdout.write('[%s] %s%s ...%s\r' % (bar, percents, '%', suffix))
            sys.stdout.flush()

        str_list = []
        for index in range(self.num_images):
            progress_bar(index, self.num_images)
            label = self.label_from_index(index)
            if label.size < 1:
                continue
            path = self.image_path_from_index(index)
            if root:
                path = osp.relpath(path, root)
            str_list.append('\t'.join([str(index), str(2), str(label.shape[1])] \
              + ["{0:.4f}".format(x) for x in label.ravel()] + [path,]) + '\n')
        if str_list:
            if shuffle:
                import random
                random.shuffle(str_list)
            if not fname:
                fname = self.name + '.lst'
            with open(fname, 'w') as f:
                for line in str_list:
                    f.write(line)
        else:
            raise RuntimeError("No image in imdb") 
Example #26
Source File: data.py    From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0
def _parse_file(self, file_name):
        logging.debug("Processing file: %s" % file_name)
        with codecs.open(file_name, "r", "utf-8") as f:
            lines = [line.strip() for line in f]
            if self._shuffle:  # shuffle line order only when requested
                random.shuffle(lines)
            logging.debug("Finished processing!")
            for line in lines:
                yield self._parse_sentence(line) 
Example #27
Source File: data.py    From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0
def __init__(self, vocab, file_pattern, shuffle=False):
        self._vocab = vocab
        self._file_pattern = file_pattern
        self._shuffle = shuffle 
Example #28
Source File: MoveGenerator.py    From fullrmc with GNU Affero General Public License v3.0
def __init__(self, group=None, combination=None, shuffle=False):
        # set combination
        self.__combination = []
        # initialize
        super(MoveGeneratorCombinator, self).__init__(group=group)
        # set path
        self.set_combination(combination=combination)
        # set randomize
        self.set_shuffle(shuffle=shuffle) 
Example #29
Source File: MoveGenerator.py    From fullrmc with GNU Affero General Public License v3.0
def set_shuffle(self, shuffle):
        """
        Set whether to shuffle the move generators.

        :Parameters:
            #. shuffle (boolean): Whether to shuffle generator instances at
               every move or to combine moves in the list order.
        """
        assert isinstance(shuffle, bool), LOGGER.error("shuffle must be boolean")
        self.__shuffle = shuffle 
Example #30
Source File: 19_char_rnn.py    From deep-learning-note with MIT License
def read_data(filename, vocab, window, overlap):
    with open(filename, 'r') as f:
        lines = [line.strip() for line in f]
    while True:
        random.shuffle(lines)

        for text in lines:
            text = vocab_encode(text, vocab)
            for start in range(0, len(text) - window, overlap):
                chunk = text[start: start + window]
                chunk += [0] * (window - len(chunk))
                yield chunk
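Because read_data loops forever over reshuffled lines, callers take a finite slice of it; a usage sketch (the file name and parameters are hypothetical, and vocab is whatever vocabulary the script builds):

from itertools import islice

# consume the first 10 window-sized chunks from the endless generator
for chunk in islice(read_data('corpus.txt', vocab, window=50, overlap=25), 10):
    print(len(chunk))  # always equal to window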


# read data in batches