Python random.shuffle() Examples

The following code examples show how to use random.shuffle(). They are drawn from open source Python projects.

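A quick reminder before the examples: random.shuffle() reorders a mutable sequence in place and returns None, so its result should never be assigned. A minimal sketch:

import random

items = list(range(10))
random.shuffle(items)   # shuffles in place; the return value is None
print(items)            # e.g. [7, 2, 9, ...] -- a different order each run

# When a shuffled copy is needed instead, random.sample does the job:
shuffled_copy = random.sample(items, k=len(items))
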
Example 1
Project: Caffe-Python-Data-Layer   Author: liuxianming   File: BasePythonDataLayer.py    BSD 2-Clause "Simplified" License
def setup(self, bottom, top):
        layer_params = yaml.load(self.param_str)
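        # Note: PyYAML >= 5.1 warns without an explicit Loader (and 6.0 requires one);
        # yaml.safe_load(self.param_str) is a safer modern alternative.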
        self._layer_params = layer_params
        # default batch_size = 256
        self._batch_size = int(layer_params.get('batch_size', 256))
        self._resize = layer_params.get('resize', -1)
        self._mean_file = layer_params.get('mean_file', None)
        self._source_type = layer_params.get('source_type', 'CSV')
        self._shuffle = layer_params.get('shuffle', False)
        # read image_mean from file and preload all data into memory
        # will read either file or array into self._mean
        self.set_mean()
        self.preload_db()
        self._compressed = self._layer_params.get('compressed', True)
        if not self._compressed:
            self.decompress_data() 
Example 2
Project: Caffe-Python-Data-Layer   Author: liuxianming   File: BasePythonDataLayer.py    BSD 2-Clause "Simplified" License
def preload_db(self):
        """Read all images in and all labels

        Implemenation relies on DataManager Classes
        """
        print("Preloading Data...")
        if self._source_type == 'BCF':
            self._data_manager = BCFDataManager(self._layer_params)
        elif self._source_type == 'CSV':
            self._data_manager = CSVDataManager(self._layer_params)
        elif self._source_type == 'LMDB':
            self._data_manager = LMDBDataManager(self._layer_params)
        # read all data
        self._data, self._label = self._data_manager.load_all()
        self._sample_count = len(self._data)
        if self._shuffle:
            self.shuffle() 
Example 3
Project: meta-transfer-learning   Author: erfaneshrati   File: reptile.py    MIT License
def _sample_mini_dataset(dataset, num_classes, num_shots, metatransfer=False):
    """
    Sample a few-shot task from a dataset.

    Returns:
      An iterable of (input, label) pairs, or of (input, label, real label)
      triples when metatransfer is True.
    """
    shuffled = list(dataset)
    if metatransfer:
        indices = np.random.randint(64, size=num_classes)
        for class_idx, class_real_idx in enumerate(indices):
            for sample in shuffled[class_real_idx].sample(num_shots):
                yield (sample, class_idx, class_real_idx)
    else:
        random.shuffle(shuffled)
        for class_idx, class_obj in enumerate(shuffled[:num_classes]):
            for sample in class_obj.sample(num_shots):
                yield (sample, class_idx) 
Example 4
Project: explirefit   Author: codogogo   File: trainer.py    Apache License 2.0
def cross_validate(self, tf_session, class_labels, data_input, data_labels, num_folds, batch_size, num_epochs, model_reset_function = None, shuffle = False, fold_avg = 'micro', cl_perf = None, overall_perf = True, num_epochs_not_better_end = 2):
		conf_matrices = []
		best_epochs = []	
		if shuffle:
			paired = list(zip(data_input, data_labels))
			random.shuffle(paired)	
			data_input, data_labels = zip(*paired)

		folds = self.cross_validation_fold(data_input, data_labels, num_folds)
		fold_counter = 1
		for fold in folds:
			print("Fold: " + str(fold_counter))
			train_input = fold[0]; train_labels = fold[1]; dev_input = fold[2]; dev_labels = fold[3]
			model_reset_function(tf_session)
			conf_mat, epoch = self.train_and_test(tf_session, class_labels, train_input, train_labels, dev_input, dev_labels, batch_size, num_epochs, cl_perf, overall_perf, num_epochs_not_better_end = num_epochs_not_better_end)
			conf_matrices.append(conf_mat)
			best_epochs.append(epoch)
			fold_counter += 1
		if fold_avg == 'macro':
			return conf_matrices, best_epochs
		elif fold_avg == 'micro':
			return confusion_matrix.merge_confusion_matrices(conf_matrices), best_epochs
		else:
			raise ValueError("Unknown value for fold_avg") 
Example 5
Project: explirefit   Author: codogogo   File: batcher.py    Apache License 2.0
def batch_iter(data, batch_size, num_epochs, shuffle = True):
		"""
		Generates a batch iterator for a dataset.
		"""
		#data = np.array(data, dtype = np.int32)
		data_size = len(data)

		num_batches_per_epoch = int(data_size/batch_size) + 1
		for epoch in range(num_epochs):
			# Shuffle the data at each epoch
			if shuffle:
				#shuffle_indices = np.random.permutation(np.arange(data_size))
				#shuffled_data = data[shuffle_indices]
				random.shuffle(data)
			#else:
			#	shuffled_data = data

			for batch_num in range(num_batches_per_epoch):
				start_index = batch_num * batch_size
				end_index = min((batch_num + 1) * batch_size, data_size)
				yield data[start_index:end_index] 
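For context, a hedged usage sketch of this generator (the toy data below is made up; in this project the function is called as batcher.batch_iter, see Examples 24 and 25):

data = [(i, i % 2) for i in range(10)]
for batch in batch_iter(data, batch_size=4, num_epochs=1, shuffle=False):
    print(batch)   # two batches of 4, then one of 2

Note that num_batches_per_epoch is int(data_size/batch_size) + 1, so when data_size divides evenly the final slice is empty; the callers in Examples 24 and 25 guard against short batches by checking len(batch) == batch_size.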
Example 6
Project: pyblish-win   Author: pyblish   File: test_richcmp.py    GNU Lesser General Public License v3.0
def test_dicts(self):
        # Verify that __eq__ and __ne__ work for dicts even if the keys and
        # values don't support anything other than __eq__ and __ne__ (and
        # __hash__).  Complex numbers are a fine example of that.
        import random
        imag1a = {}
        for i in range(50):
            imag1a[random.randrange(100)*1j] = random.randrange(100)*1j
        items = imag1a.items()
        random.shuffle(items)
        imag1b = {}
        for k, v in items:
            imag1b[k] = v
        imag2 = imag1b.copy()
        imag2[k] = v + 1.0
        self.assertTrue(imag1a == imag1a)
        self.assertTrue(imag1a == imag1b)
        self.assertTrue(imag2 == imag2)
        self.assertTrue(imag1a != imag2)
        for opname in ("lt", "le", "gt", "ge"):
            for op in opmap[opname]:
                self.assertRaises(TypeError, op, imag1a, imag2) 
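This test is Python 2 code: there dict.items() returns a list, which random.shuffle() can reorder in place. Under Python 3, items() returns a view and must be materialized first; a sketch:

import random

d = {1j: 2j, 3j: 4j, 5j: 6j}
items = list(d.items())   # shuffle() on a view object raises TypeError
random.shuffle(items)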
Example 7
Project: comet-commonsense   Author: atcbosselut   File: conceptnet.py    Apache License 2.0
def shuffle_sequences(self, split="train", keys=None):
        if keys is None:
            # print(type(self.data))
            # print(type(self.data.keys()))
            keys = self.data[split].keys()

        for key in keys:
            if key in ["positive", "negative"]:
                continue
            idxs = list(range(len(self.data[split][key])))

            random.shuffle(idxs)

            self.sequences[split][key] = \
                self.sequences[split][key].index_select(
                    0, torch.LongTensor(idxs))

            temp = [self.data[split][key][i] for i in idxs]
            self.data[split][key] = temp

            temp = [self.masks[split][key][i] for i in idxs]
            self.masks[split][key] = temp 
Example 8
Project: comet-commonsense   Author: atcbosselut   File: atomic.py    Apache License 2.0
def shuffle_sequences(self, split="train", keys=None):
        if keys is None:
            # print(type(self.data))
            # print(type(self.data.keys()))
            keys = self.data[split].keys()

        for key in keys:
            idxs = list(range(len(self.data[split][key])))

            random.shuffle(idxs)

            self.sequences[split][key] = \
                self.sequences[split][key].index_select(
                    0, torch.LongTensor(idxs))

            temp = [self.data[split][key][i] for i in idxs]
            self.data[split][key] = temp
            temp = [self.masks[split][key][i] for i in idxs]
            self.masks[split][key] = temp 
Example 9
Project: Kaggle-Statoil-Challenge   Author: adodd202   File: utils.py    MIT License
def train_valid_split(dataset, test_size=0.25, shuffle=False, random_seed=0):
    """ Return a list of split indices from a Dataset.
    Indices can be used with DataLoader to build a train and validation set.

    Arguments:
        A Dataset
        A test_size, as a float between 0 and 1 (percentage split) or as an int (fixed number split)
        Shuffling True or False
        Random seed
    """
    length = len(dataset)
    indices = list(range(length))  # include index 0; the original started at 1 and dropped a sample

    if shuffle:
        random.seed(random_seed)
        random.shuffle(indices)

    if isinstance(test_size, float):
        split = floor(test_size * length)
    elif isinstance(test_size, int):
        split = test_size
    else:
        raise ValueError('test_size should be an int or a float, got %r' % (test_size,))
    return indices[split:], indices[:split] 
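The returned index lists are typically consumed through PyTorch samplers, as the docstring suggests; a hedged sketch (the TensorDataset here is a stand-in for any Dataset):

import torch
from torch.utils.data import DataLoader, TensorDataset
from torch.utils.data.sampler import SubsetRandomSampler

dataset = TensorDataset(torch.randn(100, 3), torch.randint(0, 2, (100,)))
train_idx, valid_idx = train_valid_split(dataset, test_size=0.25, shuffle=True)
train_loader = DataLoader(dataset, batch_size=16, sampler=SubsetRandomSampler(train_idx))
valid_loader = DataLoader(dataset, batch_size=16, sampler=SubsetRandomSampler(valid_idx))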
Example 10
Project: mlimages   Author: icoxfog417   File: chainer_alex.py    MIT License
def show(limit, shuffle=True):
    td = TrainingData(LABEL_FILE, img_root=IMAGES_ROOT, mean_image_file=MEAN_IMAGE_FILE, image_property=IMAGE_PROP)
    _limit = limit if limit > 0 else 5
    iterator = td.generate()
    if shuffle:
        import random
        shuffled = list(iterator)
        random.shuffle(shuffled)
        iterator = iter(shuffled)

    i = 0
    for arr, im in iterator:
        restored = td.data_to_image(arr, im.label, raw=True)
        print(im.path)
        restored.image.show()
        i += 1
        if i >= _limit:
            break 
Example 11
Project: pytorch_NER_BiLSTM_CNN_CRF   Author: bamtercelboo   File: DataLoader_NER.py    Apache License 2.0
def __init__(self, path, shuffle, config):
        """
        :param path: data path list
        :param shuffle:  shuffle bool
        :param config:  config
        """
        #
        print("Loading Data......")
        self.data_list = []
        self.max_count = config.max_count
        self.path = path
        self.shuffle = shuffle
        # char feature
        self.pad_char = [char_pad, char_pad]
        # self.pad_char = []
        self.max_char_len = config.max_char_len 
Example 12
Project: pytorch_NER_BiLSTM_CNN_CRF   Author: bamtercelboo   File: DataLoader_NER.py    Apache License 2.0
def dataLoader(self):
        """
        :return:
        """
        path = self.path
        shuffle = self.shuffle
        assert isinstance(path, list), "Path Must Be In List"
        print("Data Path {}".format(path))
        for id_data in range(len(path)):
            print("Loading Data Form {}".format(path[id_data]))
            insts = self._Load_Each_Data(path=path[id_data], shuffle=shuffle)
            random.shuffle(insts)
            self._write_shuffle_inst_to_file(insts, path=path[id_data])
            self.data_list.append(insts)
        # return train/dev/test data
        if len(self.data_list) == 3:
            return self.data_list[0], self.data_list[1], self.data_list[2]
        elif len(self.data_list) == 2:
            return self.data_list[0], self.data_list[1] 
Example 13
Project: VSE-C   Author: ExplorerFreda   File: data.py    MIT License
def get_loader_single(data_name, split, root, json, vocab, transform,
                      batch_size=100, shuffle=True,
                      num_workers=2, ids=None, collate_fn=collate_fn):
    """Returns torch.utils.data.DataLoader for custom coco dataset."""
    if 'coco' in data_name:
        # COCO custom dataset
        dataset = CocoDataset(root=root,
                              json=json,
                              vocab=vocab,
                              transform=transform, ids=ids)
    elif 'f8k' in data_name or 'f30k' in data_name:
        dataset = FlickrDataset(root=root,
                                split=split,
                                json=json,
                                vocab=vocab,
                                transform=transform)

    # Data loader
    data_loader = torch.utils.data.DataLoader(dataset=dataset,
                                              batch_size=batch_size,
                                              shuffle=shuffle,
                                              pin_memory=True,
                                              num_workers=num_workers,
                                              collate_fn=collate_fn)
    return data_loader 
Example 14
Project: deep-learning-note   Author: wdxtub   File: utils.py    MIT License
def load_data_fashion_mnist(batch_size, resize=None, root='./data'):
    """Download the fashion mnist dataset and then load into memory."""
    trans = []
    if resize:
        trans.append(torchvision.transforms.Resize(size=resize))
    trans.append(torchvision.transforms.ToTensor())

    transform = torchvision.transforms.Compose(trans)
    mnist_train = torchvision.datasets.FashionMNIST(root=root, train=True, download=True, transform=transform)
    mnist_test = torchvision.datasets.FashionMNIST(root=root, train=False, download=True, transform=transform)
    if sys.platform.startswith('win'):
        num_workers = 0  # 0 means no extra worker processes are used to speed up data loading
    else:
        num_workers = 4
    train_iter = torch.utils.data.DataLoader(mnist_train, batch_size=batch_size, shuffle=True, num_workers=num_workers)
    test_iter = torch.utils.data.DataLoader(mnist_test, batch_size=batch_size, shuffle=False, num_workers=num_workers)

    return train_iter, test_iter 
Example 15
Project: deep-learning-note   Author: wdxtub   File: utils.py    MIT License
def data_iter_random(corpus_indices, batch_size, num_steps, device=None):
    # Subtract 1 because the output index is the corresponding input index plus 1
    num_examples = (len(corpus_indices) - 1) // num_steps
    epoch_size = num_examples // batch_size
    example_indices = list(range(num_examples))
    random.shuffle(example_indices)

    # Return the sequence of length num_steps starting at position pos
    def _data(pos):
        return corpus_indices[pos: pos + num_steps]

    if device is None:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    for i in range(epoch_size):
        # Read batch_size random examples each time
        i = i * batch_size
        batch_indices = example_indices[i: i + batch_size]
        X = [_data(j * num_steps) for j in batch_indices]
        Y = [_data(j * num_steps + 1) for j in batch_indices]
        yield torch.tensor(X, dtype=torch.float32, device=device), torch.tensor(Y, dtype=torch.float32, device=device) 
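A hedged usage sketch of this sampler (assumes random and torch are imported as in the source module; the toy corpus is made up):

corpus_indices = list(range(31))   # 31 toy token indices
for X, Y in data_iter_random(corpus_indices, batch_size=2, num_steps=6):
    print(X.shape, Y.shape)        # torch.Size([2, 6]) each; Y is X shifted right by one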
Example 16
Project: deep-learning-note   Author: wdxtub   File: 30_series_sampling.py    MIT License
def data_iter_random(corpus_indices, batch_size, num_steps, device=None):
    # Subtract 1 because the output index is the corresponding input index plus 1
    num_examples = (len(corpus_indices) - 1) // num_steps
    epoch_size = num_examples // batch_size
    example_indices = list(range(num_examples))
    random.shuffle(example_indices)

    # Return the sequence of length num_steps starting at position pos
    def _data(pos):
        return corpus_indices[pos: pos + num_steps]
    if device is None:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    for i in range(epoch_size):
        # Read batch_size random examples each time
        i = i * batch_size
        batch_indices = example_indices[i: i + batch_size]
        X = [_data(j * num_steps) for j in batch_indices]
        Y = [_data(j * num_steps + 1) for j in batch_indices]
        yield torch.tensor(X, dtype=torch.float32, device=device), torch.tensor(Y, dtype=torch.float32, device=device) 
Example 17
Project: spqrel_tools   Author: LCAS   File: Kernel.py    MIT License
def _processRandom(self, elem, sessionID):
        """Process a <random> AIML element.

        <random> elements contain zero or more <li> elements.  If
        none, the empty string is returned.  If one or more <li>
        elements are present, one of them is selected randomly to be
        processed recursively and have its results returned.  Only the
        chosen <li> element's contents are processed.  Any non-<li> contents are
        ignored.

        """
        listitems = []
        for e in elem[2:]:
            if e[0] == 'li':
                listitems.append(e)
        if len(listitems) == 0:
            return ""

        # select and process a random listitem.
        random.shuffle(listitems)
        return self._processElement(listitems[0], sessionID)

    # <sentence> 
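Shuffling the whole list just to read its first element is an O(n) way to make a uniform pick; random.choice() expresses the same intent directly, without mutating the list. A self-contained sketch:

import random

listitems = ['li-1', 'li-2', 'li-3']   # stand-ins for the parsed <li> elements
chosen = random.choice(listitems)      # uniform single pick, list left intact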
Example 18
Project: fs_image   Author: facebookincubator   File: common.py    MIT License
def shuffled(it: Iterable[T]) -> List[T]:
    l = list(it)
    random.shuffle(l)
    return l 
Example 19
Project: SyNEThesia   Author: RunOrVeith   File: data_loaders.py    MIT License
def reset(self):
        if self.allow_shuffle:
            random.shuffle(self._iterator_source)
        self.iterator = iter(self._iterator_source) 
Example 20
Project: Caffe-Python-Data-Layer   Author: liuxianming   File: BasePythonDataLayer.py    BSD 2-Clause "Simplified" License
def shuffle(self):
        """Shuffle all samples and their labels"""
        shuffled_data_ = list(zip(self._data, self._label))
        random.shuffle(shuffled_data_)
        self._data, self._label = zip(*shuffled_data_)
        self._data = list(self._data)
        self._label = list(self._label) 
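This zip/shuffle/unzip idiom for shuffling parallel lists in unison recurs throughout these examples (see also Examples 4, 23, 45, and 46); a self-contained sketch:

import random

data = ['a', 'b', 'c', 'd']
labels = [0, 1, 0, 1]
paired = list(zip(data, labels))                # pair items so they travel together
random.shuffle(paired)
data, labels = (list(t) for t in zip(*paired))  # unzip back into two aligned lists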
Example 21
Project: meta-transfer-learning   Author: erfaneshrati   File: miniimagenet.py    MIT License
def sample(self, num_images):
        """
        Sample images (as numpy arrays) from the class.

        Returns:
          A sequence of 84x84x3 numpy arrays.
          Each pixel ranges from 0 to 1.
        """
        names = [f for f in os.listdir(self.dir_path) if f.endswith('.JPEG')]
        random.shuffle(names)
        images = []
        for name in names[:num_images]:
            images.append(self._read_image(name))
        return images 
Example 22
Project: meta-transfer-learning   Author: erfaneshrati   File: reptile.py    MIT License
def _mini_batches(samples, batch_size, num_batches, replacement):
    """
    Generate mini-batches from some data.

    Returns:
      An iterable of sequences of (input, label) pairs,
        where each sequence is a mini-batch.
    """
    samples = list(samples)
    if replacement:
        for _ in range(num_batches):
            yield random.sample(samples, batch_size)
        return
    cur_batch = []
    batch_count = 0
    while True:
        random.shuffle(samples)
        for sample in samples:
            cur_batch.append(sample)
            if len(cur_batch) < batch_size:
                continue
            yield cur_batch
            cur_batch = []
            batch_count += 1
            if batch_count == num_batches:
                return 
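A hedged call sketch (the samples below are made-up (input, label) pairs):

samples = [(i, i % 5) for i in range(20)]
for batch in _mini_batches(samples, batch_size=4, num_batches=3, replacement=False):
    print(batch)   # three lists of 4 pairs drawn from one shuffled pass; the pool reshuffles when exhausted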
Example 23
Project: explirefit   Author: codogogo   File: data_helper.py    Apache License 2.0
def create_corrupts(correct_train, correct_test, concept_dict, prev_dict, num_corrupt = 2, shuffle = True):
	concepts = list(concept_dict.values())
	train_corrupt = []
	test_corrupt = []
	current_dict = {}	

	merged = []
	merged.extend(correct_train)
	merged.extend(correct_test)

	for i in range(len(merged)):
		rel_str = merged[i][1] + "_" + merged[i][0]
		if rel_str not in prev_dict and rel_str not in current_dict:
			(train_corrupt if i < len(correct_train) else test_corrupt).append((merged[i][1], merged[i][0], "0"))
			current_dict[rel_str] = 1
		
		for j in range(num_corrupt - 1):
			c1 = concepts[random.randint(0, len(concepts) - 1)]
			c2 = concepts[random.randint(0, len(concepts) - 1)]
			rel_str = c1 + "_" + c2
			while(rel_str in prev_dict or rel_str in current_dict):
				c1 = concepts[random.randint(0, len(concepts) - 1)]
				c2 = concepts[random.randint(0, len(concepts) - 1)]
				rel_str = c1 + "_" + c2
			(train_corrupt if i < len(correct_train) else test_corrupt).append((c1, c2, "0"))
			current_dict[rel_str] = 1
			
	fdata_train = []
	fdata_train.extend(correct_train)
	fdata_train.extend(train_corrupt)
	
	fdata_test = []
	fdata_test.extend(correct_test)
	fdata_test.extend(test_corrupt)

	if shuffle:
		random.shuffle(fdata_train)
		random.shuffle(fdata_test)
	
	return (fdata_train, fdata_test) 
Example 24
Project: explirefit   Author: codogogo   File: trainer.py    Apache License 2.0
def test(self, test_data, batch_size, eval_params = None, print_batches = False):
		epoch_loss = 0
		batches_eval = batcher.batch_iter(test_data, batch_size, 1, shuffle = False)
		eval_batch_counter = 1
				
		for batch_eval in batches_eval:
			if (len(batch_eval) == batch_size):
				feed_dict_eval, golds_batch_eval = self.feed_dict_function(self.model, batch_eval, None, predict = True)	
				preds_batch_eval = self.predict(feed_dict_eval)
				batch_eval_loss = self.model.loss.eval(session = self.session, feed_dict = feed_dict_eval)
				epoch_loss += batch_eval_loss

				if eval_batch_counter == 1:
					golds = golds_batch_eval
					preds = preds_batch_eval
				else:
					golds = np.concatenate((golds, golds_batch_eval), axis = 0)
					preds = np.concatenate((preds, preds_batch_eval), axis = 0)
				if print_batches:
					print(eval_batch_counter)
			eval_batch_counter += 1

		if self.eval_func is not None:
			score = self.eval_func(golds, preds, eval_params)
			return preds, epoch_loss, score
		else:
			return preds, epoch_loss 
Example 25
Project: explirefit   Author: codogogo   File: trainer.py    Apache License 2.0
def train(self, train_data, batch_size, max_num_epochs, num_epochs_not_better_end = 5, epoch_diff_smaller_end = 1e-5, print_batch_losses = True, configuration = None, eval_params = None, shuffle_data = True):
		batch_counter = 0
		epoch_counter = 0
		epoch_losses = []
		epoch_loss = 0
		batches_in_epoch = int(len(train_data)/batch_size) + 1

		batches = batcher.batch_iter(train_data, batch_size, max_num_epochs, shuffle = shuffle_data)
		for batch in batches:
			batch_counter += 1

			if (len(batch) == batch_size):
				
				feed_dict, gold_labels = self.feed_dict_function(self.model, batch, config = configuration)
				if print_batch_losses:
					print("Batch " + str(batch_counter) + ": running single iteration training..." )
				self.train_model_single_iteration(feed_dict)
			
				batch_loss = self.model.loss.eval(session = self.session, feed_dict = feed_dict)
				if print_batch_losses:
					print("Batch " + str(batch_counter) + ": " + str(batch_loss))

			if batch_counter % batches_in_epoch == 0:
				epoch_counter += 1
				print("Evaluating the epoch loss for epoch " + str(epoch_counter))
				
				preds, epoch_loss, score = self.test(train_data, batch_size, eval_params, False)

				print("Epoch " + str(epoch_counter) + ": " + str(epoch_loss))
				print("Epoch (train) performance: " + str(score))
				print("Previous epochs: " + str(epoch_losses))

				if len(epoch_losses) == num_epochs_not_better_end and (epoch_losses[0] - epoch_loss < epoch_diff_smaller_end):
					break
				else: 
					epoch_losses.append(epoch_loss)
					epoch_loss = 0
					if len(epoch_losses) > num_epochs_not_better_end:
						epoch_losses.pop(0) 
Example 26
Project: pyblish-win   Author: pyblish   File: test_io.py    GNU Lesser General Public License v3.0
def test_threads(self):
        try:
            # Write out many bytes with exactly the same number of 0's,
            # 1's... 255's. This will help us check that concurrent reading
            # doesn't duplicate or forget contents.
            N = 1000
            l = list(range(256)) * N
            random.shuffle(l)
            s = bytes(bytearray(l))
            with self.open(support.TESTFN, "wb") as f:
                f.write(s)
            with self.open(support.TESTFN, self.read_mode, buffering=0) as raw:
                bufio = self.tp(raw, 8)
                errors = []
                results = []
                def f():
                    try:
                        # Intra-buffer read then buffer-flushing read
                        for n in cycle([1, 19]):
                            s = bufio.read(n)
                            if not s:
                                break
                            # list.append() is atomic
                            results.append(s)
                    except Exception as e:
                        errors.append(e)
                        raise
                threads = [threading.Thread(target=f) for x in range(20)]
                for t in threads:
                    t.start()
                time.sleep(0.02) # yield
                for t in threads:
                    t.join()
                self.assertFalse(errors,
                    "the following exceptions were caught: %r" % errors)
                s = b''.join(results)
                for i in range(256):
                    c = bytes(bytearray([i]))
                    self.assertEqual(s.count(c), N)
        finally:
            support.unlink(support.TESTFN) 
Example 27
Project: pyblish-win   Author: pyblish   File: test_math.py    GNU Lesser General Public License v3.0
def testFactorial(self):
        def fact(n):
            result = 1
            for i in range(1, int(n)+1):
                result *= i
            return result
        values = range(10) + [50, 100, 500]
        random.shuffle(values)
        for x in values:
            for cast in (int, long, float):
                self.assertEqual(math.factorial(cast(x)), fact(x), (x, fact(x), math.factorial(x)))
        self.assertRaises(ValueError, math.factorial, -1)
        self.assertRaises(ValueError, math.factorial, math.pi) 
Example 28
Project: pyblish-win   Author: pyblish   File: test_sort.py    GNU Lesser General Public License v3.0
def test_cmpNone(self):
        # Testing None as a comparison function.

        L = range(50)
        random.shuffle(L)
        L.sort(None)
        self.assertEqual(L, range(50)) 
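This test targets Python 2, where range() returns a list and list.sort() still accepts a comparison function (sort(None) means "no cmp"). A Python 3 rendering of the same check:

import random

L = list(range(50))
random.shuffle(L)
L.sort()                      # the cmp argument is gone in Python 3
assert L == list(range(50))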
Example 29
Project: pyblish-win   Author: pyblish   File: test_sort.py    GNU Lesser General Public License v3.0
def test_decorated(self):
        data = 'The quick Brown fox Jumped over The lazy Dog'.split()
        copy = data[:]
        random.shuffle(data)
        data.sort(key=str.lower)
        copy.sort(cmp=lambda x,y: cmp(x.lower(), y.lower())) 
Example 30
Project: pyblish-win   Author: pyblish   File: test_sort.py    GNU Lesser General Public License v3.0
def test_reverse(self):
        data = range(100)
        random.shuffle(data)
        data.sort(reverse=True)
        self.assertEqual(data, range(99,-1,-1))
        self.assertRaises(TypeError, data.sort, "wrong type") 
Example 31
Project: pyblish-win   Author: pyblish   File: test_dict.py    GNU Lesser General Public License v3.0
def test_literal_constructor(self):
        # check literal constructor for different sized dicts
        # (to exercise the BUILD_MAP oparg).
        for n in (0, 1, 6, 256, 400):
            items = [(''.join(random.sample(string.letters, 8)), i)
                     for i in range(n)]
            random.shuffle(items)
            formatted_items = ('{!r}: {:d}'.format(k, v) for k, v in items)
            dictliteral = '{' + ', '.join(formatted_items) + '}'
            self.assertEqual(eval(dictliteral), dict(items)) 
Example 32
Project: visual-asset-generator   Author: Automattic   File: spotlight.py    GNU General Public License v3.0
def render(self, magic, frame_path, copy, cta):
        if len(copy) > 0:
            self.copy = copy[:self.content['character_limit']]
        else:
            self.copy = LOREM[:self.content['character_limit']]
        self.cta = cta
        if len(cta) == 0:
            self.cta = 'Get started'
        self.color_scheme = 'blue'
        if (frame_path.find('_b') != -1):
            self.color_scheme = 'pink'
        self.db.newDrawing()
        self.db.size(self.width, self.height)
        self.renderPortrait(magic)
        self.renderFrame(frame_path)
        self.renderLogo()

        #randomize the order of the content of the ad
        # renderFunctions = [self.renderBadge, self.renderCopy, self.renderButton]
        # shuffle(renderFunctions)
        # cursor = self.height
        # for i in range(0,len(renderFunctions)):
        #     cursor = renderFunctions[i](cursor)

        cursor = self.renderBadge(self.height)
        cursor = self.renderCopy(cursor)
        cursor = self.renderButton(cursor) 
Example 33
Project: python-samples   Author: dek-odoo   File: dek_program084.py    Apache License 2.0
def main(numlist):
    shuffle(numlist)
    print numlist 
Example 34
Project: python-samples   Author: dek-odoo   File: dek_program085.py    Apache License 2.0
def main(lst):
    # print lst, type(lst)
    # print ''.join(lst)
    shuffle(lst)
    # print 'sdfgsdg'
    return lst 
Example 35
Project: claxon   Author: vanatteveldt   File: learning_speed.py    GNU General Public License v3.0
def shuffle_annotations(annotations):
    # Sessions would be preferable, but the 'crime' data has none, so split off the Niek data (first 682 annotations), shuffle it, and append the rest
    x, y = annotations[:682], annotations[682:]
    random.shuffle(x)
    return x + y 
Example 36
Project: comet-commonsense   Author: atcbosselut   File: conceptnet.py    Apache License 2.0
def reset_offsets(self, splits=["train", "test", "dev"],
                      shuffle=True, keys=None):
        if isinstance(splits, str):
            splits = [splits]

        for split in splits:
            if keys is None:
                keys = ["total", "positive", "negative"]

            for key in keys:
                self.offsets[split][key] = 0

            if shuffle:
                self.shuffle_sequences(split, keys) 
Example 37
Project: comet-commonsense   Author: atcbosselut   File: atomic.py    Apache License 2.0
def reset_offsets(self, splits=["train", "test", "dev"],
                      shuffle=True, keys=None):
        if isinstance(splits, str):
            splits = [splits]

        for split in splits:
            if keys is None:
                keys = ["total"]

            for key in keys:
                self.offsets[split][key] = 0

            if shuffle:
                self.shuffle_sequences(split, keys) 
Example 38
Project: ieml   Author: IEMLdev   File: test_tree_graph.py    GNU General Public License v3.0
def _tree_from_range(self, max):
        r = list(range(1, max))
        random.shuffle(r)
        transitions = {(0, i, 'data') for i in r}
        return TreeGraph(transitions) 
Example 39
Project: DBC-FederatedLearning-Client-VNX   Author: DeepBrainChain   File: file_manipulator.py    Apache License 2.0
def general_split(input_path, out_folders, out_fractions, shuff = True, remove_original=False):
    #print(os.path.join(input_path, outputpath))
    #print(os.walk(input_path))

    for dirpath, dirnames, filenames in os.walk(input_path):
        print("dirpath:")
        print(dirpath)
        print("dirnames:")
        print(dirnames)
        
        n=len(filenames)
        print("number of files: "+str(n))
        lout = len(out_folders)
        k = 0
        if shuff:
            random.shuffle(filenames)
        out_number = []

        for i, (outputpath, outfraction) in enumerate(zip(out_folders, out_fractions)):
            structure = os.path.join(outputpath, os.path.relpath(dirpath, input_path))
            print("structure:")
            print(structure)
            if not os.path.isdir(structure):
                os.mkdir(structure)
            else:
                print("Folder already exists!")
            if i < lout - 1:
                x = int(n * outfraction)
            elif out_number != []:
                x = n - sum(out_number)  # last folder gets the remainder
            else:
                x = 0  # the original compared the list to 0 here, which never matched
            out_number.append(x)

            for filename in filenames[k:(k + x)]:
                copyfile(os.path.join(dirpath, filename), os.path.join(structure, filename))
            k += x  # advance the window; the original never did, so the splits overlapped

    if remove_original:
        shutil.rmtree(input_path) 
Example 40
Project: kuaa   Author: rafaelwerneck   File: common.py    GNU General Public License v3.0
def shuffleData(labels, feats):
    assert len(labels) == len(feats)

    indexes = range(len(labels))
    random.shuffle(indexes)

    def simpleShuffle(lst):
        return map(lambda i: lst[i], indexes)

    labels = simpleShuffle(labels)
    feats = simpleShuffle(feats)

    return labels, feats 
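This helper is Python 2 code: range() returns a shufflable list and map() returns a list. A hedged Python 3 variant of the same index-shuffle pattern (the name shuffle_data is hypothetical):

import random

def shuffle_data(labels, feats):
    assert len(labels) == len(feats)
    indexes = list(range(len(labels)))   # materialize; a Py3 range is not shufflable
    random.shuffle(indexes)
    return [labels[i] for i in indexes], [feats[i] for i in indexes]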
Example 41
Project: Electrolyte_Analysis_FTIR   Author: Samuel-Buteau   File: Constant_run.py    MIT License
def __init__(self, n_samples=None, list_of_indecies=None):
        if n_samples is not None:
            self.GetFresh_list = numpy.arange(n_samples, dtype=numpy.int32)
            self.get_fresh_count = n_samples
        elif list_of_indecies is not None:
            self.GetFresh_list = numpy.array(copy.deepcopy(list_of_indecies))
            self.get_fresh_count = len(self.GetFresh_list)
        else:
            raise Exception('Invalid Input')

        numpy.random.shuffle(self.GetFresh_list)
        self.get_fresh_pos = 0 
Example 42
Project: Electrolyte_Analysis_FTIR   Author: Samuel-Buteau   File: Constant_run.py    MIT License
def get(self, n):
        """
        will return a list of n random numbers in self.GetFresh_list
        - Samuel Buteau, October 2018
        """
        if n >= self.get_fresh_count:
            return numpy.concatenate((self.get(int(n/2)),self.get(n- int(n/2))))


        reshuffle_flag = False

        n_immediate_fulfill = min(n, self.get_fresh_count - self.get_fresh_pos)
        batch_of_indecies = numpy.empty([n], dtype=numpy.int32)
        for i in range(0, n_immediate_fulfill):
            batch_of_indecies[i] = self.GetFresh_list[i + self.get_fresh_pos]

        self.get_fresh_pos += n_immediate_fulfill
        if self.get_fresh_pos >= self.get_fresh_count:
            self.get_fresh_pos -= self.get_fresh_count
            reshuffle_flag = True

            # Now, the orders that needed to be satisfied are satisfied.
        n_delayed_fulfill = max(0, n - n_immediate_fulfill)
        if reshuffle_flag:
            numpy.random.shuffle(self.GetFresh_list)

        if n_delayed_fulfill > 0:
            for i in range(0, n_delayed_fulfill):
                batch_of_indecies[i + n_immediate_fulfill] = self.GetFresh_list[i]
            self.get_fresh_pos = n_delayed_fulfill

        return batch_of_indecies 
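Examples 41 and 42 together implement a without-replacement index sampler that reshuffles once per pass. Ignoring the carry-over logic above, a compact sketch of the core idea:

import numpy

pool = numpy.arange(100, dtype=numpy.int32)
numpy.random.shuffle(pool)              # one shuffle per pass over the pool
for start in range(0, len(pool), 32):
    batch = pool[start:start + 32]      # each index appears exactly once per pass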
Example 43
Project: Kaggle-Statoil-Challenge   Author: adodd202   File: utils.py    MIT License
def get_mean_and_std(dataset):
    '''Compute the mean and std value of dataset.'''
    dataloader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=True, num_workers=2)

    mean = torch.zeros(3)
    std = torch.zeros(3)
    print('==> Computing mean and std..')
    for inputs, targets in dataloader:
        for i in range(3):
            mean[i] += inputs[:, i, :, :].mean()
            std[i] += inputs[:, i, :, :].std()
    mean.div_(len(dataset))
    std.div_(len(dataset))
    return mean, std 
Example 44
Project: Kaggle-Statoil-Challenge   Author: adodd202   File: utils.py    MIT License
def getStatoilTrainValLoaders(args):
    fixSeed(args)
    local_data = pd.read_json('/home/adodd202/train.json')

    local_data = shuffle(local_data)  # otherwise same validation set each time!
    local_data = local_data.reindex(np.random.permutation(local_data.index))

    local_data['band_1'] = local_data['band_1'].apply(lambda x: np.array(x).reshape(75, 75))
    local_data['band_2'] = local_data['band_2'].apply(lambda x: np.array(x).reshape(75, 75))
    local_data['inc_angle'] = pd.to_numeric(local_data['inc_angle'], errors='coerce')
    local_data['inc_angle'].fillna(0, inplace=True)

    band_1 = np.concatenate([im for im in local_data['band_1']]).reshape(-1, 75, 75)
    band_2 = np.concatenate([im for im in local_data['band_2']]).reshape(-1, 75, 75)
    # band_3=(band_1+band_2)/2
    local_full_img = np.stack([band_1, band_2], axis=1)

    train_imgs = XnumpyToTensor(local_full_img, args)
    train_targets = YnumpyToTensor(local_data['is_iceberg'].values, args)
    dset_train = TensorDataset(train_imgs, train_targets)

    local_train_ds, local_val_ds = trainTestSplit(dset_train, args.validationRatio)
    local_train_loader = torch.utils.data.DataLoader(local_train_ds, batch_size=args.batch_size, shuffle=False,
                                                     num_workers=args.workers)
    local_val_loader = torch.utils.data.DataLoader(local_val_ds, batch_size=args.batch_size, shuffle=False,
                                                   num_workers=args.workers)
    return local_train_loader, local_val_loader, local_train_ds, local_val_ds 
Example 45
Project: MODS_ConvNet   Author: santiagolopezg   File: dataset_MODS.py    MIT License
def DSetGlobal(self, directory = '/home/musk/MODS_data/data/shuffled'):
          '''
          Function to build a rough dataset of images with labels.
          Returns a pkl file with data and data_labels.
          '''
          ## Find files in folders
          foldername = next(os.walk(directory))[1]
          for dirname in foldername:  ## dirname: positive and negative
              print datetime.datetime.now()
              f2 = os.path.join(directory, dirname)
              onlyfiles = [f3 for f3 in os.listdir(f2) if os.path.isfile(os.path.join(f2, f3))]
              if dirname == 'positive':
                  label = 1
              else:
                  label = 0
              for filename in onlyfiles:
                  try:  ## read the image, convert to greyscale, resize it, append it to data with its label
                      current_image = scipy.misc.imread(os.path.join(f2, filename), mode='L')
                      #current_image = scipy.misc.imread(os.path.join(f2, filename), mode='RGB')
                      #current_image = scipy.misc.imresize(current_image, (256, 192), interp='cubic')
                      current_image = scipy.misc.imresize(current_image, (224, 224), interp='bicubic')
                      self.data.append(numpy.hstack(current_image))
                      self.data_label.append(label)
                  except IOError:  ## the image can't be read, or is corrupted
                      print(filename)
                  #scipy.misc.imshow(current_image)  ## shows the image being read

          ## shuffle the images together with their labels
          combined = zip(self.data, self.data_label)
          random.shuffle(combined)
          self.data[:], self.data_label[:] = zip(*combined)

          print len(self.data)

          dataset = [self.data, self.data_label]
          f = file('MODS_data.pkl', 'wb')  ## save images in a pkl
          cPickle.dump(dataset, f, protocol=cPickle.HIGHEST_PROTOCOL)
          f.close()
          print(datetime.datetime.now() - self.start_time) 
Example 46
Project: MODS_ConvNet   Author: santiagolopezg   File: dataset_MODS.py    MIT License
def Djoin(self, name='seg_MODS_data.pkl'):
         '''
         Takes as input segmented data from the Dset function. The data is split into
         training and testing. Each list (dataset) in the segmented data is taken,
         once, as the testing set. The rest of the data is then shuffled and put
         into the training set. Therefore, each dataset has a different testing
         set of images and a different set of training images, shuffled twice.
         Returns n datasets (same amount as in Dset). The datasets are made of two lists:
         training and testing. These lists are made of two lists each: data and labels.
         '''
         f = file(name, 'rb')
         datamama = cPickle.load(f)
         f.close()
         for i in xrange(len(datamama)):
             data_join = []
             data_label_join = []
             validation = datamama[i]
             data_temp = datamama[:i] + datamama[i+1:]
             for j in data_temp:
                 data_join+=j[0]
                 data_label_join+=j[1]
             
             ##Shuffle data
             combined = zip(data_join, data_label_join)
             random.shuffle(combined)
             data_join[:], data_label_join[:] = zip(*combined)                 
            
             training = [data_join,data_label_join]
             dataset_new = [training,validation]
             f = file('cut_MODS_all_data_bw_224_224_{0}.pkl'.format(i),'wb')
             cPickle.dump(dataset_new, f, protocol=cPickle.HIGHEST_PROTOCOL)
             f.close() 
Example 47
Project: MODS_ConvNet   Author: santiagolopezg   File: dataset_labs_MODS.py    MIT License
def Djoin(self, v, name='seg_MODS_data.pkl'):
         '''
         Takes as input segmented data from the Dset function. The data is split into
         training and testing. Each list (dataset) in the segmented data is taken,
         once, as the testing set. The rest of the data is then shuffled and put
         into the training set. Therefore, each dataset has a different testing
         set of images and a different set of training images, shuffled twice.
         Returns n datasets (same amount as in Dset). The datasets are made of two lists:
         training and testing. These lists are made of two lists each: data and labels.
         '''
         f = file(name, 'rb')
         datamama = cPickle.load(f)
         f.close()
         for i in xrange(len(datamama)):
             data_join = []
             data_label_join = []
             #if 'test' in v:
             validation = datamama[i]
             data_temp = datamama[:i] + datamama[i+1:]
             #else:
             #    validation = []
             #    data_temp = datamama[:]
             for j in data_temp:
                 data_join += j[0]
                 data_label_join += j[1]

             ## Shuffle data
             combined = zip(data_join, data_label_join)
             random.shuffle(combined)
             data_join[:], data_label_join[:] = zip(*combined)

             training = [data_join, data_label_join]
             dataset_new = [training, validation]
             f = file('MODS_224_224_{0}_{1}.pkl'.format(i, v), 'wb')
             print len(validation), v
             cPickle.dump(dataset_new, f, protocol=cPickle.HIGHEST_PROTOCOL)
             f.close() 
Example 48
Project: mlimages   Author: icoxfog417   File: chainer_alex.py    MIT License
def train(epoch=10, batch_size=32, gpu=False):
    if gpu:
        cuda.check_cuda_available()
    xp = cuda.cupy if gpu else np

    td = TrainingData(LABEL_FILE, img_root=IMAGES_ROOT, image_property=IMAGE_PROP)

    # make mean image
    if not os.path.isfile(MEAN_IMAGE_FILE):
        print("make mean image...")
        td.make_mean_image(MEAN_IMAGE_FILE)
    else:
        td.mean_image_file = MEAN_IMAGE_FILE

    # train model
    label_def = LabelingMachine.read_label_def(LABEL_DEF_FILE)
    model = alex.Alex(len(label_def))
    optimizer = optimizers.MomentumSGD(lr=0.01, momentum=0.9)
    optimizer.setup(model)

    print("Now our model is {0} classification task.".format(len(label_def)))
    print("begin training the model. epoch:{0} batch size:{1}.".format(epoch, batch_size))

    if gpu:
        model.to_gpu()

    for i in range(epoch):
        print("epoch {0}/{1}: (learning rate={2})".format(i + 1, epoch, optimizer.lr))
        td.shuffle(overwrite=True)

        for x_batch, y_batch in td.generate_batches(batch_size):
            x = chainer.Variable(xp.asarray(x_batch))
            t = chainer.Variable(xp.asarray(y_batch))

            optimizer.update(model, x, t)
            print("loss: {0}, accuracy: {1}".format(float(model.loss.data), float(model.accuracy.data)))

        serializers.save_npz(MODEL_FILE, model)
        optimizer.lr *= 0.97