Python random.shuffle() Examples

The following code examples show how to use random.shuffle(). They are taken from open source Python projects.
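
As a quick baseline before the project examples (this introductory snippet is ours, not taken from any of the projects below): random.shuffle() reorders a mutable sequence in place and returns None, while random.sample() can be used when a shuffled copy is needed.

import random

items = list(range(5))
random.shuffle(items)                         # reorders the list in place and returns None
print(items)                                  # e.g. [3, 0, 4, 1, 2]
shuffled_copy = random.sample(items, k=len(items))   # a shuffled copy; items itself is unchanged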

Example 1
Project: VSE-C   Author: ExplorerFreda   File: data.py    MIT License 10 votes vote down vote up
def get_loader_single(data_name, split, root, json, vocab, transform,
                      batch_size=100, shuffle=True,
                      num_workers=2, ids=None, collate_fn=collate_fn):
    """Returns torch.utils.data.DataLoader for custom coco dataset."""
    if 'coco' in data_name:
        # COCO custom dataset
        dataset = CocoDataset(root=root,
                              json=json,
                              vocab=vocab,
                              transform=transform, ids=ids)
    elif 'f8k' in data_name or 'f30k' in data_name:
        dataset = FlickrDataset(root=root,
                                split=split,
                                json=json,
                                vocab=vocab,
                                transform=transform)

    # Data loader
    data_loader = torch.utils.data.DataLoader(dataset=dataset,
                                              batch_size=batch_size,
                                              shuffle=shuffle,
                                              pin_memory=True,
                                              num_workers=num_workers,
                                              collate_fn=collate_fn)
    return data_loader 
Example 2
Project: Caffe-Python-Data-Layer   Author: liuxianming   File: BasePythonDataLayer.py    BSD 2-Clause "Simplified" License 7 votes vote down vote up
def setup(self, bottom, top):
        layer_params = yaml.load(self.param_str)
        self._layer_params = layer_params
        # default batch_size = 256
        self._batch_size = int(layer_params.get('batch_size', 256))
        self._resize = layer_params.get('resize', -1)
        self._mean_file = layer_params.get('mean_file', None)
        self._source_type = layer_params.get('source_type', 'CSV')
        self._shuffle = layer_params.get('shuffle', False)
        # read image_mean from file and preload all data into memory
        # will read either file or array into self._mean
        self.set_mean()
        self.preload_db()
        self._compressed = self._layer_params.get('compressed', True)
        if not self._compressed:
            self.decompress_data() 
Example 3
Project: Caffe-Python-Data-Layer   Author: liuxianming   File: BasePythonDataLayer.py    BSD 2-Clause "Simplified" License 6 votes vote down vote up
def preload_db(self):
        """Read all images in and all labels

        Implementation relies on DataManager Classes
        """
        print("Preloading Data...")
        if self._source_type == 'BCF':
            self._data_manager = BCFDataManager(self._layer_params)
        elif self._source_type == 'CSV':
            self._data_manager = CSVDataManager(self._layer_params)
        elif self._source_type == 'LMDB':
            self._data_manager = LMDBDataManager(self._layer_params)
        # read all data
        self._data, self._label = self._data_manager.load_all()
        self._sample_count = len(self._data)
        if self._shuffle:
            self.shuffle() 
Example 4
Project: meta-transfer-learning   Author: erfaneshrati   File: reptile.py    MIT License 6 votes vote down vote up
def _sample_mini_dataset(dataset, num_classes, num_shots, metatransfer=False):
    """
    Sample a few shot task from a dataset.

    Returns:
      An iterable of (input, label) pairs.
    """
    shuffled = list(dataset)
    if metatransfer:
        indices = np.random.randint(64, size=num_classes)
        for class_idx, class_real_idx in enumerate(indices):
            for sample in shuffled[class_real_idx].sample(num_shots):
                yield (sample, class_idx, class_real_idx)
    else:
        random.shuffle(shuffled)
        for class_idx, class_obj in enumerate(shuffled[:num_classes]):
            for sample in class_obj.sample(num_shots):
                yield (sample, class_idx) 
Example 5
Project: explirefit   Author: codogogo   File: trainer.py    Apache License 2.0 6 votes vote down vote up
def cross_validate(self, tf_session, class_labels, data_input, data_labels, num_folds, batch_size, num_epochs, model_reset_function = None, shuffle = False, fold_avg = 'micro', cl_perf = None, overall_perf = True, num_epochs_not_better_end = 2):
		conf_matrices = []
		best_epochs = []	
		if shuffle:
			paired = list(zip(data_input, data_labels))
			random.shuffle(paired)	
			data_input, data_labels = zip(*paired)

		folds = self.cross_validation_fold(data_input, data_labels, num_folds)
		fold_counter = 1
		for fold in folds:
			print("Fold: " + str(fold_counter))
			train_input = fold[0]; train_labels = fold[1]; dev_input = fold[2]; dev_labels = fold[3]
			model_reset_function(tf_session)
			conf_mat, epoch = self.train_and_test(tf_session, class_labels, train_input, train_labels, dev_input, dev_labels, batch_size, num_epochs, cl_perf, overall_perf, num_epochs_not_better_end = num_epochs_not_better_end)
			conf_matrices.append(conf_mat)
			best_epochs.append(epoch)
			fold_counter += 1
		if fold_avg == 'macro':
			return conf_matrices, best_epochs
		elif fold_avg == 'micro':
			return confusion_matrix.merge_confusion_matrices(conf_matrices), best_epochs
		else:
			raise ValueError("Unknown value for fold_avg") 
Example 6
Project: explirefit   Author: codogogo   File: batcher.py    Apache License 2.0 6 votes vote down vote up
def batch_iter(data, batch_size, num_epochs, shuffle = True):
		"""
		Generates a batch iterator for a dataset.
		"""
		#data = np.array(data, dtype = np.int32)
		data_size = len(data)

		num_batches_per_epoch = int(data_size/batch_size) + 1
		for epoch in range(num_epochs):
			# Shuffle the data at each epoch
			if shuffle:
				#shuffle_indices = np.random.permutation(np.arange(data_size))
				#shuffled_data = data[shuffle_indices]
				random.shuffle(data)
			#else:
			#	shuffled_data = data

			for batch_num in range(num_batches_per_epoch):
				start_index = batch_num * batch_size
				end_index = min((batch_num + 1) * batch_size, data_size)
				yield data[start_index:end_index] 
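
Elsewhere in the same project (see Example 25) this generator is called as batcher.batch_iter(...). A hypothetical standalone call could look like the following; note that with shuffle=True the input list itself is reordered in place each epoch.

data = list(range(10))
for batch in batch_iter(data, batch_size=4, num_epochs=2, shuffle=True):
    print(batch)   # e.g. [7, 2, 9, 5]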
Example 7
Project: pyblish-win   Author: pyblish   File: test_richcmp.py    GNU Lesser General Public License v3.0 6 votes vote down vote up
def test_dicts(self):
        # Verify that __eq__ and __ne__ work for dicts even if the keys and
        # values don't support anything other than __eq__ and __ne__ (and
        # __hash__).  Complex numbers are a fine example of that.
        import random
        imag1a = {}
        for i in range(50):
            imag1a[random.randrange(100)*1j] = random.randrange(100)*1j
        items = imag1a.items()
        random.shuffle(items)
        imag1b = {}
        for k, v in items:
            imag1b[k] = v
        imag2 = imag1b.copy()
        imag2[k] = v + 1.0
        self.assertTrue(imag1a == imag1a)
        self.assertTrue(imag1a == imag1b)
        self.assertTrue(imag2 == imag2)
        self.assertTrue(imag1a != imag2)
        for opname in ("lt", "le", "gt", "ge"):
            for op in opmap[opname]:
                self.assertRaises(TypeError, op, imag1a, imag2) 
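
This test comes from a Python 2 code base, where dict.items() returns a list that random.shuffle() can reorder in place. On Python 3, items() returns a view, so the equivalent (our adjustment, not part of the original test) would be:

items = list(imag1a.items())   # materialize the view so it can be shuffled in place
random.shuffle(items)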
Example 8
Project: comet-commonsense   Author: atcbosselut   File: conceptnet.py    Apache License 2.0 6 votes vote down vote up
def shuffle_sequences(self, split="train", keys=None):
        if keys is None:
            # print(type(self.data))
            # print(type(self.data.keys()))
            keys = self.data[split].keys()

        for key in keys:
            if key in ["positive", "negative"]:
                continue
            idxs = list(range(len(self.data[split][key])))

            random.shuffle(idxs)

            self.sequences[split][key] = \
                self.sequences[split][key].index_select(
                    0, torch.LongTensor(idxs))

            temp = [self.data[split][key][i] for i in idxs]
            self.data[split][key] = temp

            temp = [self.masks[split][key][i] for i in idxs]
            self.masks[split][key] = temp 
Example 9
Project: comet-commonsense   Author: atcbosselut   File: atomic.py    Apache License 2.0 6 votes vote down vote up
def shuffle_sequences(self, split="train", keys=None):
        if keys is None:
            # print(type(self.data))
            # print(type(self.data.keys()))
            keys = self.data[split].keys()

        for key in keys:
            idxs = list(range(len(self.data[split][key])))

            random.shuffle(idxs)

            self.sequences[split][key] = \
                self.sequences[split][key].index_select(
                    0, torch.LongTensor(idxs))

            temp = [self.data[split][key][i] for i in idxs]
            self.data[split][key] = temp
            temp = [self.masks[split][key][i] for i in idxs]
            self.masks[split][key] = temp 
Example 10
Project: Kaggle-Statoil-Challenge   Author: adodd202   File: utils.py    MIT License 6 votes vote down vote up
def train_valid_split(dataset, test_size=0.25, shuffle=False, random_seed=0):
    """ Return a list of splitted indices from a DataSet.
    Indices can be used with DataLoader to build a train and validation set.

    Arguments:
        A Dataset
        A test_size, as a float between 0 and 1 (percentage split) or as an int (fixed number split)
        Shuffling True or False
        Random seed
    """
    length = dataset.__len__()
    indices = list(range(length))

    if shuffle:
        random.seed(random_seed)
        random.shuffle(indices)

    if type(test_size) is float:
        split = floor(test_size * length)
    elif type(test_size) is int:
        split = test_size
    else:
        raise ValueError('test_size should be an int or a float, got %s' % str(test_size))
    return indices[split:], indices[:split] 
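
The two index lists returned here are typically handed to PyTorch samplers. A hypothetical usage (the sampler wiring below is our assumption, not part of the original utils.py):

from torch.utils.data import DataLoader
from torch.utils.data.sampler import SubsetRandomSampler

train_idx, valid_idx = train_valid_split(dataset, test_size=0.25, shuffle=True)
train_loader = DataLoader(dataset, batch_size=32, sampler=SubsetRandomSampler(train_idx))
valid_loader = DataLoader(dataset, batch_size=32, sampler=SubsetRandomSampler(valid_idx))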
Example 11
Project: mlimages   Author: icoxfog417   File: chainer_alex.py    MIT License 6 votes vote down vote up
def show(limit, shuffle=True):
    td = TrainingData(LABEL_FILE, img_root=IMAGES_ROOT, mean_image_file=MEAN_IMAGE_FILE, image_property=IMAGE_PROP)
    _limit = limit if limit > 0 else 5
    iterator = td.generate()
    if shuffle:
        import random
        shuffled = list(iterator)
        random.shuffle(shuffled)
        iterator = iter(shuffled)

    i = 0
    for arr, im in iterator:
        restored = td.data_to_image(arr, im.label, raw=True)
        print(im.path)
        restored.image.show()
        i += 1
        if i >= _limit:
            break 
Example 12
Project: pytorch_NER_BiLSTM_CNN_CRF   Author: bamtercelboo   File: DataLoader_NER.py    Apache License 2.0 6 votes vote down vote up
def __init__(self, path, shuffle, config):
        """
        :param path: data path list
        :param shuffle:  shuffle bool
        :param config:  config
        """
        #
        print("Loading Data......")
        self.data_list = []
        self.max_count = config.max_count
        self.path = path
        self.shuffle = shuffle
        # char feature
        self.pad_char = [char_pad, char_pad]
        # self.pad_char = []
        self.max_char_len = config.max_char_len 
Example 13
Project: pytorch_NER_BiLSTM_CNN_CRF   Author: bamtercelboo   File: DataLoader_NER.py    Apache License 2.0 6 votes vote down vote up
def dataLoader(self):
        """
        :return:
        """
        path = self.path
        shuffle = self.shuffle
        assert isinstance(path, list), "Path Must Be In List"
        print("Data Path {}".format(path))
        for id_data in range(len(path)):
            print("Loading Data Form {}".format(path[id_data]))
            insts = self._Load_Each_Data(path=path[id_data], shuffle=shuffle)
            random.shuffle(insts)
            self._write_shuffle_inst_to_file(insts, path=path[id_data])
            self.data_list.append(insts)
        # return train/dev/test data
        if len(self.data_list) == 3:
            return self.data_list[0], self.data_list[1], self.data_list[2]
        elif len(self.data_list) == 2:
            return self.data_list[0], self.data_list[1] 
Example 14
Project: deep-learning-note   Author: wdxtub   File: utils.py    MIT License 6 votes vote down vote up
def load_data_fashion_mnist(batch_size, resize=None, root='./data'):
    """Download the fashion mnist dataset and then load into memory."""
    trans = []
    if resize:
        trans.append(torchvision.transforms.Resize(size=resize))
    trans.append(torchvision.transforms.ToTensor())

    transform = torchvision.transforms.Compose(trans)
    mnist_train = torchvision.datasets.FashionMNIST(root=root, train=True, download=True, transform=transform)
    mnist_test = torchvision.datasets.FashionMNIST(root=root, train=False, download=True, transform=transform)
    if sys.platform.startswith('win'):
        num_workers = 0  # 0 means no extra worker processes are used to speed up data loading
    else:
        num_workers = 4
    train_iter = torch.utils.data.DataLoader(mnist_train, batch_size=batch_size, shuffle=True, num_workers=num_workers)
    test_iter = torch.utils.data.DataLoader(mnist_test, batch_size=batch_size, shuffle=False, num_workers=num_workers)

    return train_iter, test_iter 
Example 15
Project: deep-learning-note   Author: wdxtub   File: utils.py    MIT License 6 votes vote down vote up
def data_iter_random(corpus_indices, batch_size, num_steps, device=None):
    # Subtract 1 because the output index x is the corresponding input index y plus 1
    num_examples = (len(corpus_indices) - 1) // num_steps
    epoch_size = num_examples // batch_size
    example_indices = list(range(num_examples))
    random.shuffle(example_indices)

    # Return the sequence of length num_steps starting at pos
    def _data(pos):
        return corpus_indices[pos: pos + num_steps]

    if device is None:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    for i in range(epoch_size):
        # Read batch_size random examples each time
        i = i * batch_size
        batch_indices = example_indices[i: i + batch_size]
        X = [_data(j * num_steps) for j in batch_indices]
        Y = [_data(j * num_steps + 1) for j in batch_indices]
        yield torch.tensor(X, dtype=torch.float32, device=device), torch.tensor(Y, dtype=torch.float32, device=device) 
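
A hypothetical call on a toy index sequence (the corpus below is ours), yielding (X, Y) minibatches where Y is X shifted one step forward:

corpus_indices = list(range(30))
for X, Y in data_iter_random(corpus_indices, batch_size=2, num_steps=6):
    print(X.shape, Y.shape)   # torch.Size([2, 6]) torch.Size([2, 6])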
Example 16
Project: deep-learning-note   Author: wdxtub   File: 30_series_sampling.py    MIT License 6 votes vote down vote up
def data_iter_random(corpus_indices, batch_size, num_steps, device=None):
    # Subtract 1 because the output index x is the corresponding input index y plus 1
    num_examples = (len(corpus_indices) - 1) // num_steps
    epoch_size = num_examples // batch_size
    example_indices = list(range(num_examples))
    random.shuffle(example_indices)

    # Return the sequence of length num_steps starting at pos
    def _data(pos):
        return corpus_indices[pos: pos + num_steps]
    if device is None:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    for i in range(epoch_size):
        # Read batch_size random examples each time
        i = i * batch_size
        batch_indices = example_indices[i: i + batch_size]
        X = [_data(j * num_steps) for j in batch_indices]
        Y = [_data(j * num_steps + 1) for j in batch_indices]
        yield torch.tensor(X, dtype=torch.float32, device=device), torch.tensor(Y, dtype=torch.float32, device=device) 
Example 17
Project: spqrel_tools   Author: LCAS   File: Kernel.py    MIT License 6 votes vote down vote up
def _processRandom(self, elem, sessionID):
        """Process a <random> AIML element.

        <random> elements contain zero or more <li> elements.  If
        none, the empty string is returned.  If one or more <li>
        elements are present, one of them is selected randomly to be
        processed recursively and have its results returned.  Only the
        chosen <li> element's contents are processed.  Any non-<li> contents are
        ignored.

        """
        listitems = []
        for e in elem[2:]:
            if e[0] == 'li':
                listitems.append(e)
        if len(listitems) == 0:
            return ""

        # select and process a random listitem.
        random.shuffle(listitems)
        return self._processElement(listitems[0], sessionID)

    # <sentence> 
Example 18
Project: fs_image   Author: facebookincubator   File: common.py    MIT License 5 votes vote down vote up
def shuffled(it: Iterable[T]) -> List[T]:
    l = list(it)
    random.shuffle(l)
    return l 
Example 19
Project: SyNEThesia   Author: RunOrVeith   File: data_loaders.py    MIT License 5 votes vote down vote up
def reset(self):
        if self.allow_shuffle:
            random.shuffle(self._iterator_source)
        self.iterator = iter(self._iterator_source) 
Example 20
Project: Caffe-Python-Data-Layer   Author: liuxianming   File: BasePythonDataLayer.py    BSD 2-Clause "Simplified" License 5 votes vote down vote up
def shuffle(self):
        """Shuffle all samples and their labels"""
        shuffled_data_ = list(zip(self._data, self._label))
        random.shuffle(shuffled_data_)
        self._data, self._label = zip(*shuffled_data_)
        self._data = list(self._data)
        self._label = list(self._label) 
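
The zip/shuffle/unzip idiom used here also appears in Example 5 and in the MODS dataset builders further down; a minimal standalone sketch with illustrative names:

import random

data = ['a', 'b', 'c', 'd']
labels = [0, 1, 0, 1]
paired = list(zip(data, labels))
random.shuffle(paired)
data, labels = (list(t) for t in zip(*paired))   # unzip back into two aligned lists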
Example 21
Project: meta-transfer-learning   Author: erfaneshrati   File: miniimagenet.py    MIT License 5 votes vote down vote up
def sample(self, num_images):
        """
        Sample images (as numpy arrays) from the class.

        Returns:
          A sequence of 84x84x3 numpy arrays.
          Each pixel ranges from 0 to 1.
        """
        names = [f for f in os.listdir(self.dir_path) if f.endswith('.JPEG')]
        random.shuffle(names)
        images = []
        for name in names[:num_images]:
            images.append(self._read_image(name))
        return images 
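
Shuffling the file list and slicing, as above, picks num_images files uniformly at random. When num_images never exceeds the number of files, random.sample() expresses the same thing directly (our shorthand, not the project's code):

chosen = random.sample(names, num_images)   # equivalent to shuffle-then-slice when num_images <= len(names)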
Example 22
Project: meta-transfer-learning   Author: erfaneshrati   File: reptile.py    MIT License 5 votes vote down vote up
def _mini_batches(samples, batch_size, num_batches, replacement):
    """
    Generate mini-batches from some data.

    Returns:
      An iterable of sequences of (input, label) pairs,
        where each sequence is a mini-batch.
    """
    samples = list(samples)
    if replacement:
        for _ in range(num_batches):
            yield random.sample(samples, batch_size)
        return
    cur_batch = []
    batch_count = 0
    while True:
        random.shuffle(samples)
        for sample in samples:
            cur_batch.append(sample)
            if len(cur_batch) < batch_size:
                continue
            yield cur_batch
            cur_batch = []
            batch_count += 1
            if batch_count == num_batches:
                return 
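
A hypothetical call to the generator above, drawing three mini-batches of four (input, label) pairs without replacement (the toy samples are ours):

samples = [(i, i % 2) for i in range(10)]   # toy (input, label) pairs
for batch in _mini_batches(samples, batch_size=4, num_batches=3, replacement=False):
    print(len(batch), batch[0])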
Example 23
Project: explirefit   Author: codogogo   File: data_helper.py    Apache License 2.0 5 votes vote down vote up
def create_corrupts(correct_train, correct_test, concept_dict, prev_dict, num_corrupt = 2, shuffle = True):
	concepts = list(concept_dict.values())
	train_corrupt = []
	test_corrupt = []
	current_dict = {}	

	merged = []
	merged.extend(correct_train)
	merged.extend(correct_test)

	for i in range(len(merged)):
		rel_str = merged[i][1] + "_" + merged[i][0]
		if rel_str not in prev_dict and rel_str not in current_dict:
			(train_corrupt if i < len(correct_train) else test_corrupt).append((merged[i][1], merged[i][0], "0"))
			current_dict[rel_str] = 1
		
		for j in range(num_corrupt - 1):
			c1 = concepts[random.randint(0, len(concepts) - 1)]
			c2 = concepts[random.randint(0, len(concepts) - 1)]
			rel_str = c1 + "_" + c2
			while(rel_str in prev_dict or rel_str in current_dict):
				c1 = concepts[random.randint(0, len(concepts) - 1)]
				c2 = concepts[random.randint(0, len(concepts) - 1)]
				rel_str = c1 + "_" + c2
			(train_corrupt if i < len(correct_train) else test_corrupt).append((c1, c2, "0"))
			current_dict[rel_str] = 1
			
	fdata_train = []
	fdata_train.extend(correct_train)
	fdata_train.extend(train_corrupt)
	
	fdata_test = []
	fdata_test.extend(correct_test)
	fdata_test.extend(test_corrupt)

	if shuffle:
		random.shuffle(fdata_train)
		random.shuffle(fdata_test)
	
	return (fdata_train, fdata_test) 
Example 24
Project: explirefit   Author: codogogo   File: trainer.py    Apache License 2.0 5 votes vote down vote up
def test(self, test_data, batch_size, eval_params = None, print_batches = False):
		epoch_loss = 0
		batches_eval = batcher.batch_iter(test_data, batch_size, 1, shuffle = False)
		eval_batch_counter = 1
				
		for batch_eval in batches_eval:
			if (len(batch_eval) == batch_size):
				feed_dict_eval, golds_batch_eval = self.feed_dict_function(self.model, batch_eval, None, predict = True)	
				preds_batch_eval = self.predict(feed_dict_eval)
				batch_eval_loss = self.model.loss.eval(session = self.session, feed_dict = feed_dict_eval)
				epoch_loss += batch_eval_loss

				if eval_batch_counter == 1:
					golds = golds_batch_eval
					preds = preds_batch_eval
				else:
					golds = np.concatenate((golds, golds_batch_eval), axis = 0)
					preds = np.concatenate((preds, preds_batch_eval), axis = 0)
				if print_batches:
					print(eval_batch_counter)
			eval_batch_counter += 1

		if self.eval_func is not None:
			score = self.eval_func(golds, preds, eval_params)
			return preds, epoch_loss, score
		else:
			return preds, epoch_loss 
Example 25
Project: explirefit   Author: codogogo   File: trainer.py    Apache License 2.0 5 votes vote down vote up
def train(self, train_data, batch_size, max_num_epochs, num_epochs_not_better_end = 5, epoch_diff_smaller_end = 1e-5, print_batch_losses = True, configuration = None, eval_params = None, shuffle_data = True):
		batch_counter = 0
		epoch_counter = 0
		epoch_losses = []
		epoch_loss = 0
		batches_in_epoch = int(len(train_data)/batch_size) + 1

		batches = batcher.batch_iter(train_data, batch_size, max_num_epochs, shuffle = shuffle_data)
		for batch in batches:
			batch_counter += 1

			if (len(batch) == batch_size):
				
				feed_dict, gold_labels = self.feed_dict_function(self.model, batch, config = configuration)
				if print_batch_losses:
					print("Batch " + str(batch_counter) + ": running single iteration training..." )
				self.train_model_single_iteration(feed_dict)
			
				batch_loss = self.model.loss.eval(session = self.session, feed_dict = feed_dict)
				if print_batch_losses:
					print("Batch " + str(batch_counter) + ": " + str(batch_loss))

			if batch_counter % batches_in_epoch == 0:
				epoch_counter += 1
				print("Evaluating the epoch loss for epoch " + str(epoch_counter))
				
				preds, epoch_loss, score = self.test(train_data, batch_size, eval_params, False)

				print("Epoch " + str(epoch_counter) + ": " + str(epoch_loss))
				print("Epoch (train) performance: " + str(score))
				print("Previous epochs: " + str(epoch_losses))

				if len(epoch_losses) == num_epochs_not_better_end and (epoch_losses[0] - epoch_loss < epoch_diff_smaller_end):
					break
				else: 
					epoch_losses.append(epoch_loss)
					epoch_loss = 0
					if len(epoch_losses) > num_epochs_not_better_end:
						epoch_losses.pop(0) 
Example 26
Project: pyblish-win   Author: pyblish   File: test_io.py    GNU Lesser General Public License v3.0 5 votes vote down vote up
def test_threads(self):
        try:
            # Write out many bytes with exactly the same number of 0's,
            # 1's... 255's. This will help us check that concurrent reading
            # doesn't duplicate or forget contents.
            N = 1000
            l = list(range(256)) * N
            random.shuffle(l)
            s = bytes(bytearray(l))
            with self.open(support.TESTFN, "wb") as f:
                f.write(s)
            with self.open(support.TESTFN, self.read_mode, buffering=0) as raw:
                bufio = self.tp(raw, 8)
                errors = []
                results = []
                def f():
                    try:
                        # Intra-buffer read then buffer-flushing read
                        for n in cycle([1, 19]):
                            s = bufio.read(n)
                            if not s:
                                break
                            # list.append() is atomic
                            results.append(s)
                    except Exception as e:
                        errors.append(e)
                        raise
                threads = [threading.Thread(target=f) for x in range(20)]
                for t in threads:
                    t.start()
                time.sleep(0.02) # yield
                for t in threads:
                    t.join()
                self.assertFalse(errors,
                    "the following exceptions were caught: %r" % errors)
                s = b''.join(results)
                for i in range(256):
                    c = bytes(bytearray([i]))
                    self.assertEqual(s.count(c), N)
        finally:
            support.unlink(support.TESTFN) 
Example 27
Project: pyblish-win   Author: pyblish   File: test_math.py    GNU Lesser General Public License v3.0 5 votes vote down vote up
def testFactorial(self):
        def fact(n):
            result = 1
            for i in range(1, int(n)+1):
                result *= i
            return result
        values = range(10) + [50, 100, 500]
        random.shuffle(values)
        for x in values:
            for cast in (int, long, float):
                self.assertEqual(math.factorial(cast(x)), fact(x), (x, fact(x), math.factorial(x)))
        self.assertRaises(ValueError, math.factorial, -1)
        self.assertRaises(ValueError, math.factorial, math.pi) 
Example 28
Project: pyblish-win   Author: pyblish   File: test_sort.py    GNU Lesser General Public License v3.0 5 votes vote down vote up
def test_cmpNone(self):
        # Testing None as a comparison function.

        L = range(50)
        random.shuffle(L)
        L.sort(None)
        self.assertEqual(L, range(50)) 
Example 29
Project: pyblish-win   Author: pyblish   File: test_sort.py    GNU Lesser General Public License v3.0 5 votes vote down vote up
def test_decorated(self):
        data = 'The quick Brown fox Jumped over The lazy Dog'.split()
        copy = data[:]
        random.shuffle(data)
        data.sort(key=str.lower)
        copy.sort(cmp=lambda x,y: cmp(x.lower(), y.lower())) 
Example 30
Project: pyblish-win   Author: pyblish   File: test_sort.py    GNU Lesser General Public License v3.0 5 votes vote down vote up
def test_reverse(self):
        data = range(100)
        random.shuffle(data)
        data.sort(reverse=True)
        self.assertEqual(data, range(99,-1,-1))
        self.assertRaises(TypeError, data.sort, "wrong type") 
Example 31
Project: pyblish-win   Author: pyblish   File: test_dict.py    GNU Lesser General Public License v3.0 5 votes vote down vote up
def test_literal_constructor(self):
        # check literal constructor for different sized dicts
        # (to exercise the BUILD_MAP oparg).
        for n in (0, 1, 6, 256, 400):
            items = [(''.join(random.sample(string.letters, 8)), i)
                     for i in range(n)]
            random.shuffle(items)
            formatted_items = ('{!r}: {:d}'.format(k, v) for k, v in items)
            dictliteral = '{' + ', '.join(formatted_items) + '}'
            self.assertEqual(eval(dictliteral), dict(items)) 
Example 32
Project: visual-asset-generator   Author: Automattic   File: spotlight.py    GNU General Public License v3.0 5 votes vote down vote up
def render(self, magic, frame_path, copy, cta):
        if len(copy) > 0:
            self.copy = copy[:self.content['character_limit']]
        else:
            self.copy = LOREM[:self.content['character_limit']]
        self.cta = cta
        if len(cta) == 0:
            self.cta = 'Get started'
        self.color_scheme = 'blue'
        if (frame_path.find('_b') != -1):
            self.color_scheme = 'pink'
        self.db.newDrawing()
        self.db.size(self.width, self.height)
        self.renderPortrait(magic)
        self.renderFrame(frame_path)
        self.renderLogo()

        #randomize the order of the content of the ad
        # renderFunctions = [self.renderBadge, self.renderCopy, self.renderButton]
        # shuffle(renderFunctions)
        # cursor = self.height
        # for i in range(0,len(renderFunctions)):
        #     cursor = renderFunctions[i](cursor)

        cursor = self.renderBadge(self.height)
        cursor = self.renderCopy(cursor)
        cursor = self.renderButton(cursor) 
Example 33
Project: python-samples   Author: dek-odoo   File: dek_program084.py    Apache License 2.0 5 votes vote down vote up
def main(numlist):
    shuffle(numlist)
    print numlist 
Example 34
Project: python-samples   Author: dek-odoo   File: dek_program085.py    Apache License 2.0 5 votes vote down vote up
def main(lst):
    # print lst, type(lst)
    # print ''.join(lst)
    shuffle(lst)
    # print 'sdfgsdg'
    return lst 
Example 35
Project: claxon   Author: vanatteveldt   File: learning_speed.py    GNU General Public License v3.0 5 votes vote down vote up
def shuffle_annotations(annotations):
    # Would like to use sessions, but at least crime just doesn't have any, so split and shuffle Niek data and add the rest
    x, y = annotations[:682], annotations[682:]
    random.shuffle(x)
    return x + y 
Example 36
Project: comet-commonsense   Author: atcbosselut   File: conceptnet.py    Apache License 2.0 5 votes vote down vote up
def reset_offsets(self, splits=["train", "test", "dev"],
                      shuffle=True, keys=None):
        if isinstance(splits, str):
            splits = [splits]

        for split in splits:
            if keys is None:
                keys = ["total", "positive", "negative"]

            for key in keys:
                self.offsets[split][key] = 0

            if shuffle:
                self.shuffle_sequences(split, keys) 
Example 37
Project: comet-commonsense   Author: atcbosselut   File: atomic.py    Apache License 2.0 5 votes vote down vote up
def reset_offsets(self, splits=["train", "test", "dev"],
                      shuffle=True, keys=None):
        if isinstance(splits, str):
            splits = [splits]

        for split in splits:
            if keys is None:
                keys = ["total"]

            for key in keys:
                self.offsets[split][key] = 0

            if shuffle:
                self.shuffle_sequences(split, keys) 
Example 38
Project: ieml   Author: IEMLdev   File: test_tree_graph.py    GNU General Public License v3.0 5 votes vote down vote up
def _tree_from_range(self, max):
        r = list(range(1, max))
        random.shuffle(r)
        transitions = {(0, i, 'data') for i in r}
        return TreeGraph(transitions) 
Example 39
Project: DBC-FederatedLearning-Client-VNX   Author: DeepBrainChain   File: file_manipulator.py    Apache License 2.0 5 votes vote down vote up
def general_split(input_path, out_folders, out_fractions, shuff = True, remove_original=False):
    #print(os.path.join(input_path, outputpath))
    #print(os.walk(input_path))

    for dirpath, dirnames, filenames in os.walk(input_path):
        print("dirpath:")
        print(dirpath)
        print("dirnames:")
        print(dirnames)
        
        n=len(filenames)
        print("number of files: "+str(n))
        lout= len(out_folders)
        k=0
        if shuff == True:
            random.shuffle(filenames)
        else:
            pass
        out_number = []

        for i, (outputpath, outfraction) in enumerate(zip(out_folders, out_fractions)):
            structure = os.path.join(outputpath, os.path.relpath(dirpath,input_path))
            print("structure:")
            print(structure)
            if not os.path.isdir(structure):
                os.mkdir(structure)
            else:
                print("Folder does already exits!")
            if i < lout - 1:
                x = int(n * outfraction)
            else:
                # the last folder receives all remaining files
                x = n - sum(out_number)
            out_number.append(x)

            for filename in filenames[k:(k + x)]:
                copyfile(os.path.join(dirpath, filename), os.path.join(structure, filename))
            k += x  # advance past the files already assigned to earlier folders

    if remove_original==True:
        shutil.rmtree(input_path) 
Example 40
Project: kuaa   Author: rafaelwerneck   File: common.py    GNU General Public License v3.0 5 votes vote down vote up
def shuffleData(labels, feats):
    assert len(labels) == len(feats)

    indexes = list(range(len(labels)))
    random.shuffle(indexes)

    def simpleShuffle(lst):
        return [lst[i] for i in indexes]

    labels = simpleShuffle(labels)
    feats = simpleShuffle(feats)

    return labels, feats 
Example 41
Project: Electrolyte_Analysis_FTIR   Author: Samuel-Buteau   File: Constant_run.py    MIT License 5 votes vote down vote up
def __init__(self, n_samples=None, list_of_indecies=None):
        if not n_samples is None:
            self.GetFresh_list = numpy.arange(n_samples, dtype=numpy.int32)
            self.get_fresh_count = n_samples
        elif not list_of_indecies is None:
            self.GetFresh_list = numpy.array(copy.deepcopy(list_of_indecies))
            self.get_fresh_count = len(self.GetFresh_list)
        else:
            raise Exception('Invalid Input')

        numpy.random.shuffle(self.GetFresh_list)
        self.get_fresh_pos = 0 
Example 42
Project: Electrolyte_Analysis_FTIR   Author: Samuel-Buteau   File: Constant_run.py    MIT License 5 votes vote down vote up
def get(self, n):
        """
        will return a list of n random numbers in self.GetFresh_list
        - Samuel Buteau, October 2018
        """
        if n >= self.get_fresh_count:
            return numpy.concatenate((self.get(int(n/2)),self.get(n- int(n/2))))


        reshuffle_flag = False

        n_immediate_fulfill = min(n, self.get_fresh_count - self.get_fresh_pos)
        batch_of_indecies = numpy.empty([n], dtype=numpy.int32)
        for i in range(0, n_immediate_fulfill):
            batch_of_indecies[i] = self.GetFresh_list[i + self.get_fresh_pos]

        self.get_fresh_pos += n_immediate_fulfill
        if self.get_fresh_pos >= self.get_fresh_count:
            self.get_fresh_pos -= self.get_fresh_count
            reshuffle_flag = True

            # Now, the orders that needed to be satisfied are satisfied.
        n_delayed_fulfill = max(0, n - n_immediate_fulfill)
        if reshuffle_flag:
            numpy.random.shuffle(self.GetFresh_list)

        if n_delayed_fulfill > 0:
            for i in range(0, n_delayed_fulfill):
                batch_of_indecies[i + n_immediate_fulfill] = self.GetFresh_list[i]
            self.get_fresh_pos = n_delayed_fulfill

        return batch_of_indecies 
Example 43
Project: Kaggle-Statoil-Challenge   Author: adodd202   File: utils.py    MIT License 5 votes vote down vote up
def get_mean_and_std(dataset):
    '''Compute the mean and std value of dataset.'''
    dataloader = trainloader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=True, num_workers=2)

    mean = torch.zeros(3)
    std = torch.zeros(3)
    print('==> Computing mean and std..')
    for inputs, targets in dataloader:
        for i in range(3):
            mean[i] += inputs[:, i, :, :].mean()
            std[i] += inputs[:, i, :, :].std()
    mean.div_(len(dataset))
    std.div_(len(dataset))
    return mean, std 
Example 44
Project: Kaggle-Statoil-Challenge   Author: adodd202   File: utils.py    MIT License 5 votes vote down vote up
def getStatoilTrainValLoaders(args):
    fixSeed(args)
    local_data = pd.read_json('/home/adodd202/train.json')

    local_data = shuffle(local_data)  # otherwise same validation set each time!
    local_data = local_data.reindex(np.random.permutation(local_data.index))

    local_data['band_1'] = local_data['band_1'].apply(lambda x: np.array(x).reshape(75, 75))
    local_data['band_2'] = local_data['band_2'].apply(lambda x: np.array(x).reshape(75, 75))
    local_data['inc_angle'] = pd.to_numeric(local_data['inc_angle'], errors='coerce')
    local_data['inc_angle'].fillna(0, inplace=True)

    band_1 = np.concatenate([im for im in local_data['band_1']]).reshape(-1, 75, 75)
    band_2 = np.concatenate([im for im in local_data['band_2']]).reshape(-1, 75, 75)
    # band_3=(band_1+band_2)/2
    local_full_img = np.stack([band_1, band_2], axis=1)

    train_imgs = XnumpyToTensor(local_full_img, args)
    train_targets = YnumpyToTensor(local_data['is_iceberg'].values, args)
    dset_train = TensorDataset(train_imgs, train_targets)

    local_train_ds, local_val_ds = trainTestSplit(dset_train, args.validationRatio)
    local_train_loader = torch.utils.data.DataLoader(local_train_ds, batch_size=args.batch_size, shuffle=False,
                                                     num_workers=args.workers)
    local_val_loader = torch.utils.data.DataLoader(local_val_ds, batch_size=args.batch_size, shuffle=False,
                                                   num_workers=args.workers)
    return local_train_loader, local_val_loader, local_train_ds, local_val_ds 
Example 45
Project: MODS_ConvNet   Author: santiagolopezg   File: dataset_MODS.py    MIT License 5 votes vote down vote up
def DSetGlobal(self, directory = '/home/musk/MODS_data/data/shuffled'):
          '''
          Function to build a rough dataset of images with labels.
          Returns a pkl file with data and data_labels.
          '''
          ## Find files in folders
          foldername = next(os.walk(directory))[1]
          for dirname in foldername:
             ## dirname: positive and negative
             print datetime.datetime.now()
             f2 = os.path.join(directory, dirname)
             onlyfiles = [f3 for f3 in os.listdir(f2) if os.path.isfile(os.path.join(f2, f3))]
             suffix = dirname
             if suffix == 'positive':
                 label = 1
             else:
                 label = 0
             for filename in onlyfiles:
                 try: ##reads the image, converts to greyscale, resizes it, appends it to data and adds label too
                     current_image = scipy.misc.imread(os.path.join(f2,filename), mode='L')
                     #current_image = scipy.misc.imread(os.path.join(f2,filename), mode='RGB')
                     #current_image = scipy.misc.imresize(current_image,(256, 192),interp='cubic')
                     current_image = scipy.misc.imresize(current_image,(224,224),interp='bicubic')
                     self.data.append(numpy.hstack(current_image))
                     self.data_label.append(label)
                 except IOError: ##If the image can't be read, or is corrupted
                     print(filename)
                 #scipy.misc.imshow(current_image) ##shows the image being read 
           
          ## shuffles the images with their label
          combined = zip(self.data, self.data_label)
          random.shuffle(combined)
          self.data[:], self.data_label[:] = zip(*combined)
          
          print len(self.data)

          dataset = [self.data, self.data_label]
          f = file('MODS_data.pkl','wb') ##save images in a pkl
          cPickle.dump(dataset, f, protocol=cPickle.HIGHEST_PROTOCOL)
          f.close()
          print(datetime.datetime.now() - self.start_time) 
Example 46
Project: MODS_ConvNet   Author: santiagolopezg   File: dataset_MODS.py    MIT License 5 votes vote down vote up
def Djoin(self, name='seg_MODS_data.pkl'):
         '''
         Takes as input segmented data from the Dset function. The data is split into
         training and testing. Each list (dataset) in the segmented data is taken,
         once, as the testing set. Then, the rest of the data is shuffled, and put
         into the testing set. Therefore, for each dataset, we have a different testing
         set of images, with also a different set of training images, shuffled twice.     
         Returns n datasets (same amount as in Dset). The datasets are made of two lists:
         training and testing. These lists are made of two lists each: data and labels.
         '''
         f = file(name, 'rb')
         datamama = cPickle.load(f)
         f.close()
         for i in xrange(len(datamama)):
             data_join = []
             data_label_join = []
             validation = datamama[i]
             data_temp = datamama[:i] + datamama[i+1:]
             for j in data_temp:
                 data_join+=j[0]
                 data_label_join+=j[1]
             
             ##Shuffle data
             combined = zip(data_join, data_label_join)
             random.shuffle(combined)
             data_join[:], data_label_join[:] = zip(*combined)                 
            
             training = [data_join,data_label_join]
             dataset_new = [training,validation]
             f = file('cut_MODS_all_data_bw_224_224_{0}.pkl'.format(i),'wb')
             cPickle.dump(dataset_new, f, protocol=cPickle.HIGHEST_PROTOCOL)
             f.close() 
Example 47
Project: MODS_ConvNet   Author: santiagolopezg   File: dataset_labs_MODS.py    MIT License 5 votes vote down vote up
def Djoin(self, v, name='seg_MODS_data.pkl'):
         '''
         Takes as input segmented data from the Dset function. The data is split into
         training and testing. Each list (dataset) in the segmented data is taken,
         once, as the testing set. Then, the rest of the data is shuffled and put
         into the training set. Therefore, for each dataset, we have a different testing
         set of images, with also a different set of training images, shuffled twice.     
         Returns n datasets (same amount as in Dset). The datasets are made of two lists:
         training and testing. These lists are made of two lists each: data and labels.
         '''
         f = file(name, 'rb')
         datamama = cPickle.load(f)
         f.close()
         for i in xrange(len(datamama)):
             data_join = []
             data_label_join = []
             #if 'test' in v:
             validation = datamama[i]
             data_temp = datamama[:i] + datamama[i+1:]
             #else:
             #    validation = []
             #    data_temp = datamama[:]
             for j in data_temp:
                 data_join+=j[0]
                 data_label_join+=j[1]
             
             ##Shuffle data
             combined = zip(data_join, data_label_join)
             random.shuffle(combined)
             data_join[:], data_label_join[:] = zip(*combined)                 
            
             training = [data_join,data_label_join]
             dataset_new = [training,validation]
             f = file('MODS_224_224_{0}_{1}.pkl'.format(i, v),'wb')
             print len(validation), v
             cPickle.dump(dataset_new, f, protocol=cPickle.HIGHEST_PROTOCOL)
             f.close() 
Example 48
Project: mlimages   Author: icoxfog417   File: chainer_alex.py    MIT License 5 votes vote down vote up
def train(epoch=10, batch_size=32, gpu=False):
    if gpu:
        cuda.check_cuda_available()
    xp = cuda.cupy if gpu else np

    td = TrainingData(LABEL_FILE, img_root=IMAGES_ROOT, image_property=IMAGE_PROP)

    # make mean image
    if not os.path.isfile(MEAN_IMAGE_FILE):
        print("make mean image...")
        td.make_mean_image(MEAN_IMAGE_FILE)
    else:
        td.mean_image_file = MEAN_IMAGE_FILE

    # train model
    label_def = LabelingMachine.read_label_def(LABEL_DEF_FILE)
    model = alex.Alex(len(label_def))
    optimizer = optimizers.MomentumSGD(lr=0.01, momentum=0.9)
    optimizer.setup(model)
    epoch = epoch
    batch_size = batch_size

    print("Now our model is {0} classification task.".format(len(label_def)))
    print("begin training the model. epoch:{0} batch size:{1}.".format(epoch, batch_size))

    if gpu:
        model.to_gpu()

    for i in range(epoch):
        print("epoch {0}/{1}: (learning rate={2})".format(i + 1, epoch, optimizer.lr))
        td.shuffle(overwrite=True)

        for x_batch, y_batch in td.generate_batches(batch_size):
            x = chainer.Variable(xp.asarray(x_batch))
            t = chainer.Variable(xp.asarray(y_batch))

            optimizer.update(model, x, t)
            print("loss: {0}, accuracy: {1}".format(float(model.loss.data), float(model.accuracy.data)))

        serializers.save_npz(MODEL_FILE, model)
        optimizer.lr *= 0.97 
Example 49
Project: mlimages   Author: icoxfog417   File: model.py    MIT License 5 votes vote down vote up
def shuffle(self, overwrite=False):
        """
        This method creates a new shuffled file.
        """

        if overwrite:
            shuffled = self.path
        else:
            shuffled = FileAPI.add_ext_name(self.path, "_shuffled")

        lines = open(self.path).readlines()
        random.shuffle(lines)
        open(shuffled, "w").writelines(lines)
        self.path = shuffled 
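
The same read/shuffle/write sequence is often written with context managers so both files are closed deterministically; a sketch under that assumption (not the project's actual code):

with open(self.path) as src:
    lines = src.readlines()
random.shuffle(lines)
with open(shuffled, "w") as dst:
    dst.writelines(lines)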
Example 50
Project: pytorch_NER_BiLSTM_CNN_CRF   Author: bamtercelboo   File: trainer.py    Apache License 2.0 5 votes vote down vote up
def train(self):
        """
        :return:
        """
        epochs = self.config.epochs
        clip_max_norm_use = self.config.clip_max_norm_use
        clip_max_norm = self.config.clip_max_norm
        new_lr = self.config.learning_rate

        for epoch in range(1, epochs + 1):
            print("\n## The {} Epoch, All {} Epochs ! ##".format(epoch, epochs))
            # new_lr = self._dynamic_lr(config=self.config, epoch=epoch, new_lr=new_lr)
            self.optimizer = self._decay_learning_rate(epoch=epoch - 1, init_lr=self.config.learning_rate)
            print("now lr is {}".format(self.optimizer.param_groups[0].get("lr")), end="")
            start_time = time.time()
            random.shuffle(self.train_iter)
            self.model.train()
            steps = 1
            backward_count = 0
            self.optimizer.zero_grad()
            for batch_count, batch_features in enumerate(self.train_iter):
                backward_count += 1
                # self.optimizer.zero_grad()
                word, char, mask, sentence_length, tags = self._get_model_args(batch_features)
                logit = self.model(word, char, sentence_length, train=True)
                loss = self._calculate_loss(logit, mask, tags)
                loss.backward()
                self._clip_model_norm(clip_max_norm_use, clip_max_norm)
                self._optimizer_batch_step(config=self.config, backward_count=backward_count)
                # self.optimizer.step()
                steps += 1
                if (steps - 1) % self.config.log_interval == 0:
                    self.getAcc(self.train_eval, batch_features, logit, self.config)
                    sys.stdout.write(
                        "\nbatch_count = [{}] , loss is {:.6f}, [TAG-ACC is {:.6f}%]".format(batch_count + 1, loss.item(), self.train_eval.acc()))
            end_time = time.time()
            print("\nTrain Time {:.3f}".format(end_time - start_time), end="")
            self.eval(model=self.model, epoch=epoch, config=self.config)
            self._model2file(model=self.model, config=self.config, epoch=epoch)
            self._early_stop(epoch=epoch) 
Example 51
Project: pytorch_NER_BiLSTM_CNN_CRF   Author: bamtercelboo   File: DataLoader_NER.py    Apache License 2.0 5 votes vote down vote up
def _write_shuffle_inst_to_file(insts, path):
        """
        :return:
        """
        w_path = ".".join([path, shuffle])
        if os.path.exists(w_path):
            os.remove(w_path)
        file = open(w_path, encoding="UTF-8", mode="w")
        for id, inst in enumerate(insts):
            for word, label in zip(inst.words, inst.labels):
                file.write(" ".join([word, label, "\n"]))
            file.write("\n")
        print("write shuffle insts to file {}".format(w_path)) 
Example 52
Project: pytorch_NER_BiLSTM_CNN_CRF   Author: bamtercelboo   File: DataLoader_NER.py    Apache License 2.0 5 votes vote down vote up
def _Load_Each_Data(self, path=None, shuffle=False):
        """
        :param path:
        :param shuffle:
        :return:
        """
        assert path is not None, "The Data Path Is Not Allowed To Be Empty."
        insts = []
        with open(path, encoding="UTF-8") as f:
            inst = Instance()
            for line in f.readlines():
                line = line.strip()
                if line == "" and len(inst.words) != 0:
                    inst.words_size = len(inst.words)
                    insts.append(inst)
                    inst = Instance()
                else:
                    line = line.strip().split(" ")
                    word = line[0]
                    char = self._add_char(word)
                    word = self._normalize_word(word)
                    inst.chars.append(char)
                    inst.words.append(word)
                    inst.labels.append(line[-1])
                if len(insts) == self.max_count:
                    break
            if len(inst.words) != 0:
                inst.words_size = len(inst.words)
                insts.append(inst)
            # print("\n")
        return insts 
Example 53
Project: VSE-C   Author: ExplorerFreda   File: data.py    MIT License 5 votes vote down vote up
def __getitem__(self, index):
        # handle the image redundancy
        img_id = index//self.im_div
        image = torch.Tensor(self.images[img_id])
        target = self.convert_to_tensor(self.captions[index])
        extended_captions = open(pjoin(self.extended_path, str(index) + '.txt'), 'rb').readlines()
        if self.data_split == 'train':
            random.shuffle(extended_captions)
        extended_captions = [bytes.decode(x).strip() if type(x) == bytes else x.strip() for x in extended_captions]
        extended_captions = [self.convert_to_tensor(st) for st in extended_captions[:self.num_negative]]
        if len(extended_captions) < self.num_negative:
            extended_captions.extend([torch.zeros(target.size()).long()] * (self.num_negative - len(extended_captions)))
        return image, target, index, img_id, extended_captions 
Example 54
Project: VSE-C   Author: ExplorerFreda   File: data.py    MIT License 5 votes vote down vote up
def get_precomp_loader(data_path, data_split, vocab, opt, batch_size=100,
                       shuffle=True, num_workers=2, cap_suffix='caps'):
    """Returns torch.utils.data.DataLoader for custom coco dataset."""
    dset = PrecompDataset(data_path, data_split, vocab, cap_suffix)

    data_loader = torch.utils.data.DataLoader(dataset=dset,
                                              batch_size=batch_size,
                                              shuffle=shuffle,
                                              pin_memory=True,
                                              num_workers=num_workers,
                                              collate_fn=collate_fn)
    return data_loader 
Example 55
Project: VSE-C   Author: ExplorerFreda   File: data.py    MIT License 5 votes vote down vote up
def get_precomp_train_caption_loader(data_path, data_split, vocab, opt, batch_size=100,
                                     shuffle=True, num_workers=2, cap_suffix='caps'):
    dset = PrecompTrainCaptionExtendedDataset(data_path, data_split, vocab, cap_suffix, opt.negative_number)
    data_loader = torch.utils.data.DataLoader(dataset=dset,
                                              batch_size=batch_size,
                                              shuffle=shuffle,
                                              pin_memory=True,
                                              num_workers=num_workers,
                                              collate_fn=collate_fn_train_text)
    return data_loader 
Example 56
Project: VSE-C   Author: ExplorerFreda   File: data.py    MIT License 5 votes vote down vote up
def get_loaders(data_name, vocab, crop_size, batch_size, workers, opt):
    dpath = os.path.join(opt.data_path, data_name)
    if opt.data_name.endswith('_precomp'):
        train_loader = get_precomp_loader(dpath, 'train', vocab, opt, batch_size, True, workers)
        val_loader = get_precomp_loader(dpath, 'dev', vocab, opt, batch_size, False, workers)
    else:
        # Build Dataset Loader
        roots, ids = get_paths(dpath, data_name, opt.use_restval)

        transform = get_transform(data_name, 'train', opt)
        train_loader = get_loader_single(opt.data_name, 'train',
                                         roots['train']['img'],
                                         roots['train']['cap'],
                                         vocab, transform, ids=ids['train'],
                                         batch_size=batch_size, shuffle=True,
                                         num_workers=workers,
                                         collate_fn=collate_fn)

        transform = get_transform(data_name, 'val', opt)
        val_loader = get_loader_single(opt.data_name, 'val',
                                       roots['val']['img'],
                                       roots['val']['cap'],
                                       vocab, transform, ids=ids['val'],
                                       batch_size=batch_size, shuffle=False,
                                       num_workers=workers,
                                       collate_fn=collate_fn)

    return train_loader, val_loader 
Example 59
Project: VSE-C   Author: ExplorerFreda   File: data.py    MIT License 5 votes vote down vote up
def get_text_loader(split_name, data_name, vocab, batch_size,
                    workers, opt, cap_suffix='caps', shuffle=False):
    dpath = os.path.join(opt.data_path, data_name)
    if opt.data_name.endswith('_precomp'):
        dset = PrecompTestCaptionExtendedDataset(dpath, split_name, vocab, cap_suffix)
        data_loader = torch.utils.data.DataLoader(dataset=dset,
                                                  batch_size=batch_size,
                                                  shuffle=shuffle,
                                                  pin_memory=True,
                                                  collate_fn=collate_fn_test_text)
        return data_loader
    else:
        # TODO: implement other loaders
        pass 
Example 60
Project: Image-Caption-Generator   Author: dabasajay   File: load_data.py    MIT License 5 votes vote down vote up
def data_generator(images, captions, tokenizer, max_length, batch_size, random_seed):
	# Setting random seed for reproducibility of results
	random.seed(random_seed)
	# Image ids
	image_ids = list(captions.keys())
	_count=0
	assert batch_size<= len(image_ids), 'Batch size must be less than or equal to {}'.format(len(image_ids))
	while True:
		if _count >= len(image_ids):
			# The generator has reached the end of the data, so restart it
			_count = 0
		# Batch list to store data
		input_img_batch, input_sequence_batch, output_word_batch = list(), list(), list()
		for i in range(_count, min(len(image_ids), _count+batch_size)):
			# Retrieve the image id
			image_id = image_ids[i]
			# Retrieve the image features
			image = images[image_id][0]
			# Retrieve the captions list
			captions_list = captions[image_id]
			# Shuffle captions list
			random.shuffle(captions_list)
			input_img, input_sequence, output_word = create_sequences(tokenizer, max_length, captions_list, image)
			# Add to batch
			for j in range(len(input_img)):
				input_img_batch.append(input_img[j])
				input_sequence_batch.append(input_sequence[j])
				output_word_batch.append(output_word[j])
		_count = _count + batch_size
		yield [[np.array(input_img_batch), np.array(input_sequence_batch)], np.array(output_word_batch)] 
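A hedged usage sketch of the generator above; `model`, `train_features`, `train_captions`, `tokenizer` and `max_len` are placeholders for objects the surrounding project would provide (they are assumptions, not names from the listing), and `model` is assumed to be a compiled Keras captioning model:

# Usage sketch -- all names below are illustrative assumptions
generator = data_generator(train_features, train_captions, tokenizer,
                           max_length=max_len, batch_size=64, random_seed=42)
steps = len(train_captions) // 64
model.fit_generator(generator, steps_per_epoch=steps, epochs=10, verbose=1)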
Example 61
Project: pyudorandom   Author: mewwts   File: pyudorandom.py    MIT License 5 votes vote down vote up
def shuffle(ls):
    """
    Takes a list ls and returns a new list with the elements of ls
    in a new order.

    """
    return list(items(ls)) 
Example 62
Project: deep-learning-note   Author: wdxtub   File: utils.py    MIT License 5 votes vote down vote up
def data_iter(batch_size, features, labels):
    num_examples = len(features)
    indices = list(range(num_examples))
    # read the examples in random order
    random.shuffle(indices)
    for i in range(0, num_examples, batch_size):
        # the last mini-batch may contain fewer than batch_size examples
        j = torch.LongTensor(indices[i: min(i + batch_size, num_examples)])
        yield features.index_select(0, j), labels.index_select(0, j)


# linear regression model definition
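A minimal, self-contained usage sketch of the data_iter function above with toy tensors (illustrative only):

import random
import torch

features = torch.randn(10, 2)
labels = torch.randn(10)
for X, y in data_iter(4, features, labels):
    print(X.shape, y.shape)  # the last mini-batch holds only 2 examples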
Example 63
Project: deep-learning-note   Author: wdxtub   File: utils.py    MIT License 5 votes vote down vote up
def train_opt(optimizer_fn, states, hyperparams, features, labels,
              batch_size=10, num_epochs=2):
    # initialize the model
    net, loss = linreg, squared_loss

    w = torch.nn.Parameter(torch.tensor(np.random.normal(0, 0.01, size=(features.shape[1], 1)), dtype=torch.float32),
                           requires_grad=True)
    b = torch.nn.Parameter(torch.zeros(1, dtype=torch.float32), requires_grad=True)

    def eval_loss():
        return loss(net(features, w, b), labels).mean().item()

    ls = [eval_loss()]
    data_iter = torch.utils.data.DataLoader(
        torch.utils.data.TensorDataset(features, labels), batch_size, shuffle=True)

    for _ in range(num_epochs):
        start = time.time()
        for batch_i, (X, y) in enumerate(data_iter):
            l = loss(net(X, w, b), y).mean()  # use the mean loss over the mini-batch

            # zero the gradients
            if w.grad is not None:
                w.grad.data.zero_()
                b.grad.data.zero_()

            l.backward()
            optimizer_fn([w, b], states, hyperparams)  # update the model parameters
            if (batch_i + 1) * batch_size % 100 == 0:
                ls.append(eval_loss())  # record the training loss every 100 examples
    # print the final loss and plot the training curve
    print('loss: %f, %f sec per epoch' % (ls[-1], time.time() - start))
    plt.plot(np.linspace(0, num_epochs, len(ls)), ls)
    plt.xlabel('epoch')
    plt.ylabel('loss')
    plt.show()


# Unlike the original book, the first argument here is an optimizer function rather than the optimizer's name
# e.g. optimizer_fn=torch.optim.SGD, optimizer_hyperparams={"lr": 0.05}
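For reference, a minimal optimizer_fn matching the contract used above (a sketch, not taken from the project; `states` is unused for plain SGD):

def sgd(params, states, hyperparams):
    # plain mini-batch SGD step; `states` is ignored here
    for p in params:
        p.data -= hyperparams['lr'] * p.grad.data

# train_opt(sgd, None, {'lr': 0.05}, features, labels)  # features/labels are assumed tensors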
Example 64
Project: deep-learning-note   Author: wdxtub   File: utils.py    MIT License 5 votes vote down vote up
def train_opt_pytorch(optimizer_fn, optimizer_hyperparams, features, labels,
                      batch_size=10, num_epochs=2):
    # initialize the model
    net = nn.Sequential(
        nn.Linear(features.shape[-1], 1)
    )
    loss = nn.MSELoss()
    optimizer = optimizer_fn(net.parameters(), **optimizer_hyperparams)

    def eval_loss():
        return loss(net(features).view(-1), labels).item() / 2

    ls = [eval_loss()]
    data_iter = torch.utils.data.DataLoader(
        torch.utils.data.TensorDataset(features, labels), batch_size, shuffle=True)

    for _ in range(num_epochs):
        start = time.time()
        for batch_i, (X, y) in enumerate(data_iter):
            # divide by 2 to stay consistent with train_ch7, whose squared_loss also divides by 2
            l = loss(net(X).view(-1), y) / 2

            optimizer.zero_grad()
            l.backward()
            optimizer.step()
            if (batch_i + 1) * batch_size % 100 == 0:
                ls.append(eval_loss())
    # print the final loss and plot the training curve
    print('loss: %f, %f sec per epoch' % (ls[-1], time.time() - start))
    plt.plot(np.linspace(0, num_epochs, len(ls)), ls)
    plt.xlabel('epoch')
    plt.ylabel('loss')
    plt.show() 
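A hedged usage sketch of train_opt_pytorch, following the convention noted after the previous example; `features` and `labels` are assumed to be float tensors prepared elsewhere:

import torch
train_opt_pytorch(torch.optim.SGD, {'lr': 0.05}, features, labels,
                  batch_size=10, num_epochs=2)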
Example 65
Project: deep-learning-note   Author: wdxtub   File: 3_linear_regression_raw.py    MIT License 5 votes vote down vote up
def data_iter(batch_size, features, labels):
    num_examples = len(features)
    indices = list(range(num_examples))
    # read the examples in random order
    random.shuffle(indices)
    for i in range(0, num_examples, batch_size):
        # the last mini-batch may contain fewer than batch_size examples
        j = torch.LongTensor(indices[i: min(i + batch_size, num_examples)])
        yield features.index_select(0, j), labels.index_select(0, j)


# model definition
Example 66
Project: deep-learning-note   Author: wdxtub   File: 19_char_rnn.py    MIT License 5 votes vote down vote up
def read_data(filename, vocab, window, overlap):
    lines = [line.strip() for line in open(filename, 'r').readlines()]
    while True:
        random.shuffle(lines)

        for text in lines:
            text = vocab_encode(text, vocab)
            for start in range(0, len(text) - window, overlap):
                chunk = text[start: start + window]
                chunk += [0] * (window - len(chunk))
                yield chunk


# read data in batches
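A hedged usage sketch of the read_data generator above; the file path is illustrative, and `vocab` plus `vocab_encode` are assumed to come from the same script:

from itertools import islice

gen = read_data('data/text_corpus.txt', vocab, window=50, overlap=25)
for chunk in islice(gen, 3):   # the generator itself never terminates
    print(len(chunk))          # every chunk is padded to exactly `window` tokens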
Example 67
Project: neural-pipeline   Author: toodef   File: data_producer.py    MIT License 5 votes vote down vote up
def shuffle_datasets_order(self, is_need: bool) -> 'DataProducer':
        """
        Set whether the order of the datasets should be shuffled. Shuffling is performed after every access to index 0.

        :param is_need: whether shuffling is needed
        :return: self object
        """
        self._shuffle_datasets_order = is_need
        return self 
Example 68
Project: neural-pipeline   Author: toodef   File: data_producer.py    MIT License 5 votes vote down vote up
def get_loader(self, indices: [str] = None) -> DataLoader:
        """
        Get PyTorch :class:`DataLoader` object, that aggregate :class:`DataProducer`.
        If ``indices`` is specified, the DataLoader will output data only for these indices; the indices themselves are not passed on.

        :param indices: list of indices. Each item of the list is a string in the format '{}_{}'.format(dataset_idx, data_idx)
        :return: :class:`DataLoader` object
        """
        if indices is not None:
            return self._get_loader_by_indices(indices)
        return DataLoader(self, batch_size=self.__batch_size, num_workers=self.__num_workers,
                          shuffle=self._glob_shuffle, pin_memory=self._pin_memory, collate_fn=self._collate_fn,
                          drop_last=self._drop_last) 
Example 69
Project: neural-pipeline   Author: toodef   File: data_producer.py    MIT License 5 votes vote down vote up
def _get_loader_by_indices(self, indices: [str]) -> DataLoader:
        """
        Get a loader that produces data only for the specified indices

        :param indices: required indices
        :return: :class:`DataLoader` object
        """
        return DataLoader(_ByIndices(self.__datasets, indices), batch_size=self.__batch_size, num_workers=self.__num_workers,
                          shuffle=self._glob_shuffle, pin_memory=self._pin_memory, collate_fn=self._collate_fn,
                          drop_last=self._drop_last) 
Example 70
Project: neural-pipeline   Author: toodef   File: data_producer.py    MIT License 5 votes vote down vote up
def _update_datasets_idx_space(self) -> None:
        """
        Update the index space of the datasets. The index space is used to map a global index to the corresponding dataset and its local data index
        """
        if self._shuffle_datasets_order:
            shuffle(self.__datasets)

        datasets_len = [len(d) for d in self.__datasets]
        self.__overall_len = sum(datasets_len)
        self._datatsets_idx_space = []
        cur_len = 0
        for dataset_len in datasets_len:
            self._datatsets_idx_space.append(dataset_len + cur_len - 1)
            cur_len += dataset_len 
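A hedged sketch of how these DataProducer pieces chain together; the constructor call below (a list of map-style datasets plus loader options) is an assumption, not taken from this listing:

producer = DataProducer([train_set_a, train_set_b], batch_size=32, num_workers=4)  # assumed signature
loader = producer.shuffle_datasets_order(True).get_loader()
for batch in loader:
    ...  # the datasets order is reshuffled each time index 0 is accessed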
Example 71
Project: flasky   Author: RoseOu   File: util.py    MIT License 5 votes vote down vote up
def __iter__(self):
        l = list(set.__iter__(self))
        random.shuffle(l)
        return iter(l) 
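The same idea as a small self-contained sketch: a set subclass whose iteration order is randomized on every pass (the class name is illustrative, not from the project):

import random

class RandomOrderSet(set):
    def __iter__(self):
        l = list(set.__iter__(self))
        random.shuffle(l)
        return iter(l)

print(list(RandomOrderSet({'a', 'b', 'c', 'd'})))  # a different order on most runs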
Example 72
Project: pyblish-win   Author: pyblish   File: test_thread.py    GNU Lesser General Public License v3.0 4 votes vote down vote up
def test01_1WriterMultiReaders(self):
        if verbose:
            print '\n', '-=' * 30
            print "Running %s.test01_1WriterMultiReaders..." % \
                  self.__class__.__name__

        keys=range(self.records)
        import random
        random.shuffle(keys)
        records_per_writer=self.records//self.writers
        readers_per_writer=self.readers//self.writers
        self.assertEqual(self.records,self.writers*records_per_writer)
        self.assertEqual(self.readers,self.writers*readers_per_writer)
        self.assertTrue((records_per_writer%readers_per_writer)==0)
        readers = []

        for x in xrange(self.readers):
            rt = Thread(target = self.readerThread,
                        args = (self.d, x),
                        name = 'reader %d' % x,
                        )#verbose = verbose)
            if sys.version_info[0] < 3 :
                rt.setDaemon(True)
            else :
                rt.daemon = True
            readers.append(rt)

        writers=[]
        for x in xrange(self.writers):
            a=keys[records_per_writer*x:records_per_writer*(x+1)]
            a.sort()  # Generate conflicts
            b=readers[readers_per_writer*x:readers_per_writer*(x+1)]
            wt = Thread(target = self.writerThread,
                        args = (self.d, a, b),
                        name = 'writer %d' % x,
                        )#verbose = verbose)
            writers.append(wt)

        for t in writers:
            if sys.version_info[0] < 3 :
                t.setDaemon(True)
            else :
                t.daemon = True
            t.start()

        for t in writers:
            t.join()
        for t in readers:
            t.join() 
Example 73
Project: pyblish-win   Author: pyblish   File: test_thread.py    GNU Lesser General Public License v3.0 4 votes vote down vote up
def test02_SimpleLocks(self):
        if verbose:
            print '\n', '-=' * 30
            print "Running %s.test02_SimpleLocks..." % self.__class__.__name__


        keys=range(self.records)
        import random
        random.shuffle(keys)
        records_per_writer=self.records//self.writers
        readers_per_writer=self.readers//self.writers
        self.assertEqual(self.records,self.writers*records_per_writer)
        self.assertEqual(self.readers,self.writers*readers_per_writer)
        self.assertTrue((records_per_writer%readers_per_writer)==0)

        readers = []
        for x in xrange(self.readers):
            rt = Thread(target = self.readerThread,
                        args = (self.d, x),
                        name = 'reader %d' % x,
                        )#verbose = verbose)
            if sys.version_info[0] < 3 :
                rt.setDaemon(True)
            else :
                rt.daemon = True
            readers.append(rt)

        writers = []
        for x in xrange(self.writers):
            a=keys[records_per_writer*x:records_per_writer*(x+1)]
            a.sort()  # Generate conflicts
            b=readers[readers_per_writer*x:readers_per_writer*(x+1)]
            wt = Thread(target = self.writerThread,
                        args = (self.d, a, b),
                        name = 'writer %d' % x,
                        )#verbose = verbose)
            writers.append(wt)

        for t in writers:
            if sys.version_info[0] < 3 :
                t.setDaemon(True)
            else :
                t.daemon = True
            t.start()

        for t in writers:
            t.join()
        for t in readers:
            t.join() 
Example 74
Project: pyblish-win   Author: pyblish   File: test_thread.py    GNU Lesser General Public License v3.0 4 votes vote down vote up
def test03_ThreadedTransactions(self):
        if verbose:
            print '\n', '-=' * 30
            print "Running %s.test03_ThreadedTransactions..." % \
                  self.__class__.__name__

        keys=range(self.records)
        import random
        random.shuffle(keys)
        records_per_writer=self.records//self.writers
        readers_per_writer=self.readers//self.writers
        self.assertEqual(self.records,self.writers*records_per_writer)
        self.assertEqual(self.readers,self.writers*readers_per_writer)
        self.assertTrue((records_per_writer%readers_per_writer)==0)

        readers=[]
        for x in xrange(self.readers):
            rt = Thread(target = self.readerThread,
                        args = (self.d, x),
                        name = 'reader %d' % x,
                        )#verbose = verbose)
            if sys.version_info[0] < 3 :
                rt.setDaemon(True)
            else :
                rt.daemon = True
            readers.append(rt)

        writers = []
        for x in xrange(self.writers):
            a=keys[records_per_writer*x:records_per_writer*(x+1)]
            b=readers[readers_per_writer*x:readers_per_writer*(x+1)]
            wt = Thread(target = self.writerThread,
                        args = (self.d, a, b),
                        name = 'writer %d' % x,
                        )#verbose = verbose)
            writers.append(wt)

        dt = Thread(target = self.deadlockThread)
        if sys.version_info[0] < 3 :
            dt.setDaemon(True)
        else :
            dt.daemon = True
        dt.start()

        for t in writers:
            if sys.version_info[0] < 3 :
                t.setDaemon(True)
            else :
                t.daemon = True
            t.start()

        for t in writers:
            t.join()
        for t in readers:
            t.join()

        self.doLockDetect = False
        dt.join() 
Example 75
Project: DBC-FederatedLearning-Client-VNX   Author: DeepBrainChain   File: file_manipulator.py    Apache License 2.0 4 votes vote down vote up
def refolder(data_folder, targ_folder, train_fraction=0.8, val_fraction=0.2, test_fraction=0.0, 
              remove_original=False):
    r=data_folder
    classes=[f for f in os.listdir(r) if os.path.isdir(os.path.join(r,f))]
    print('1 step')
    if os.path.isdir(targ_folder):
        shutil.rmtree(targ_folder)
    os.mkdir(targ_folder)
    print('step 2')
    sub_folder=os.path.join(targ_folder, 'train')
    os.mkdir(sub_folder)
    for c in classes:
        os.mkdir(os.path.join(sub_folder,c))
    
    sub_folder=os.path.join(targ_folder, 'val')
    os.mkdir(sub_folder)
    for c in classes:
        os.mkdir(os.path.join(sub_folder,c))

    if test_fraction!=0:
        sub_folder=os.path.join(targ_folder, 'test')
        os.mkdir(sub_folder)
        for c in classes:
            os.mkdir(os.path.join(sub_folder,c))
    
    for c in classes:
        files=glob.glob(os.path.join(r,c,"*"))
        random.shuffle(files)
        train_n=int(len(files)*train_fraction)
        for f in files[:train_n]:
            filename = os.path.basename(f)
            copyfile(f, os.path.join(targ_folder,'train', c,filename))
        
        if test_fraction==0:
            for f in files[train_n:]:
                filename = os.path.basename(f)
                copyfile(f, os.path.join(targ_folder,'val', c,filename))
        
        elif test_fraction!=0:
            val_n=int(len(files)*val_fraction)
            for f in files[train_n:(train_n+val_n)]:
                filename = os.path.basename(f)
                copyfile(f, os.path.join(targ_folder,'val', c,filename))
            for f in files[(train_n+val_n):]:
                filename = os.path.basename(f)
                copyfile(f, os.path.join(targ_folder,'test', c,filename))
        
        if remove_original==True:
            shutil.rmtree(data_folder)
			
#merge trees into one 
Example 76
Project: DBC-FederatedLearning-Client-VNX   Author: DeepBrainChain   File: data_processing.py    Apache License 2.0 4 votes vote down vote up
def refolder(data_folder, targ_folder, train_fraction=0.8, val_fraction=0.2, test_fraction=0.0, 
              remove_original=False):
    """
    Rearrange the files under a source folder (data_folder) that contains class subfolders
    into train/val/test folders under a target folder (targ_folder).

    Arguments:
        
    """
    
    r=data_folder
    classes=[f for f in os.listdir(r) if os.path.isdir(os.path.join(r,f))]
    print('1 step')
    if os.path.isdir(targ_folder):
        shutil.rmtree(targ_folder)
    os.mkdir(targ_folder)
    print('step 2')
    sub_folder=os.path.join(targ_folder, 'train')
    os.mkdir(sub_folder)
    for c in classes:
        os.mkdir(os.path.join(sub_folder,c))
    
    sub_folder=os.path.join(targ_folder, 'val')
    os.mkdir(sub_folder)
    for c in classes:
        os.mkdir(os.path.join(sub_folder,c))

    if test_fraction!=0:
        sub_folder=os.path.join(targ_folder, 'test')
        os.mkdir(sub_folder)
        for c in classes:
            os.mkdir(os.path.join(sub_folder,c))
    
    for c in classes:
        files=glob.glob(os.path.join(r,c,"*"))
        random.shuffle(files)
        train_n=int(len(files)*train_fraction)
        for f in files[:train_n]:
            filename = os.path.basename(f)
            copyfile(f, os.path.join(targ_folder,'train', c,filename))
        
        if test_fraction==0:
            for f in files[train_n:]:
                filename = os.path.basename(f)
                copyfile(f, os.path.join(targ_folder,'val', c,filename))
        
        elif test_fraction!=0:
            val_n=int(len(files)*val_fraction)
            for f in files[train_n:(train_n+val_n)]:
                filename = os.path.basename(f)
                copyfile(f, os.path.join(targ_folder,'val', c,filename))
            for f in files[(train_n+val_n):]:
                filename = os.path.basename(f)
                copyfile(f, os.path.join(targ_folder,'test', c,filename))
        
        if remove_original==True:
            shutil.rmtree(data_folder) 
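A hedged usage sketch of refolder; the paths are illustrative, and the fractions describe the desired split:

refolder('data/raw', 'data/split',
         train_fraction=0.7, val_fraction=0.15, test_fraction=0.15,
         remove_original=False)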
Example 77
Project: kuaa   Author: rafaelwerneck   File: common.py    GNU General Public License v3.0 4 votes vote down vote up
def splitTrainTestOS(labels, feats, acs):
    """
    Split 'labels' and 'feats' in train and test for
        open set (OS) evaluation.
    """
    assert isinstance(labels, np.ndarray)
    assert isinstance(feats, np.ndarray)
    assert isinstance(acs, list)
    assert len(labels) == len(feats)
    assert len(acs) > 0
    assert all([x in labels
                for x
                in acs])

    laf = zip(labels, feats)  # labels and feats
    train = filter(lambda (l, f): l in acs, laf)

    grouped = map(lambda x: list(x[1]),
                  it.groupby(train, lambda (l, f): l))  # grouped based on labels

    try:
        hf = current_simulations_settings['hf']
        sf = hf.getStorageFile([labels, feats, acs])
        try:
            seeds_shuffle = sf.loadVars('seeds_shuffle')
            yellow_err('splitTrainTestOS(): try')
        except:
            yellow_err('splitTrainTestOS(): except')
            seeds_shuffle = [random.random()
                             for _ in range(len(grouped))]
            sf.saveVars(seeds_shuffle=seeds_shuffle)
        finally:
            del hf
            del sf
    except NameError:
        seeds_shuffle = [random.random()
                         for _ in range(len(grouped))]

    map(lambda (group, seed): random.shuffle(group, lambda: seed),
        zip(grouped, seeds_shuffle))  # shuffle feats of the same label among them
    grouped = map(lambda group: (group[:len(group) / 2],
                            group[len(group) / 2:]),
                  grouped)  # split each group of some label between train and test

    train, test = reduce(lambda (tr1, te1), (tr2, te2): (tr1 + tr2, te1 + te2),
                         grouped)
    test += map(lambda (l, f): (None, f),
                filter(lambda (l, f): l not in acs,
                       laf))

    ltr, ftr = map(np.array, zip(*train))
    lte, fte = map(np.array, zip(*test))

    assert len(ltr) + len(lte) == len(labels)
    assert len(ftr) + len(fte) == len(feats)

    return ltr, ftr, lte, fte 
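The tuple-unpacking lambdas above are Python 2 only; a minimal Python 3 sketch (not from the project) of the per-group reproducible shuffle would be:

import random

def shuffle_groups(grouped, seeds):
    # shuffle each label group in place with its own reproducible seed
    for group, seed in zip(grouped, seeds):
        random.Random(seed).shuffle(group)
    return grouped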
Example 78
Project: Electrolyte_Analysis_FTIR   Author: Samuel-Buteau   File: LinearA_run.py    MIT License 4 votes vote down vote up
def train_on_all_data(args):

    """
    This is the code to run in order to train a model on the whole dataset.

    """

    num_concentrations = args['num_concentrations']
    num_samples = args['num_samples']

    res = get_data()

    supervised_dataset = tf.data.Dataset.from_tensor_slices((
        tf.cast(res['supervised']['s'], dtype=tf.float32),
        tf.cast(res['supervised']['m'], dtype=tf.float32),
        tf.cast(res['supervised']['z'], dtype=tf.float32)))

    unsupervised_dataset = tf.data.Dataset.from_tensor_slices((
        tf.cast(res['unsupervised']['s'], dtype=tf.float32),
        tf.cast(res['unsupervised']['m'], dtype=tf.float32),
        tf.cast(res['unsupervised']['z'], dtype=tf.float32)))


    dataset = tf.data.experimental.sample_from_datasets(
        datasets=(
            supervised_dataset.shuffle(10000).repeat(),
            unsupervised_dataset.shuffle(10000).repeat(),
        ),
        weights=[args['prob_supervised'], 1.-args['prob_supervised']]
    )
    dataset = dataset.batch(args['batch_size'])
    # this is where we define the model and optimizer.

    trainer = Trainer(num_concentrations, num_samples, args, trainable=True)

    for s, m, z in dataset:
        current_step = int(trainer.ckpt.step)

        # stop condition.
        if current_step >= args['total_steps']:
            print('Training complete.')
            break

        loss = trainer.train_step(s,m,z, args['batch_size'])

        trainer.ckpt.step.assign_add(1)
        current_step = int(trainer.ckpt.step)
        if (current_step % args['log_every']) == 0:
            if current_step == 2000 and loss > 1.:
                train_on_all_data(args)
                return

            print('Step {} loss {}.'.format(current_step,loss))

        if (current_step % args['checkpoint_every']) == 0 and current_step != 0:
            save_path = trainer.manager.save()
            print("Saved checkpoint for step {}: {}".format(current_step, save_path)) 
Example 79
Project: MODS_ConvNet   Author: santiagolopezg   File: dataset_labs_MODS.py    MIT License 4 votes vote down vote up
def DSetGlobal(self, directory = '/home/musk/MODS_data/data/by_lab/'):
          '''
          Function to build a rough dataset of images with labels.
          Returns a pkl file with data and data_labels.
          '''
          ## Find files in folders
	  for v in os.listdir(directory):
		  self.training_data = []
		  self.training_label = []
		  self.validation_data = []
		  self.validation_label = []
		  self.test_data = []
		  self.test_label = []
		  self.data = []
	  	  self.data_label = []
		  print v, directory+v
		  foldername = next(os.walk(directory+v))[1]
		  print foldername
		  #exit()
		  for dirname in foldername: 
		  ##dirname: positive and negative
		     print datetime.datetime.now()
		     f2 = os.path.join(directory+v,dirname)
		     onlyfiles = [ f3 for f3 in os.listdir(f2) if os.path.isfile(os.path.join(f2,f3))]		
		     suffix = dirname
		     if suffix == 'positive':
				label = 1
		     else:
				label = 0
		     for filename in onlyfiles:
		         try: ##reads the image, converts to greyscale, resizes it, appends it to data and adds label too
		             current_image = scipy.misc.imread(os.path.join(f2,filename), mode='L')
		             #current_image = scipy.misc.imread(os.path.join(f2,filename), mode='RGB')
		             #current_image = scipy.misc.imresize(current_image,(256, 192),interp='cubic')
		             current_image = scipy.misc.imresize(current_image,(224,224),interp='bicubic')
		             self.data.append(numpy.hstack(current_image))
		             self.data_label.append(label)
		         except IOError: ##If the image can't be read, or is corrupted
		             print(filename)
		         #scipy.misc.imshow(current_image) ##shows the image being read 
		   
		  ## shuffle the images together with their labels
		  combined = zip(self.data, self.data_label)
		  random.shuffle(combined)
		  self.data[:], self.data_label[:] = zip(*combined)
		  
		  print len(self.data)

		  dataset = [self.data, self.data_label]
		  f = file('MODS_data_{0}.pkl'.format(v),'wb') ##save images in a pkl
		  cPickle.dump(dataset, f, protocol=cPickle.HIGHEST_PROTOCOL)
		  f.close()
		  print(datetime.datetime.now() - self.start_time) 
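One portability note on the zip-shuffle-unzip pattern above: on Python 3, zip() returns an iterator, so it must be wrapped in list() before random.shuffle() can work on it. A toy sketch:

import random

data = ['img_a', 'img_b', 'img_c']
labels = [1, 0, 1]
combined = list(zip(data, labels))   # list() is required on Python 3
random.shuffle(combined)
data[:], labels[:] = zip(*combined)  # unzip while keeping each image paired with its label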