Python sklearn.datasets.load_files() Examples

The following are 16 code examples showing how to use sklearn.datasets.load_files(). They are extracted from open source projects; the project, author, and file noted above each example identify the original source.

You may also want to check out all available functions and classes of the module sklearn.datasets.
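For orientation, here is a minimal sketch of calling load_files() on a container directory whose subfolders name the categories; the my_corpus directory and its files are hypothetical, used only for illustration:

# Assumed layout (hypothetical):
#   my_corpus/pos/0.txt
#   my_corpus/neg/1.txt
from sklearn.datasets import load_files

dataset = load_files('my_corpus', encoding='utf-8')
print(dataset.target_names)  # subfolder names, e.g. ['neg', 'pos']
print(dataset.data[0])       # file contents, decoded to str because encoding was given
print(dataset.target[0])     # integer label, an index into target_names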

Example 1
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_base.py   License: MIT License
def test_default_empty_load_files(load_files_root):
    res = load_files(load_files_root)
    assert_equal(len(res.filenames), 0)
    assert_equal(len(res.target_names), 0)
    assert_equal(res.DESCR, None) 
Example 2
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_base.py   License: MIT License
def test_default_load_files(test_category_dir_1, test_category_dir_2,
                            load_files_root):
    res = load_files(load_files_root)
    assert_equal(len(res.filenames), 1)
    assert_equal(len(res.target_names), 2)
    assert_equal(res.DESCR, None)
    assert_equal(res.data, [b"Hello World!\n"]) 
Example 3
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_base.py   License: MIT License
def test_load_files_w_categories_desc_and_encoding(
        test_category_dir_1, test_category_dir_2, load_files_root):
    category = os.path.abspath(test_category_dir_1).split('/').pop()
    res = load_files(load_files_root, description="test",
                     categories=category, encoding="utf-8")
    assert_equal(len(res.filenames), 1)
    assert_equal(len(res.target_names), 1)
    assert_equal(res.DESCR, "test")
    assert_equal(res.data, ["Hello World!\n"]) 
Example 4
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_base.py   License: MIT License
def test_load_files_wo_load_content(
        test_category_dir_1, test_category_dir_2, load_files_root):
    res = load_files(load_files_root, load_content=False)
    assert_equal(len(res.filenames), 1)
    assert_equal(len(res.target_names), 2)
    assert_equal(res.DESCR, None)
    assert_equal(res.get('data'), None) 
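With load_content=False the returned Bunch carries only filenames and targets, which suits large corpora where files should be read lazily. A minimal sketch of that pattern, with data_root as a hypothetical directory:

from sklearn.datasets import load_files

res = load_files('data_root', load_content=False, shuffle=False)
for path, label in zip(res.filenames, res.target):
    with open(path, 'rb') as f:  # read each file on demand rather than up front
        text = f.read()
    # ... process text together with its integer label ...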
Example 5
Project: cherry   Author: Windsooon   File: base.py   License: MIT License
def _load_data_from_local(
        model, categories=None, encoding=None):
    '''
    1. Find the local cache files.
    2. If we can't find the cache files:
           2.1 Try to create cache files using the data files inside `datasets`.
           2.2 Raise an error if creating the cache files fails.
    '''
    model_path = os.path.join(DATA_DIR, model)
    cache_path = os.path.join(model_path, model + '.pkz')
    if os.path.exists(cache_path):
        try:
            with open(cache_path, 'rb') as f:
                compressed_content = f.read()
            uncompressed_content = codecs.decode(
                compressed_content, 'zlib_codec')
            return pickle.loads(uncompressed_content)['all']
        except Exception:
            # The cache file exists but could not be read or unpickled
            error = ('Can\'t load cached data from {0}. '
                     'Please try again after deleting the cache files.'.format(model))
            raise NotSupportError(error)
    cache = dict(all=load_files(
        model_path, categories=categories, encoding=encoding))
    compressed_content = codecs.encode(pickle.dumps(cache), 'zlib_codec')
    with open(cache_path, 'wb') as f:
        f.write(compressed_content)
    return cache['all'] 
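Two design notes on this loader: the .pkz file is a zlib-compressed pickle of the load_files() result, which matches the on-disk cache format scikit-learn itself uses for fetch_20newsgroups, and caching the whole Bunch this way avoids re-walking the directory tree on every run.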
Example 6
Project: skorch   Author: skorch-dev   File: model.py   License: BSD 3-Clause "New" or "Revised" License
def __call__(self):
        download()
        dataset = load_files(self.path, categories=['pos', 'neg'])
        X, y = dataset['data'], dataset['target']
        X = np.asarray([x.decode() for x in X])  # decode from bytes
        return X, y 
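The manual byte-decoding step above can also be delegated to load_files() through its encoding parameter. A sketch of the equivalent call (path stands in for self.path, and it assumes the review files are valid UTF-8):

dataset = load_files(path, categories=['pos', 'neg'], encoding='utf-8')
X, y = np.asarray(dataset['data']), dataset['target']  # items in data are already str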
Example 7
Project: lexpredict-contraxsuite   Author: LexPredict   File: lease_train.py   License: GNU Affero General Public License v3.0
def load_lease_dataset(root):
    return load_files(root) 
Example 8
Project: text-classification   Author: zhengwsh   File: data_helpers.py   License: Apache License 2.0
def get_datasets_localdata(container_path=None, categories=None, load_content=True,
                       encoding='utf-8', shuffle=True, random_state=42):
    """
    Load text files with categories as subfolder names.
    Individual samples are assumed to be files stored a two levels folder structure.
    :param container_path: The path of the container
    :param categories: List of classes to choose, all classes are chosen by default (if empty or omitted)
    :param shuffle: shuffle the list or not
    :param random_state: seed integer to shuffle the dataset
    :return: data and labels of the dataset
    """
    datasets = load_files(container_path=container_path, categories=categories,
                          load_content=load_content, shuffle=shuffle, encoding=encoding,
                          random_state=random_state)
    return datasets 
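For context, a short sketch of consuming the returned Bunch in a training script; the vectorizer choice, the split ratio, and the my_corpus path are illustrative assumptions, not part of the original project:

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split

datasets = get_datasets_localdata(container_path='my_corpus')
X_train, X_test, y_train, y_test = train_test_split(
    datasets.data, datasets.target, test_size=0.2, random_state=42)
vectorizer = TfidfVectorizer()
X_train_vec = vectorizer.fit_transform(X_train)  # fit the vocabulary on training text only
X_test_vec = vectorizer.transform(X_test)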
Example 9
Project: opentc   Author: cahya-wirawan   File: generic.py   License: MIT License
def __init__(self, cfg=None):
        """
        Load text files with categories as subfolder names.
        Individual samples are assumed to be files stored a two levels folder structure.
        :param container_path: The path of the container
        :param categories: List of classes to choose, all classes are chosen by default (if empty or omitted)
        :param shuffle: shuffle the list or not
        :param random_state: seed integer to shuffle the dataset
        :return: data and labels of the dataset
        """
        super().__init__()
        self.__dataset__ = load_files(container_path=cfg['container_path'], categories=cfg['categories'],
                                      load_content=cfg['load_content'], shuffle=cfg['shuffle'],
                                      encoding=cfg['encoding'], random_state=cfg['random_state']) 
Example 10
Project: opentc   Author: cahya-wirawan   File: cnn_text_util.py   License: MIT License
The code is identical, line for line, to the get_datasets_localdata() function shown in Example 8 above.
Example 11
Project: twitter-stock-recommendation   Author: alvarobartt   File: test_base.py   License: MIT License
def test_default_empty_load_files():
    res = load_files(LOAD_FILES_ROOT)
    assert_equal(len(res.filenames), 0)
    assert_equal(len(res.target_names), 0)
    assert_equal(res.DESCR, None) 
Example 12
Project: twitter-stock-recommendation   Author: alvarobartt   File: test_base.py   License: MIT License
def test_default_load_files():
    res = load_files(LOAD_FILES_ROOT)
    assert_equal(len(res.filenames), 1)
    assert_equal(len(res.target_names), 2)
    assert_equal(res.DESCR, None)
    assert_equal(res.data, [b("Hello World!\n")])  # b() is a six-style bytes helper imported by the test module
Example 13
Project: twitter-stock-recommendation   Author: alvarobartt   File: test_base.py   License: MIT License
def test_load_files_w_categories_desc_and_encoding():
    category = os.path.abspath(TEST_CATEGORY_DIR1).split('/').pop()
    res = load_files(LOAD_FILES_ROOT, description="test",
                     categories=category, encoding="utf-8")
    assert_equal(len(res.filenames), 1)
    assert_equal(len(res.target_names), 1)
    assert_equal(res.DESCR, "test")
    assert_equal(res.data, [u("Hello World!\n")])  # u() is a six-style text helper imported by the test module
Example 14
Project: twitter-stock-recommendation   Author: alvarobartt   File: test_base.py   License: MIT License
def test_load_files_wo_load_content():
    res = load_files(LOAD_FILES_ROOT, load_content=False)
    assert_equal(len(res.filenames), 1)
    assert_equal(len(res.target_names), 2)
    assert_equal(res.DESCR, None)
    assert_equal(res.get('data'), None) 
Example 15
Project: facenet-demo   Author: 1024210879   File: batch_represent.py   License: MIT License
def main(args):

	with tf.Graph().as_default():

		with tf.Session() as sess:

			# create output directory if it doesn't exist
			output_dir = os.path.expanduser(args.output_dir)
			if not os.path.isdir(output_dir):
				os.makedirs(output_dir)

			# load the model
			print("Loading trained model...\n")
			meta_file, ckpt_file = facenet.get_model_filenames(os.path.expanduser(args.trained_model_dir))
			facenet.load_model(args.trained_model_dir, meta_file, ckpt_file)

			# grab all image paths and labels
			print("Finding image paths and targets...\n")
			data = load_files(args.data_dir, load_content=False, shuffle=False)
			labels_array = data['target']
			paths = data['filenames']

			# Get input and output tensors
			images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0")
			embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0")
			phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0")

			image_size = images_placeholder.get_shape()[1]
			embedding_size = embeddings.get_shape()[1]

			# Run forward pass to calculate embeddings
			print('Generating embeddings from images...\n')
			start_time = time.time()
			batch_size = args.batch_size
			nrof_images = len(paths)
			nrof_batches = int(np.ceil(1.0*nrof_images / batch_size))
			emb_array = np.zeros((nrof_images, embedding_size))
			for i in xrange(nrof_batches):
				start_index = i*batch_size
				end_index = min((i+1)*batch_size, nrof_images)
				paths_batch = paths[start_index:end_index]
				images = facenet.load_data(paths_batch, do_random_crop=False, do_random_flip=False, image_size=image_size, do_prewhiten=True)
				feed_dict = { images_placeholder:images, phase_train_placeholder:False}
				emb_array[start_index:end_index,:] = sess.run(embeddings, feed_dict=feed_dict)

			time_avg_forward_pass = (time.time() - start_time) / float(nrof_images)
			print("Forward pass took avg of %.3f[seconds/image] for %d images\n" % (time_avg_forward_pass, nrof_images))

			print("Finally saving embeddings and gallery to: %s" % (output_dir))
			# save the gallery and embeddings (signatures) as numpy arrays to disk
			np.save(os.path.join(output_dir, "gallery.npy"), labels_array)
			np.save(os.path.join(output_dir, "signatures.npy"), emb_array) 
Example 16
Project: facenet   Author: davidsandberg   File: batch_represent.py   License: MIT License
The main() function here is identical, line for line, to the one shown in Example 15 above.