Python tensorflow.contrib.learn.python.learn.datasets.base.maybe_download() Examples
The following are 14
code examples of tensorflow.contrib.learn.python.learn.datasets.base.maybe_download().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module
tensorflow.contrib.learn.python.learn.datasets.base
, or try the search function
.
Example #1
Source File: text_datasets.py From lambda-packs with MIT License | 5 votes |
def maybe_download_dbpedia(data_dir): """Download if DBpedia data is not present.""" train_path = os.path.join(data_dir, 'dbpedia_csv/train.csv') test_path = os.path.join(data_dir, 'dbpedia_csv/test.csv') if not (gfile.Exists(train_path) and gfile.Exists(test_path)): archive_path = base.maybe_download( 'dbpedia_csv.tar.gz', data_dir, DBPEDIA_URL) tfile = tarfile.open(archive_path, 'r:*') tfile.extractall(data_dir)
Example #2
Source File: text_datasets.py From auto-alt-text-lambda-api with MIT License | 5 votes |
def maybe_download_dbpedia(data_dir): """Download if DBpedia data is not present.""" train_path = os.path.join(data_dir, 'dbpedia_csv/train.csv') test_path = os.path.join(data_dir, 'dbpedia_csv/test.csv') if not (gfile.Exists(train_path) and gfile.Exists(test_path)): archive_path = base.maybe_download( 'dbpedia_csv.tar.gz', data_dir, DBPEDIA_URL) tfile = tarfile.open(archive_path, 'r:*') tfile.extractall(data_dir)
Example #3
Source File: text_datasets.py From deep_image_model with Apache License 2.0 | 5 votes |
def maybe_download_dbpedia(data_dir): """Download if DBpedia data is not present.""" train_path = os.path.join(data_dir, 'dbpedia_csv/train.csv') test_path = os.path.join(data_dir, 'dbpedia_csv/test.csv') if not (gfile.Exists(train_path) and gfile.Exists(test_path)): archive_path = base.maybe_download( 'dbpedia_csv.tar.gz', data_dir, DBPEDIA_URL) tfile = tarfile.open(archive_path, 'r:*') tfile.extractall(data_dir)
Example #4
Source File: import_ldc93s1.py From AVSR-Deep-Speech with GNU General Public License v2.0 | 5 votes |
def _download_and_preprocess_data(data_dir): # Conditionally download data LDC93S1_BASE = "LDC93S1" LDC93S1_BASE_URL = "https://catalog.ldc.upenn.edu/desc/addenda/" local_file = base.maybe_download(LDC93S1_BASE + ".wav", data_dir, LDC93S1_BASE_URL + LDC93S1_BASE + ".wav") trans_file = base.maybe_download(LDC93S1_BASE + ".txt", data_dir, LDC93S1_BASE_URL + LDC93S1_BASE + ".txt") with open(trans_file, "r") as fin: transcript = ' '.join(fin.read().strip().lower().split(' ')[2:]).replace('.', '') df = pandas.DataFrame(data=[(os.path.abspath(local_file), os.path.getsize(local_file), transcript)], columns=["wav_filename", "wav_filesize", "transcript"]) df.to_csv(os.path.join(data_dir, "ldc93s1.csv"), index=False)
Example #5
Source File: text_datasets.py From keras-lambda with MIT License | 5 votes |
def maybe_download_dbpedia(data_dir): """Download if DBpedia data is not present.""" train_path = os.path.join(data_dir, 'dbpedia_csv/train.csv') test_path = os.path.join(data_dir, 'dbpedia_csv/test.csv') if not (gfile.Exists(train_path) and gfile.Exists(test_path)): archive_path = base.maybe_download( 'dbpedia_csv.tar.gz', data_dir, DBPEDIA_URL) tfile = tarfile.open(archive_path, 'r:*') tfile.extractall(data_dir)
Example #6
Source File: input_data.py From cloudml-samples with Apache License 2.0 | 4 votes |
def read_data_sets(train_dir, fake_data=False, one_hot=False, dtype=dtypes.float32): if fake_data: def fake(): return DataSet([], [], fake_data=True, one_hot=one_hot, dtype=dtype) train = fake() validation = fake() test = fake() return base.Datasets(train=train, validation=validation, test=test) TRAIN_IMAGES = 'train-images-idx3-ubyte.gz' TRAIN_LABELS = 'train-labels-idx1-ubyte.gz' TEST_IMAGES = 't10k-images-idx3-ubyte.gz' TEST_LABELS = 't10k-labels-idx1-ubyte.gz' VALIDATION_SIZE = 5000 local_file = base.maybe_download(TRAIN_IMAGES, train_dir, SOURCE_URL + TRAIN_IMAGES) train_images = extract_images(local_file) local_file = base.maybe_download(TRAIN_LABELS, train_dir, SOURCE_URL + TRAIN_LABELS) train_labels = extract_labels(local_file, one_hot=one_hot) local_file = base.maybe_download(TEST_IMAGES, train_dir, SOURCE_URL + TEST_IMAGES) test_images = extract_images(local_file) local_file = base.maybe_download(TEST_LABELS, train_dir, SOURCE_URL + TEST_LABELS) test_labels = extract_labels(local_file, one_hot=one_hot) validation_images = train_images[:VALIDATION_SIZE] validation_labels = train_labels[:VALIDATION_SIZE] train_images = train_images[VALIDATION_SIZE:] train_labels = train_labels[VALIDATION_SIZE:] train = DataSet(train_images, train_labels, start_id=0, dtype=dtype) validation = DataSet(validation_images, validation_labels, start_id=len(train_images), dtype=dtype) test = DataSet(test_images, test_labels, start_id=(len(train_images) + len(validation_images)), dtype=dtype) return base.Datasets(train=train, validation=validation, test=test)
Example #7
Source File: mnist.py From auto-alt-text-lambda-api with MIT License | 4 votes |
def read_data_sets(train_dir, fake_data=False, one_hot=False, dtype=dtypes.float32, reshape=True, validation_size=5000): if fake_data: def fake(): return DataSet([], [], fake_data=True, one_hot=one_hot, dtype=dtype) train = fake() validation = fake() test = fake() return base.Datasets(train=train, validation=validation, test=test) TRAIN_IMAGES = 'train-images-idx3-ubyte.gz' TRAIN_LABELS = 'train-labels-idx1-ubyte.gz' TEST_IMAGES = 't10k-images-idx3-ubyte.gz' TEST_LABELS = 't10k-labels-idx1-ubyte.gz' local_file = base.maybe_download(TRAIN_IMAGES, train_dir, SOURCE_URL + TRAIN_IMAGES) with open(local_file, 'rb') as f: train_images = extract_images(f) local_file = base.maybe_download(TRAIN_LABELS, train_dir, SOURCE_URL + TRAIN_LABELS) with open(local_file, 'rb') as f: train_labels = extract_labels(f, one_hot=one_hot) local_file = base.maybe_download(TEST_IMAGES, train_dir, SOURCE_URL + TEST_IMAGES) with open(local_file, 'rb') as f: test_images = extract_images(f) local_file = base.maybe_download(TEST_LABELS, train_dir, SOURCE_URL + TEST_LABELS) with open(local_file, 'rb') as f: test_labels = extract_labels(f, one_hot=one_hot) if not 0 <= validation_size <= len(train_images): raise ValueError( 'Validation size should be between 0 and {}. Received: {}.' .format(len(train_images), validation_size)) validation_images = train_images[:validation_size] validation_labels = train_labels[:validation_size] train_images = train_images[validation_size:] train_labels = train_labels[validation_size:] train = DataSet(train_images, train_labels, dtype=dtype, reshape=reshape) validation = DataSet(validation_images, validation_labels, dtype=dtype, reshape=reshape) test = DataSet(test_images, test_labels, dtype=dtype, reshape=reshape) return base.Datasets(train=train, validation=validation, test=test)
Example #8
Source File: mnist.py From deep_image_model with Apache License 2.0 | 4 votes |
def read_data_sets(train_dir, fake_data=False, one_hot=False, dtype=dtypes.float32, reshape=True, validation_size=5000): if fake_data: def fake(): return DataSet([], [], fake_data=True, one_hot=one_hot, dtype=dtype) train = fake() validation = fake() test = fake() return base.Datasets(train=train, validation=validation, test=test) TRAIN_IMAGES = 'train-images-idx3-ubyte.gz' TRAIN_LABELS = 'train-labels-idx1-ubyte.gz' TEST_IMAGES = 't10k-images-idx3-ubyte.gz' TEST_LABELS = 't10k-labels-idx1-ubyte.gz' local_file = base.maybe_download(TRAIN_IMAGES, train_dir, SOURCE_URL + TRAIN_IMAGES) with open(local_file, 'rb') as f: train_images = extract_images(f) local_file = base.maybe_download(TRAIN_LABELS, train_dir, SOURCE_URL + TRAIN_LABELS) with open(local_file, 'rb') as f: train_labels = extract_labels(f, one_hot=one_hot) local_file = base.maybe_download(TEST_IMAGES, train_dir, SOURCE_URL + TEST_IMAGES) with open(local_file, 'rb') as f: test_images = extract_images(f) local_file = base.maybe_download(TEST_LABELS, train_dir, SOURCE_URL + TEST_LABELS) with open(local_file, 'rb') as f: test_labels = extract_labels(f, one_hot=one_hot) if not 0 <= validation_size <= len(train_images): raise ValueError( 'Validation size should be between 0 and {}. Received: {}.' .format(len(train_images), validation_size)) validation_images = train_images[:validation_size] validation_labels = train_labels[:validation_size] train_images = train_images[validation_size:] train_labels = train_labels[validation_size:] train = DataSet(train_images, train_labels, dtype=dtype, reshape=reshape) validation = DataSet(validation_images, validation_labels, dtype=dtype, reshape=reshape) test = DataSet(test_images, test_labels, dtype=dtype, reshape=reshape) return base.Datasets(train=train, validation=validation, test=test)
Example #9
Source File: dgp_rff_mnist.py From deep_gp_random_features with Apache License 2.0 | 4 votes |
def import_mnist(): """ This import mnist and saves the data as an object of our DataSet class :return: """ SOURCE_URL = 'http://yann.lecun.com/exdb/mnist/' TRAIN_IMAGES = 'train-images-idx3-ubyte.gz' TRAIN_LABELS = 'train-labels-idx1-ubyte.gz' TEST_IMAGES = 't10k-images-idx3-ubyte.gz' TEST_LABELS = 't10k-labels-idx1-ubyte.gz' VALIDATION_SIZE = 0 ONE_HOT = True TRAIN_DIR = 'MNIST_data' local_file = base.maybe_download(TRAIN_IMAGES, TRAIN_DIR, SOURCE_URL + TRAIN_IMAGES) train_images = extract_images(open(local_file, 'rb')) local_file = base.maybe_download(TRAIN_LABELS, TRAIN_DIR, SOURCE_URL + TRAIN_LABELS) train_labels = extract_labels(open(local_file, 'rb'), one_hot=ONE_HOT) local_file = base.maybe_download(TEST_IMAGES, TRAIN_DIR, SOURCE_URL + TEST_IMAGES) test_images = extract_images(open(local_file, 'rb')) local_file = base.maybe_download(TEST_LABELS, TRAIN_DIR, SOURCE_URL + TEST_LABELS) test_labels = extract_labels(open(local_file, 'rb'), one_hot=ONE_HOT) validation_images = train_images[:VALIDATION_SIZE] validation_labels = train_labels[:VALIDATION_SIZE] train_images = train_images[VALIDATION_SIZE:] train_labels = train_labels[VALIDATION_SIZE:] ## Process images train_images = process_mnist(train_images) validation_images = process_mnist(validation_images) test_images = process_mnist(test_images) ## Standardize data train_mean, train_std = get_data_info(train_images) # train_images = standardize_data(train_images, train_mean, train_std) # validation_images = standardize_data(validation_images, train_mean, train_std) # test_images = standardize_data(test_images, train_mean, train_std) data = DataSet(train_images, train_labels) test = DataSet(test_images, test_labels) val = DataSet(validation_images, validation_labels) return data, test, val
Example #10
Source File: dataset.py From tanda with MIT License | 4 votes |
def read_data_sets(train_dir, fake_data=False, one_hot=False, dtype=dtypes.float32, reshape=True, validation_size=5000): if fake_data: def fake(): return DataSet([], [], fake_data=True, one_hot=one_hot, dtype=dtype) train = fake() validation = fake() test = fake() return base.Datasets(train=train, validation=validation, test=test) TRAIN_IMAGES = 'train-images-idx3-ubyte.gz' TRAIN_LABELS = 'train-labels-idx1-ubyte.gz' TEST_IMAGES = 't10k-images-idx3-ubyte.gz' TEST_LABELS = 't10k-labels-idx1-ubyte.gz' local_file = base.maybe_download(TRAIN_IMAGES, train_dir, SOURCE_URL + TRAIN_IMAGES) with open(local_file, 'rb') as f: train_images = extract_images(f) local_file = base.maybe_download(TRAIN_LABELS, train_dir, SOURCE_URL + TRAIN_LABELS) with open(local_file, 'rb') as f: train_labels = extract_labels(f, one_hot=one_hot) local_file = base.maybe_download(TEST_IMAGES, train_dir, SOURCE_URL + TEST_IMAGES) with open(local_file, 'rb') as f: test_images = extract_images(f) local_file = base.maybe_download(TEST_LABELS, train_dir, SOURCE_URL + TEST_LABELS) with open(local_file, 'rb') as f: test_labels = extract_labels(f, one_hot=one_hot) if not 0 <= validation_size <= len(train_images): raise ValueError( 'Validation size should be between 0 and {}. Received: {}.' .format(len(train_images), validation_size)) validation_images = train_images[:validation_size] validation_labels = train_labels[:validation_size] train_images = train_images[validation_size:] train_labels = train_labels[validation_size:] train = DataSet(train_images, train_labels, dtype=dtype, reshape=reshape) validation = DataSet(validation_images, validation_labels, dtype=dtype, reshape=reshape) test = DataSet(test_images, test_labels, dtype=dtype, reshape=reshape) return base.Datasets(train=train, validation=validation, test=test)
Example #11
Source File: cifar10.py From memoryGAN with MIT License | 4 votes |
def read_data_sets(train_dir, fake_data=False, one_hot=False, dtype=dtypes.float32, reshape=True, validation_size=0): if fake_data: def fake(): return DataSet([], [], fake_data=True, one_hot=one_hot, dtype=dtype) train = fake() validation = fake() test = fake() return base.Datasets(train=train, validation=validation, test=test) gz_file_name = 'cifar-10-python.tar.gz' local_file = base.maybe_download(gz_file_name, train_dir, SOURCE_URL + gz_file_name) train_images = [] train_labels = [] for i in range(1, 6): with open(os.path.join(train_dir, 'cifar-10-batches-py', 'data_batch_%d'%i)) as f: batch = numpy.load(f) tmp_images = batch['data'].reshape([-1, 3, 32, 32]) train_images.append(tmp_images.transpose([0, 2, 3, 1])) train_labels += batch['labels'] train_images = numpy.concatenate(train_images) train_labels = numpy.array(train_labels) if not 0 <= validation_size <= len(train_images): raise ValueError( 'Validation size should be between 0 and {}. Received: {}.' .format(len(train_images), validation_size)) validation_images = train_images[:validation_size] validation_labels = train_labels[:validation_size] train_images = train_images[validation_size:] train_labels = train_labels[validation_size:] train = DataSet(train_images, train_labels, dtype=dtype, reshape=reshape) validation = DataSet(validation_images, validation_labels, dtype=dtype, reshape=reshape) #test = DataSet(test_images, test_labels, dtype=dtype, reshape=reshape) test = None return base.Datasets(train=train, validation=validation, test=test)
Example #12
Source File: fashion.py From memoryGAN with MIT License | 4 votes |
def read_data_sets(train_dir, fake_data=False, one_hot=False, dtype=dtypes.float32, reshape=True, validation_size=5000): if fake_data: def fake(): return DataSet([], [], fake_data=True, one_hot=one_hot, dtype=dtype) train = fake() validation = fake() test = fake() return base.Datasets(train=train, validation=validation, test=test) TRAIN_IMAGES = 'train-images-idx3-ubyte.gz' TRAIN_LABELS = 'train-labels-idx1-ubyte.gz' TEST_IMAGES = 't10k-images-idx3-ubyte.gz' TEST_LABELS = 't10k-labels-idx1-ubyte.gz' local_file = base.maybe_download(TRAIN_IMAGES, train_dir, SOURCE_URL + TRAIN_IMAGES) with open(local_file, 'rb') as f: train_images = extract_images(f) local_file = base.maybe_download(TRAIN_LABELS, train_dir, SOURCE_URL + TRAIN_LABELS) with open(local_file, 'rb') as f: train_labels = extract_labels(f, one_hot=one_hot) local_file = base.maybe_download(TEST_IMAGES, train_dir, SOURCE_URL + TEST_IMAGES) with open(local_file, 'rb') as f: test_images = extract_images(f) local_file = base.maybe_download(TEST_LABELS, train_dir, SOURCE_URL + TEST_LABELS) with open(local_file, 'rb') as f: test_labels = extract_labels(f, one_hot=one_hot) if not 0 <= validation_size <= len(train_images): raise ValueError( 'Validation size should be between 0 and {}. Received: {}.' .format(len(train_images), validation_size)) validation_images = train_images[:validation_size] validation_labels = train_labels[:validation_size] train_images = train_images[validation_size:] train_labels = train_labels[validation_size:] train = DataSet(train_images, train_labels, dtype=dtype, reshape=reshape) validation = DataSet(validation_images, validation_labels, dtype=dtype, reshape=reshape) test = DataSet(test_images, test_labels, dtype=dtype, reshape=reshape) return base.Datasets(train=train, validation=validation, test=test)
Example #13
Source File: affmnist.py From memoryGAN with MIT License | 4 votes |
def read_data_sets(train_dir, fake_data=False, one_hot=False, dtype=dtypes.float32, reshape=True, validation_size=5000): if fake_data: def fake(): return DataSet([], [], fake_data=True, one_hot=one_hot, dtype=dtype) train = fake() validation = fake() test = fake() return base.Datasets(train=train, validation=validation, test=test) TRAIN_IMAGES = 'train-images-idx3-ubyte.gz' TRAIN_LABELS = 'train-labels-idx1-ubyte.gz' TEST_IMAGES = 't10k-images-idx3-ubyte.gz' TEST_LABELS = 't10k-labels-idx1-ubyte.gz' local_file = base.maybe_download(TRAIN_IMAGES, train_dir, SOURCE_URL + TRAIN_IMAGES) with open(local_file, 'rb') as f: train_images = extract_images(f) local_file = base.maybe_download(TRAIN_LABELS, train_dir, SOURCE_URL + TRAIN_LABELS) with open(local_file, 'rb') as f: train_labels = extract_labels(f, one_hot=one_hot) local_file = base.maybe_download(TEST_IMAGES, train_dir, SOURCE_URL + TEST_IMAGES) with open(local_file, 'rb') as f: test_images = extract_images(f) local_file = base.maybe_download(TEST_LABELS, train_dir, SOURCE_URL + TEST_LABELS) with open(local_file, 'rb') as f: test_labels = extract_labels(f, one_hot=one_hot) if not 0 <= validation_size <= len(train_images): raise ValueError( 'Validation size should be between 0 and {}. Received: {}.' .format(len(train_images), validation_size)) validation_images = train_images[:validation_size] validation_labels = train_labels[:validation_size] train_images = train_images[validation_size:] train_labels = train_labels[validation_size:] train = DataSet(train_images, train_labels, dtype=dtype, reshape=reshape) validation = DataSet(validation_images, validation_labels, dtype=dtype, reshape=reshape) test = DataSet(test_images, test_labels, dtype=dtype, reshape=reshape) return base.Datasets(train=train, validation=validation, test=test)
Example #14
Source File: mnist.py From keras-lambda with MIT License | 4 votes |
def read_data_sets(train_dir, fake_data=False, one_hot=False, dtype=dtypes.float32, reshape=True, validation_size=5000): if fake_data: def fake(): return DataSet([], [], fake_data=True, one_hot=one_hot, dtype=dtype) train = fake() validation = fake() test = fake() return base.Datasets(train=train, validation=validation, test=test) TRAIN_IMAGES = 'train-images-idx3-ubyte.gz' TRAIN_LABELS = 'train-labels-idx1-ubyte.gz' TEST_IMAGES = 't10k-images-idx3-ubyte.gz' TEST_LABELS = 't10k-labels-idx1-ubyte.gz' local_file = base.maybe_download(TRAIN_IMAGES, train_dir, SOURCE_URL + TRAIN_IMAGES) with open(local_file, 'rb') as f: train_images = extract_images(f) local_file = base.maybe_download(TRAIN_LABELS, train_dir, SOURCE_URL + TRAIN_LABELS) with open(local_file, 'rb') as f: train_labels = extract_labels(f, one_hot=one_hot) local_file = base.maybe_download(TEST_IMAGES, train_dir, SOURCE_URL + TEST_IMAGES) with open(local_file, 'rb') as f: test_images = extract_images(f) local_file = base.maybe_download(TEST_LABELS, train_dir, SOURCE_URL + TEST_LABELS) with open(local_file, 'rb') as f: test_labels = extract_labels(f, one_hot=one_hot) if not 0 <= validation_size <= len(train_images): raise ValueError( 'Validation size should be between 0 and {}. Received: {}.' .format(len(train_images), validation_size)) validation_images = train_images[:validation_size] validation_labels = train_labels[:validation_size] train_images = train_images[validation_size:] train_labels = train_labels[validation_size:] train = DataSet(train_images, train_labels, dtype=dtype, reshape=reshape) validation = DataSet(validation_images, validation_labels, dtype=dtype, reshape=reshape) test = DataSet(test_images, test_labels, dtype=dtype, reshape=reshape) return base.Datasets(train=train, validation=validation, test=test)