Python tensorflow.python.platform.gfile.Glob() Examples

The following are 30 code examples of tensorflow.python.platform.gfile.Glob(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module tensorflow.python.platform.gfile, or try the search function.

Example #1

Source File: captcha_records.py From captcha_recognize with Apache License 2.0

6 votes

def create_data_list(image_dir):
  """Load the images found in `image_dir` as grayscale arrays with labels.

  Files directly inside `image_dir` whose extension is jpg, jpeg or png
  (lower- or upper-case) are opened, converted to PIL mode 'L', resized to
  IMAGE_WIDTH x IMAGE_HEIGHT and stored as int16 arrays. The label of each
  image is the part of its base name before the first underscore.

  Args:
    image_dir: directory to search for image files.

  Returns:
    `zip(images, labels)`, or None if the directory does not exist or
    contains no matching files.
  """
  if not gfile.Exists(image_dir):
    print("Image director '" + image_dir + "' not found.")
    return None
  extensions = ['jpg', 'JPG', 'jpeg', 'JPEG', 'png', 'PNG']
  print("Looking for images in '" + image_dir + "'")
  file_list = []
  for extension in extensions:
    file_glob = os.path.join(image_dir, '*.' + extension)
    file_list.extend(gfile.Glob(file_glob))
  if not file_list:
    print("No files found in '" + image_dir + "'")
    return None
  images = []
  labels = []
  for file_name in file_list:
    # The context manager closes the file even when convert/resize/array
    # conversion raises, so a bad image cannot leak an open handle.
    with Image.open(file_name) as image:
      image_gray = image.convert('L')
      image_resize = image_gray.resize(size=(IMAGE_WIDTH, IMAGE_HEIGHT))
      input_img = np.array(image_resize, dtype='int16')
    label_name = os.path.basename(file_name).split('_')[0]
    images.append(input_img)
    labels.append(label_name)
  return zip(images, labels)

Example #2

Source File: convert_bair.py From amortized-variational-filtering with MIT License

6 votes

def convert(data_path):
    """Unpack the TFRecord files under `data_path` into per-sequence PNG folders.

    For each split ('train', 'test'), every file in
    `<data_path>/softmotion30_44k/<split>` is read and the 30 frames of each
    sequence are written to `<data_path>/<split>/<index>/<frame>.png`.
    The starting sequence index of a file is the integer between the first
    and second underscore of its file name.

    Output directories are created with a plain `os.makedirs`, so they must
    not exist beforehand.

    Args:
        data_path: root directory that contains the `softmotion30_44k` folder
            and receives the output split folders.
    """
    # iterate through the data splits
    for data_split in ['train', 'test']:
        os.makedirs(os.path.join(data_path, data_split))
        data_split_path = os.path.join(data_path, 'softmotion30_44k', data_split)
        data_split_files = gfile.Glob(os.path.join(data_split_path, '*'))
        # iterate through the TF records
        for f in data_split_files:
            print('Current file: ' + f)
            # starting video index; basename() instead of a hard-coded '/'
            # split so the lookup does not depend on the path separator
            ind = int(os.path.basename(f).split('_')[1])
            # iterate through the sequences in this TF record
            for serialized_example in tf.python_io.tf_record_iterator(f):
                sequence_dir = os.path.join(data_path, data_split, str(ind))
                os.makedirs(sequence_dir)
                example = tf.train.Example()
                example.ParseFromString(serialized_example)
                # iterate through the sequence
                for i in range(30):
                    image_name = str(i) + '/image_aux1/encoded'
                    byte_str = example.features.feature[image_name].bytes_list.value[0]
                    img = Image.frombytes('RGB', (64, 64), byte_str)
                    # rows x cols x channels, scaled by 1/255
                    img = np.array(img.getdata()).reshape(img.size[1], img.size[0], 3) / 255.
                    imsave(os.path.join(sequence_dir, str(i) + '.png'), img)
                print('     Finished processing sequence ' + str(ind))
                ind += 1

Example #3

Source File: push_dataset_grab_train_images.py From costar_plan with Apache License 2.0

6 votes

def build_image_input(train=True, novel=True):
  """Create input tfrecord tensors.

  Args:
    train: whether to read the training data. NOTE(review): every branch
      below resolves to the same directory, so this flag currently has no
      effect on which files are read.
    novel: whether or not to grab novel or seen images (only consulted when
      `train` is False).
  Returns:
    list of tensors corresponding to images. The images
    tensor is 5D, batch x time x height x width x channels.
    NOTE(review): the visible body stops after the first record read and has
    no return statement -- the remainder of the function appears to be
    missing from this snippet.
  Raises:
    RuntimeError: if no files found.
  """
  # All three branches point at the same download folder.
  if train:
    data_dir = os.path.expanduser('~/Downloads/google_brainrobotdata_push')
  elif novel:
    data_dir = os.path.expanduser('~/Downloads/google_brainrobotdata_push')
  else:
    data_dir = os.path.expanduser('~/Downloads/google_brainrobotdata_push')
  # Every entry directly inside data_dir is treated as an input file.
  filenames = gfile.Glob(os.path.join(data_dir, '*'))
  print(filenames)
  if not filenames:
    raise RuntimeError('No data files found.')
  # shuffle=False keeps the files in the order returned by Glob.
  filename_queue = tf.train.string_input_producer(filenames, shuffle=False)
  reader = tf.TFRecordReader()
  _, serialized_example = reader.read(filename_queue)

Example #4

Source File: setup_predictor.py From visual_foresight with MIT License

6 votes

def get_maxiter_weights(dir):
    """Return the `model*` path in `dir` with the highest trailing number.

    Files matching `<dir>/model*` are ranked by the integer formed by the
    digits at the very end of their path; files that do not end in digits
    rank lowest (-1).

    Args:
        dir: directory to search for `model*` files.

    Returns:
        The best-ranked path with everything from the first '.' of its file
        name onwards removed, or None when nothing matches.
    """
    try:
        filenames = gfile.Glob(dir + '/model*')
    except NotFoundError:
        print('nothing found at ', dir + '/model*')
        return None
    if not filenames:
        return None
    iternums = []
    for f in filenames:
        # An explicit match check replaces the former bare `except:`, which
        # would also have swallowed unrelated errors.
        match = re.match('.*?([0-9]+)$', f)
        iternums.append(int(match.group(1)) if match else -1)
    best = filenames[np.argmax(np.array(iternums))]
    # Strip the suffix from the file name only: splitting the whole path on
    # '.' would truncate it at the first dot of any directory component
    # (e.g. './ckpt/model10' or 'run.v2/model10').
    head, sep, tail = best.rpartition('/')
    return head + sep + tail.split('.')[0]

Example #5

Source File: graph_io_test.py From deep_image_model with Apache License 2.0

5 votes

def test_read_text_lines_multifile(self):
    """Reads two text files line by line and checks graph ops and line order."""
    # Use the Glob implementation saved on the fixture for this test.
    gfile.Glob = self._orig_glob
    temp_files = self._create_sorted_temp_files(["ABC\n", "DEF\nGHK\n"])

    op_name = "my_batch"
    file_name_queue_name = "%s/file_name_queue" % op_name
    expected_ops = {
        "%s/input" % file_name_queue_name: "Const",
        file_name_queue_name: "FIFOQueue",
        "%s/read/TextLineReader" % op_name: "TextLineReader",
        "%s/fifo_queue" % op_name: "FIFOQueue",
        op_name: "QueueDequeueUpTo"
    }

    with tf.Graph().as_default() as g, self.test_session(graph=g) as session:
      inputs = tf.contrib.learn.io.read_batch_examples(
          temp_files, 1, reader=tf.TextLineReader,
          randomize_input=False, num_epochs=1, queue_capacity=5,
          name=op_name)
      self.assertAllEqual((None,), inputs.get_shape().as_list())
      session.run(tf.local_variables_initializer())

      coord = tf.train.Coordinator()
      threads = tf.train.start_queue_runners(session, coord=coord)

      self.assertEqual("%s:1" % op_name, inputs.name)
      test_util.assert_ops_in_graph(expected_ops, g)

      # One line per batch, in file order, then the single epoch is exhausted.
      for expected_line in (b"ABC", b"DEF", b"GHK"):
        self.assertAllEqual(session.run(inputs), [expected_line])
      with self.assertRaises(errors.OutOfRangeError):
        session.run(inputs)

      coord.request_stop()
      coord.join(threads)

Example #6

Source File: parallel_reader.py From deep_image_model with Apache License 2.0

5 votes

def get_data_files(data_sources):
  """Get data_files from data_sources.

  Args:
    data_sources: a list/tuple of files or the location of the data, i.e.
      /path/to/train@128, /path/to/train* or /tmp/.../train*

  Returns:
    a list of data_files.

  Raises:
    ValueError: if no data files are found

  """
  if isinstance(data_sources, (list, tuple)):
    data_files = []
    for source in data_sources:
      data_files += get_data_files(source)
  else:
    # Only strings containing glob metacharacters are expanded; anything else
    # is taken as a literal file name.
    if '*' in data_sources or '?' in data_sources or '[' in data_sources:
      data_files = gfile.Glob(data_sources)
    else:
      data_files = [data_sources]
  if not data_files:
    # Format the message explicitly: passing data_sources as a second
    # ValueError argument leaves the '%s' placeholder unexpanded. The
    # one-element tuple keeps a tuple-valued data_sources from being unpacked.
    raise ValueError('No data files found in %s' % (data_sources,))
  return data_files

Example #7

Source File: tensorflow_dataframe.py From deep_image_model with Apache License 2.0

5 votes

def _expand_file_names(filepatterns):
  """Resolves file patterns into a de-duplicated list of file names.

  A single pattern may be passed directly; lists, tuples and sets of patterns
  are expanded one by one. The order of the returned list is unspecified.
  """
  if isinstance(filepatterns, (list, tuple, set)):
    patterns = filepatterns
  else:
    patterns = [filepatterns]
  resolved = set()
  for pattern in patterns:
    resolved.update(gfile.Glob(pattern))
  return list(resolved)

Example #8

Source File: captcha_recognize.py From captcha_recognize with Apache License 2.0

5 votes

def input_data(image_dir):
  """Load the images found in `image_dir` into one flattened float32 matrix.

  Files directly inside `image_dir` whose extension is jpg, jpeg or png
  (lower- or upper-case) are opened, converted to PIL mode 'L' and resized to
  IMAGE_WIDTH x IMAGE_HEIGHT. Each image is flattened, multiplied by 1/255
  and shifted by -0.5 before being stored as one row.

  Args:
    image_dir: directory to search for image files.

  Returns:
    A tuple `(images, files)`: `images` has one row of
    IMAGE_HEIGHT*IMAGE_WIDTH values per file and `files` holds the matching
    base names. Returns None if the directory does not exist or contains no
    matching files.
  """
  if not gfile.Exists(image_dir):
    print(">> Image director '" + image_dir + "' not found.")
    return None
  extensions = ['jpg', 'JPG', 'jpeg', 'JPEG', 'png', 'PNG']
  print(">> Looking for images in '" + image_dir + "'")
  file_list = []
  for extension in extensions:
    file_glob = os.path.join(image_dir, '*.' + extension)
    file_list.extend(gfile.Glob(file_glob))
  if not file_list:
    print(">> No files found in '" + image_dir + "'")
    return None
  batch_size = len(file_list)
  images = np.zeros([batch_size, IMAGE_HEIGHT*IMAGE_WIDTH], dtype='float32')
  files = []
  for i, file_name in enumerate(file_list):
    # The context manager closes the file even when convert/resize raises.
    with Image.open(file_name) as image:
      image_gray = image.convert('L')
      image_resize = image_gray.resize(size=(IMAGE_WIDTH, IMAGE_HEIGHT))
    input_img = np.array(image_resize, dtype='float32')
    images[i, :] = np.multiply(input_img.flatten(), 1./255) - 0.5
    files.append(os.path.basename(file_name))
  return images, files

Example #9

Source File: input_data.py From honk with MIT License

5 votes

def prepare_background_data(self):
    """Searches a folder for background noise audio, and loads it into memory.

    It's expected that the background audio samples will be in a subdirectory
    named '_background_noise_' inside the 'data_dir' folder, as .wavs that match
    the sample rate of the training data, but can be much longer in duration.

    If the '_background_noise_' folder doesn't exist at all, this isn't an
    error, it's just taken to mean that no background noise augmentation should
    be used. If the folder does exist, but it's empty, that's treated as an
    error.

    Returns:
      List of raw PCM-encoded audio samples of background noise (the same list
      that is stored in `self.background_data`); empty when the folder does
      not exist.

    Raises:
      Exception: If files aren't found in the folder.
    """
    self.background_data = []
    background_dir = os.path.join(self.data_dir, BACKGROUND_NOISE_DIR_NAME)
    if not os.path.exists(background_dir):
      return self.background_data
    search_path = os.path.join(background_dir, '*.wav')
    # A throwaway graph/session is used only to decode the wav files.
    with tf.Session(graph=tf.Graph()) as sess:
      wav_filename_placeholder = tf.placeholder(tf.string, [])
      wav_loader = io_ops.read_file(wav_filename_placeholder)
      wav_decoder = contrib_audio.decode_wav(wav_loader, desired_channels=1)
      for wav_path in gfile.Glob(search_path):
        wav_data = sess.run(
            wav_decoder,
            feed_dict={wav_filename_placeholder: wav_path}).audio.flatten()
        self.background_data.append(wav_data)
      if not self.background_data:
        raise Exception('No background wav files were found in ' + search_path)
    # Return the list on the success path as well, matching the early return
    # above and the documented contract.
    return self.background_data

Example #10

Source File: graph_io.py From deep_image_model with Apache License 2.0

5 votes

def read_batch_record_features(file_pattern, batch_size, features,
                               randomize_input=True, num_epochs=None,
                               queue_capacity=10000, reader_num_threads=1,
                               name='dequeue_record_examples'):
  """Reads TFRecord, queues, batches and parses `Example` proto.

  This is `read_batch_features` with the reader fixed to
  `io_ops.TFRecordReader`; every other argument is forwarded unchanged.

  See more detailed description in `read_examples`.

  Args:
    file_pattern: List of files or pattern of file paths containing
        `Example` records. See `tf.gfile.Glob` for pattern rules.
    batch_size: An int or scalar `Tensor` specifying the batch size to use.
    features: A `dict` mapping feature keys to `FixedLenFeature` or
      `VarLenFeature` values.
    randomize_input: Whether the input should be randomized.
    num_epochs: Integer specifying the number of times to read through the
      dataset. If None, cycles through the dataset forever. NOTE - If specified,
      creates a variable that must be initialized, so call
      tf.local_variables_initializer() as shown in the tests.
    queue_capacity: Capacity for input queue.
    reader_num_threads: The number of threads to read examples.
    name: Name of resulting op.

  Returns:
    A dict of `Tensor` or `SparseTensor` objects for each in `features`.

  Raises:
    ValueError: for invalid inputs.
  """
  # Delegate to the generic reader; only `reader` is chosen here.
  return read_batch_features(
      file_pattern=file_pattern,
      batch_size=batch_size,
      features=features,
      reader=io_ops.TFRecordReader,
      randomize_input=randomize_input,
      num_epochs=num_epochs,
      queue_capacity=queue_capacity,
      reader_num_threads=reader_num_threads,
      name=name)

Example #11

Source File: graph_io_test.py From deep_image_model with Apache License 2.0

5 votes

def setUp(self):
    """Seeds `random` and replaces `gfile.Glob` with `self._mock_glob`."""
    super(GraphIOTest, self).setUp()
    # Fixed seed so anything drawing from `random` is repeatable across runs.
    random.seed(42)
    # Remember the current Glob so it can be put back later.
    self._orig_glob = gfile.Glob
    gfile.Glob = self._mock_glob

Example #12

Source File: graph_io_test.py From deep_image_model with Apache License 2.0

5 votes

def tearDown(self):
    """Puts back the `gfile.Glob` saved in `self._orig_glob`."""
    # Undo the module-level patch before the base class tears down.
    gfile.Glob = self._orig_glob
    super(GraphIOTest, self).tearDown()

Example #13

Source File: graph_io_test.py From deep_image_model with Apache License 2.0

5 votes

def test_read_text_lines(self):
    """Reads a three-line temp file one line per batch and checks the order."""
    # Use the Glob implementation saved on the fixture for this test.
    gfile.Glob = self._orig_glob
    temp_path = self._create_temp_file("ABC\nDEF\nGHK\n")

    with tf.Graph().as_default() as g, self.test_session(graph=g) as session:
      inputs = tf.contrib.learn.io.read_batch_examples(
          temp_path, 1, reader=tf.TextLineReader,
          randomize_input=False, num_epochs=1, queue_capacity=5,
          name="my_batch")
      self.assertAllEqual((None,), inputs.get_shape().as_list())
      session.run(tf.local_variables_initializer())

      coord = tf.train.Coordinator()
      threads = tf.train.start_queue_runners(session, coord=coord)

      # One line per batch, then the single epoch is exhausted.
      for expected_line in (b"ABC", b"DEF", b"GHK"):
        self.assertAllEqual(session.run(inputs), [expected_line])
      with self.assertRaises(errors.OutOfRangeError):
        session.run(inputs)

      coord.request_stop()
      coord.join(threads)

Example #14

Source File: parallel_reader.py From tf-slim with Apache License 2.0

5 votes

def get_data_files(data_sources):
  """Expand `data_sources` into a flat list of data files.

  Args:
    data_sources: a file name, a glob pattern (containing `*`, `?` or `[`),
      or a list/tuple of these, possibly nested, e.g. /path/to/train@128,
      /path/to/train* or /tmp/.../train*

  Returns:
    a list of data_files.

  Raises:
    ValueError: if no data files are found

  """
  if isinstance(data_sources, (list, tuple)):
    matched = []
    for entry in data_sources:
      matched.extend(get_data_files(entry))
  elif any(token in data_sources for token in ('*', '?', '[')):
    # Only strings with glob metacharacters are expanded.
    matched = gfile.Glob(data_sources)
  else:
    matched = [data_sources]
  if matched:
    return matched
  raise ValueError('No data files found in %s' % (data_sources,))

Example #15

Source File: graph_io_test.py From deep_image_model with Apache License 2.0

5 votes

def test_batch_text_lines(self):
    """Reads a five-line temp file in batches of three and checks the batches."""
    # Use the Glob implementation saved on the fixture for this test.
    gfile.Glob = self._orig_glob
    temp_path = self._create_temp_file("A\nB\nC\nD\nE\n")

    with tf.Graph().as_default() as g, self.test_session(graph=g) as session:
      inputs = tf.contrib.learn.io.read_batch_examples(
          [temp_path], 3, reader=tf.TextLineReader,
          randomize_input=False, num_epochs=1, queue_capacity=10,
          read_batch_size=10, name="my_batch")
      self.assertAllEqual((None,), inputs.get_shape().as_list())
      session.run(tf.local_variables_initializer())

      coord = tf.train.Coordinator()
      threads = tf.train.start_queue_runners(session, coord=coord)

      # A full batch of three, then the two remaining lines, then exhaustion.
      for expected_batch in ([b"A", b"B", b"C"], [b"D", b"E"]):
        self.assertAllEqual(session.run(inputs), expected_batch)
      with self.assertRaises(errors.OutOfRangeError):
        session.run(inputs)

      coord.request_stop()
      coord.join(threads)

Example #16

Source File: graph_io_test.py From deep_image_model with Apache License 2.0

5 votes

def test_keyed_read_text_lines(self):
    """Reads a temp file line by line and checks each `<file>:<n>` key."""
    # Use the Glob implementation saved on the fixture for this test.
    gfile.Glob = self._orig_glob
    temp_path = self._create_temp_file("ABC\nDEF\nGHK\n")
    # (key suffix, line content) for each expected batch, in order.
    expected_rows = ((b":1", b"ABC"), (b":2", b"DEF"), (b":3", b"GHK"))

    with tf.Graph().as_default() as g, self.test_session(graph=g) as session:
      keys, inputs = tf.contrib.learn.io.read_keyed_batch_examples(
          temp_path, 1,
          reader=tf.TextLineReader, randomize_input=False,
          num_epochs=1, queue_capacity=5, name="my_batch")
      self.assertAllEqual((None,), keys.get_shape().as_list())
      self.assertAllEqual((None,), inputs.get_shape().as_list())
      session.run(tf.local_variables_initializer())

      coord = tf.train.Coordinator()
      threads = tf.train.start_queue_runners(session, coord=coord)

      for key_suffix, line in expected_rows:
        self.assertAllEqual(
            session.run([keys, inputs]),
            [[temp_path.encode("utf-8") + key_suffix], [line]])
      with self.assertRaises(errors.OutOfRangeError):
        session.run(inputs)

      coord.request_stop()
      coord.join(threads)

Example #17

Source File: grasp_dataset.py From costar_plan with Apache License 2.0

5 votes

def _get_grasp_tfrecord_info(self, feature_csv_file):
        """Read a feature csv file and list the tfrecords of its dataset.

        The first csv entry supplies the feature count (the integer before
        its first underscore), the second the attempt count, and the
        remaining entries are the feature names.

        # Arguments

            feature_csv_file: name of the feature csv file, relative to `self.data_dir`

        # Returns
            features: feature names, with 'grasp_success' and 'gripper/status' appended when absent
            tfrecord_paths: paths matching this dataset's tfrecord glob pattern
            feature_count: number of features, including any appended above
            attempt_count: number of grasp attempts
        """
        csv_path = os.path.join(os.path.expanduser(self.data_dir), feature_csv_file)
        entries = np.genfromtxt(csv_path, dtype=str)
        # need to account for multiple datasets with n features like 062_a and 062_b
        feature_count = int(entries[0].split('_')[0])
        attempt_count = int(entries[1])
        features = entries[2:]
        # Workaround for csv files which may not actually list the key features below,
        # although they have been added to the dataset itself.
        for required in ('grasp_success', 'gripper/status'):
            if not any(required in s for s in features):
                features = np.append(features, required)
                feature_count += 1
        # note that the tfrecords are often named '*{}.tfrecord*-of-*'
        tfrecord_paths = gfile.Glob(self._get_tfrecord_path_glob_pattern())
        return features, tfrecord_paths, feature_count, attempt_count

Example #18

Source File: util_data.py From kaggle_speech_recognition with MIT License

5 votes

def set_divider(data_dir, key_words, num_folds):
  """Computes the fold boundary markers for a training/validation split.

  Every `*nohash*.wav` file under `<data_dir>/<key word>` contributes one
  value: `speaker_hash_mod` of the lower-cased speaker part of its file name.
  The sorted values are cut into `num_folds` runs of equal sample count (the
  number of distinct speakers per fold may differ).

  Returns:
    A list of `num_folds + 1` markers; fold i covers the half-open range
    [divider[i], divider[i + 1]).
  """
  pattern = re.compile('.*/[^/]+/(.*)_nohash_.*.wav')

  speakers = sorted(
      speaker_hash_mod(pattern.search(wav).group(1).lower())
      for word in key_words
      for wav in gfile.Glob(os.path.join(data_dir, word, '*nohash*.wav')))

  total = len(speakers)
  fold_size = total // num_folds
  divider = [0] * (num_folds + 1)
  first_index = [0] * (num_folds + 1)
  for fold in range(num_folds):
    marker = speakers[fold * fold_size]
    divider[fold] = marker
    first_index[fold] = speakers.index(marker)
  # +1, for range [divider[fold_i],  divider[fold_i + 1]) used in which_set()
  divider[-1] = speakers[-1] + 1
  first_index[-1] = total - 1

  # Distinct values per fold, computed for the debug log only.
  fold_speakers = []
  for fold in range(num_folds):
    fold_slice = speakers[first_index[fold]:first_index[fold + 1]]
    fold_speakers.append(len(set(fold_slice)))
  tf.logging.debug('%d, dataset divider: %s', total, divider)
  tf.logging.debug('num_speakers_per_fold: %s', fold_speakers)
  return divider

Example #19

Source File: grasp_dataset.py From costar_plan with Apache License 2.0

5 votes

def _get_tfrecord_path_glob_pattern(self, dataset=None):
        """Build the glob pattern that matches a dataset's tfrecord files.

        The pattern is `<data_dir>/*<dataset>.tfrecord*`, with `~` in
        `self.data_dir` expanded.

        # Arguments
            dataset: The dataset name; it is first passed through
                `self._update_dataset_param`, and the value returned there is
                what ends up in the pattern.

        # Returns
            The glob pattern string.
        """
        resolved = self._update_dataset_param(dataset)
        base_dir = os.path.expanduser(self.data_dir)
        file_glob = '*{}.tfrecord*'.format(resolved)
        return os.path.join(base_dir, file_glob)

Example #20

Source File: util_data.py From kaggle_speech_recognition with MIT License

5 votes

def load_bg_audios(data_dir, audio_length, bg_noise_folder=BG_NOISE_FOLDER):
  """Loads background noise files.

  Decodes every `*.wav` file in `<data_dir>/<bg_noise_folder>` with
  `decode_audio`. When that folder does not exist, a message is printed and
  a single all-zero array of `audio_length` entries is returned instead.
  """
  noise_dir = os.path.join(data_dir, bg_noise_folder)
  if not os.path.exists(noise_dir):
    print('bg noise path %s does not exist' % noise_dir)
    return [np.zeros([audio_length])]
  wav_pattern = os.path.join(noise_dir, '*.wav')
  return [decode_audio(wav) for wav in gfile.Glob(wav_pattern)]

Example #21

Source File: input_data.py From TF_SpeechRecoChallenge with Apache License 2.0

5 votes

def prepare_background_data(self):
    """Searches a folder for background noise audio, and loads it into memory.

    It's expected that the background audio samples will be in a subdirectory
    named '_background_noise_' inside the 'data_dir' folder, as .wavs that match
    the sample rate of the training data, but can be much longer in duration.

    If the '_background_noise_' folder doesn't exist at all, this isn't an
    error, it's just taken to mean that no background noise augmentation should
    be used. If the folder does exist, but it's empty, that's treated as an
    error.

    Returns:
      List of raw PCM-encoded audio samples of background noise (the same list
      that is stored in `self.background_data`); empty when the folder does
      not exist.

    Raises:
      Exception: If files aren't found in the folder.
    """
    self.background_data = []
    background_dir = os.path.join(self.data_dir, BACKGROUND_NOISE_DIR_NAME)
    if not os.path.exists(background_dir):
      return self.background_data
    search_path = os.path.join(background_dir, '*.wav')
    # A throwaway graph/session is used only to decode the wav files.
    with tf.Session(graph=tf.Graph()) as sess:
      wav_filename_placeholder = tf.placeholder(tf.string, [])
      wav_loader = io_ops.read_file(wav_filename_placeholder)
      wav_decoder = contrib_audio.decode_wav(wav_loader, desired_channels=1)
      for wav_path in gfile.Glob(search_path):
        wav_data = sess.run(
            wav_decoder,
            feed_dict={wav_filename_placeholder: wav_path}).audio.flatten()
        self.background_data.append(wav_data)
      if not self.background_data:
        raise Exception('No background wav files were found in ' + search_path)
    # Return the list on the success path as well, matching the early return
    # above and the documented contract.
    return self.background_data

Example #22

Source File: infer.py From TF_SpeechRecoChallenge with Apache License 2.0

5 votes

def prepare_data_index(self):
    """Stores every `<data_dir>/*/*.wav` path in `self.data_indexs`."""
    # Look through all the subfolders to find audio samples
    wav_pattern = os.path.join(self.data_dir, '*', '*.wav')
    self.data_indexs = []
    self.data_indexs.extend(gfile.Glob(wav_pattern))

Example #23

Source File: grasp_dataset.py From costar_plan with Apache License 2.0

5 votes

def _get_feature_csv_file_paths(self, dataset=None):
        """List the feature csv files for a dataset, with full paths.

        Matches `<data_dir>/*<dataset>*.csv`, where `dataset` is first passed
        through `self._update_dataset_param` and `~` in `self.data_dir` is
        expanded. One example file name is: 'features_102.csv'
        """
        resolved = self._update_dataset_param(dataset)
        base_dir = os.path.expanduser(self.data_dir)
        csv_glob = os.path.join(base_dir, '*{}*.csv'.format(resolved))
        return gfile.Glob(csv_glob)

Example #24

Source File: tensorflow_dataframe.py From auto-alt-text-lambda-api with MIT License

5 votes

def _expand_file_names(filepatterns):
  """Expands file patterns into a list of unique matching file names.

  Accepts a single pattern or a list/tuple/set of patterns. Names matched by
  more than one pattern appear once; the order of the result is unspecified.
  """
  is_collection = isinstance(filepatterns, (list, tuple, set))
  unique_names = set()
  for pattern in (filepatterns if is_collection else [filepatterns]):
    unique_names.update(gfile.Glob(pattern))
  return list(unique_names)

Example #25

Source File: input_data.py From adversarial_audio with MIT License

5 votes

def prepare_background_data(self):
    """Searches a folder for background noise audio, and loads it into memory.

    It's expected that the background audio samples will be in a subdirectory
    named '_background_noise_' inside the 'data_dir' folder, as .wavs that match
    the sample rate of the training data, but can be much longer in duration.

    If the '_background_noise_' folder doesn't exist at all, this isn't an
    error, it's just taken to mean that no background noise augmentation should
    be used. If the folder does exist, but it's empty, that's treated as an
    error.

    Returns:
      List of raw PCM-encoded audio samples of background noise (the same list
      that is stored in `self.background_data`); empty when the folder does
      not exist.

    Raises:
      Exception: If files aren't found in the folder.
    """
    self.background_data = []
    background_dir = os.path.join(self.data_dir, BACKGROUND_NOISE_DIR_NAME)
    if not os.path.exists(background_dir):
      return self.background_data
    search_path = os.path.join(background_dir, '*.wav')
    # A throwaway graph/session is used only to decode the wav files.
    with tf.Session(graph=tf.Graph()) as sess:
      wav_filename_placeholder = tf.placeholder(tf.string, [])
      wav_loader = io_ops.read_file(wav_filename_placeholder)
      wav_decoder = contrib_audio.decode_wav(wav_loader, desired_channels=1)
      for wav_path in gfile.Glob(search_path):
        wav_data = sess.run(
            wav_decoder,
            feed_dict={wav_filename_placeholder: wav_path}).audio.flatten()
        self.background_data.append(wav_data)
      if not self.background_data:
        raise Exception('No background wav files were found in ' + search_path)
    # Return the list on the success path as well, matching the early return
    # above and the documented contract.
    return self.background_data

Example #26

Source File: graph_io.py From keras-lambda with MIT License

5 votes

def _get_file_names(file_pattern, randomize_input):
  """Turn a glob pattern or an explicit list into a list of file names.

  Args:
    file_pattern: File glob pattern, or list of strings. A list is used as
      given and is not expanded.
    randomize_input: Whether the caller will shuffle the names; when False
      the names are returned sorted.

  Returns:
    List of file names matching `file_pattern`.

  Raises:
    ValueError: If `file_pattern` is empty, or pattern matches no files.
  """
  if not isinstance(file_pattern, list):
    names = list(gfile.Glob(file_pattern))
    if not names:
      raise ValueError('No files match %s.' % file_pattern)
  elif not file_pattern:
    raise ValueError('No files given to dequeue_examples.')
  else:
    names = file_pattern

  # Sort files so it will be deterministic for unit tests. They'll be shuffled
  # in `string_input_producer` if `randomize_input` is enabled.
  return names if randomize_input else sorted(names)

Example #27

Source File: parallel_reader.py From lambda-packs with MIT License

5 votes

def get_data_files(data_sources):
  """Collect the data files named by `data_sources`.

  Args:
    data_sources: a file name, a glob pattern (containing `*`, `?` or `[`),
      or a list/tuple of these, possibly nested, e.g. /path/to/train@128,
      /path/to/train* or /tmp/.../train*

  Returns:
    a list of data_files.

  Raises:
    ValueError: if no data files are found

  """
  if isinstance(data_sources, (list, tuple)):
    found = []
    for item in data_sources:
      found += get_data_files(item)
  else:
    # Glob only when the string contains a wildcard character.
    has_wildcard = ('*' in data_sources or '?' in data_sources or
                    '[' in data_sources)
    found = gfile.Glob(data_sources) if has_wildcard else [data_sources]
  if found:
    return found
  raise ValueError('No data files found in %s' % (data_sources,))

Example #28

Source File: dataset_ops.py From lambda-packs with MIT License

5 votes

def _get_file_names(file_pattern, randomize_input):
  """Expand a glob pattern, or a list of them, into a list of file names.

  Args:
    file_pattern: File glob pattern, or list of glob patterns. Every entry of
      a list is expanded and the matches are concatenated.
    randomize_input: Whether to shuffle the order of file names; when False
      the names are returned sorted.

  Returns:
    List of file names matching `file_pattern`.

  Raises:
    ValueError: If `file_pattern` is empty, or pattern matches no files.
  """
  if not isinstance(file_pattern, list):
    matches = list(gfile.Glob(file_pattern))
  elif file_pattern:
    matches = []
    for pattern in file_pattern:
      matches.extend(gfile.Glob(pattern))
  else:
    raise ValueError("File pattern is empty.")

  if not matches:
    raise ValueError("No files match %s." % file_pattern)

  # Sort files so it will be deterministic for unit tests.
  return matches if randomize_input else sorted(matches)

Example #29

Source File: tensorflow_dataframe.py From lambda-packs with MIT License

5 votes

def _expand_file_names(filepatterns):
  """Returns the unique file names matched by one or more file patterns.

  `filepatterns` may be a single pattern or a list/tuple/set of patterns.
  The result is a list without duplicates, in unspecified order.
  """
  if not isinstance(filepatterns, (list, tuple, set)):
    filepatterns = [filepatterns]
  matched = set()
  for pattern in filepatterns:
    for file_name in gfile.Glob(pattern):
      matched.add(file_name)
  return list(matched)

Example #30

Source File: graph_io.py From lambda-packs with MIT License

5 votes

def _get_file_names(file_pattern, randomize_input):
  """Resolve a glob pattern, or a list of them, to the matching file names.

  Args:
    file_pattern: File glob pattern, or list of glob patterns. Every entry of
      a list is expanded and the matches are concatenated.
    randomize_input: Whether to shuffle the order of file names; when False
      the names are returned sorted.

  Returns:
    List of file names matching `file_pattern`.

  Raises:
    ValueError: If `file_pattern` is empty, or pattern matches no files.
  """
  if isinstance(file_pattern, list):
    if not file_pattern:
      raise ValueError('No files given to dequeue_examples.')
    patterns = file_pattern
  else:
    patterns = None

  if patterns is None:
    found = list(gfile.Glob(file_pattern))
  else:
    found = []
    for pattern in patterns:
      found.extend(gfile.Glob(pattern))

  if not found:
    raise ValueError('No files match %s.' % file_pattern)

  # Sort files so it will be deterministic for unit tests. They'll be shuffled
  # in `string_input_producer` if `randomize_input` is enabled.
  if randomize_input:
    return found
  return sorted(found)