Python tensorflow.gfile.Glob() Examples

The following are 30 code examples of tensorflow.gfile.Glob(), drawn from open source projects. Each example is preceded by a reference to the project and source file it comes from, so you can follow it back to the original code. You may also want to check out the other available functions and classes of the tensorflow.gfile module.
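Before the project examples, here is a minimal standalone sketch of the call itself. It assumes TensorFlow 1.x, where gfile.Glob is available; in TensorFlow 2.x the equivalent call is tf.io.gfile.glob. The local pattern below is purely illustrative; Glob accepts remote URIs such as gs:// paths as well and returns a list of matching filenames.

from tensorflow import gfile

# Expand a glob pattern into a list of matching filenames. The same call works
# for local paths and for remote filesystems such as GCS (gs://...) or HDFS.
matches = gfile.Glob('/tmp/*.csv')

# The result is not guaranteed to be sorted, so sort it when ordering matters.
for path in sorted(matches):
    print(path)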
Example #1
Source File: utils.py    From professional-services with Apache License 2.0
def read_df_from_gcs(file_pattern):
  """Read data from Google Cloud Storage, split into train and validation sets.

  Assume that the data on GCS is in csv format without header.
  The column names will be provided through metadata

  Args:
    file_pattern: (string) pattern of the files containing training data.
    For example: [gs://bucket/folder_name/prefix]

  Returns:
    pandas.DataFrame
  """

  # Download the files to local /tmp/ folder
  df_list = []

  for filepath in gfile.Glob(file_pattern):
    with gfile.Open(filepath, 'r') as f:
      # Assume there is no header
      df_list.append(pd.read_csv(f, names=metadata.CSV_COLUMNS))

  data_df = pd.concat(df_list)

  return data_df 
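A hypothetical invocation of the helper above (the bucket path is an assumption, and metadata.CSV_COLUMNS is defined elsewhere in the project):

# Read every CSV shard matching the prefix into a single DataFrame.
train_df = read_df_from_gcs('gs://my-bucket/training-data/part-*')
print(train_df.shape)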
Example #2
Source File: inference-combine-tfrecords-video.py    From youtube-8m with Apache License 2.0
def get_input_data_tensors(reader,
                           data_pattern,
                           batch_size=256):
  logging.info("Using batch size of " + str(batch_size) + " for evaluation.")
  with tf.name_scope("eval_input"):
    files = gfile.Glob(data_pattern)
    if not files:
      raise IOError("Unable to find the evaluation files.")
    logging.info("number of evaluation files: " + str(len(files)))
    files.sort()
    filename_queue = tf.train.string_input_producer(
        files, shuffle=False, num_epochs=1)
    eval_data = reader.prepare_reader(filename_queue)
    return tf.train.batch(
        eval_data,
        batch_size=batch_size,
        capacity=batch_size,
        allow_smaller_final_batch=True,
        enqueue_many=True) 
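The tensors returned above come from TensorFlow 1.x input queues, so they only yield data once queue runners are started inside a session. The following self-contained sketch (using illustrative strings instead of real TFRecord files) shows the same string_input_producer / tf.train.batch pattern end to end:

import tensorflow as tf

# A toy filename queue: one epoch over three "files", in order.
name_queue = tf.train.string_input_producer(['a', 'b', 'c'],
                                            shuffle=False, num_epochs=1)
item = name_queue.dequeue()
batch = tf.train.batch([item], batch_size=2, allow_smaller_final_batch=True)

with tf.Session() as sess:
    # num_epochs is tracked in a local variable, so initialize local variables.
    sess.run(tf.local_variables_initializer())
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    try:
        while True:
            print(sess.run(batch))  # a batch of two, then a smaller final batch
    except tf.errors.OutOfRangeError:
        pass  # the single epoch is exhausted
    finally:
        coord.request_stop()
        coord.join(threads)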
Example #3
Source File: utils.py    From cloudml-samples with Apache License 2.0
def read_df_from_gcs(file_pattern):
  """Read data from Google Cloud Storage, split into train and validation sets.

  Assume that the data on GCS is in csv format without header.
  The column names will be provided through metadata

  Args:
    file_pattern: (string) pattern of the files containing training data.
    For example: [gs://bucket/folder_name/prefix]

  Returns:
    pandas.DataFrame
  """

  # Download the files to local /tmp/ folder
  df_list = []

  for filepath in gfile.Glob(file_pattern):
    with gfile.Open(filepath, 'r') as f:
      # Assume there is no header
      df_list.append(pd.read_csv(f, names=metadata.CSV_COLUMNS))

  data_df = pd.concat(df_list)

  return data_df 
Example #4
Source File: inference-combine-tfrecords-frame.py    From youtube-8m with Apache License 2.0
def get_input_data_tensors(reader,
                           data_pattern,
                           batch_size=256):
  logging.info("Using batch size of " + str(batch_size) + " for evaluation.")
  with tf.name_scope("eval_input"):
    files = gfile.Glob(data_pattern)
    if not files:
      raise IOError("Unable to find the evaluation files.")
    logging.info("number of evaluation files: " + str(len(files)))
    files.sort()
    filename_queue = tf.train.string_input_producer(
        files, shuffle=False, num_epochs=1)
    eval_data = reader.prepare_reader(filename_queue)
    return tf.train.batch(
        eval_data,
        batch_size=batch_size,
        capacity=4 * batch_size,
        allow_smaller_final_batch=True,
        enqueue_many=True) 
Example #5
Source File: check_distillation.py    From youtube-8m with Apache License 2.0
def get_input_evaluation_tensors(reader,
                                 data_pattern,
                                 batch_size=256):
  logging.info("Using batch size of " + str(batch_size) + " for evaluation.")
  with tf.name_scope("eval_input"):
    files = gfile.Glob(data_pattern)
    if not files:
      print(data_pattern, files)
      raise IOError("Unable to find the evaluation files.")
    logging.info("number of evaluation files: " + str(len(files)))
    files.sort()
    filename_queue = tf.train.string_input_producer(
        files, shuffle=False, num_epochs=1)
    eval_data = reader.prepare_reader(filename_queue)
    return tf.train.batch(
        eval_data,
        batch_size=batch_size,
        capacity=3 * batch_size,
        allow_smaller_final_batch=True,
        enqueue_many=True) 
Example #6
Source File: inference-pre-ensemble.py    From youtube-8m with Apache License 2.0
def get_input_data_tensors(reader,
                                 data_pattern,
                                 batch_size=256):
  logging.info("Using batch size of " + str(batch_size) + " for evaluation.")
  with tf.name_scope("eval_input"):
    files = gfile.Glob(data_pattern)
    if not files:
      raise IOError("Unable to find the evaluation files.")
    logging.info("number of evaluation files: " + str(len(files)))
    files.sort()
    filename_queue = tf.train.string_input_producer(
        files, shuffle=False, num_epochs=1)
    eval_data = reader.prepare_reader(filename_queue)
    return tf.train.batch(
        eval_data,
        batch_size=batch_size,
        capacity=4 * batch_size,
        allow_smaller_final_batch=True,
        enqueue_many=True) 
Example #7
Source File: inference.py    From youtube-8m with Apache License 2.0
def get_input_data_tensors(reader,
                           data_pattern,
                           batch_size=256):
  logging.info("Using batch size of " + str(batch_size) + " for evaluation.")
  with tf.name_scope("eval_input"):
    files = gfile.Glob(data_pattern)
    if not files:
      raise IOError("Unable to find the evaluation files.")
    logging.info("number of evaluation files: " + str(len(files)))
    files.sort()
    filename_queue = tf.train.string_input_producer(
        files, shuffle=False, num_epochs=1)
    eval_data = reader.prepare_reader(filename_queue)
    return tf.train.batch(
        eval_data,
        batch_size=batch_size,
        capacity=3 * batch_size,
        allow_smaller_final_batch=True,
        enqueue_many=True) 
Example #8
Source File: eval.py    From youtube-8m with Apache License 2.0
def get_input_evaluation_tensors(reader,
                                 data_pattern,
                                 batch_size=256):
  logging.info("Using batch size of " + str(batch_size) + " for evaluation.")
  with tf.name_scope("eval_input"):
    files = gfile.Glob(data_pattern)
    if not files:
      print(data_pattern, files)
      raise IOError("Unable to find the evaluation files.")
    logging.info("number of evaluation files: " + str(len(files)))
    files.sort()
    filename_queue = tf.train.string_input_producer(
        files, shuffle=False, num_epochs=1)
    eval_data = reader.prepare_reader(filename_queue)
    return tf.train.batch(
        eval_data,
        batch_size=batch_size,
        capacity=3 * batch_size,
        allow_smaller_final_batch=True,
        enqueue_many=True) 
Example #9
Source File: train.py    From youtube-8m with Apache License 2.0
def get_input_data_tensors(reader,
                           data_pattern,
                           batch_size=256,
                           num_epochs=None):
  logging.info("Using batch size of " + str(batch_size) + " for training.")
  with tf.name_scope("train_input"):
    files = gfile.Glob(data_pattern)
    if not files:
      raise IOError("Unable to find training files. data_pattern='" +
                    data_pattern + "'.")
    logging.info("Number of training files: %s.", str(len(files)))
    files.sort()
    filename_queue = tf.train.string_input_producer(
        files, num_epochs=num_epochs, shuffle=False)
    training_data = reader.prepare_reader(filename_queue)

    return tf.train.batch(
        training_data,
        batch_size=batch_size,
        capacity=FLAGS.batch_size * 4,
        allow_smaller_final_batch=True,
        enqueue_many=True) 
Example #10
Source File: check_video_id.py    From youtube-8m with Apache License 2.0
def get_input_evaluation_tensors(reader,
                                 data_pattern,
                                 batch_size=256):
  logging.info("Using batch size of " + str(batch_size) + " for evaluation.")
  with tf.name_scope("eval_input"):
    files = gfile.Glob(data_pattern)
    if not files:
      print(data_pattern, files)
      raise IOError("Unable to find the evaluation files.")
    logging.info("number of evaluation files: " + str(len(files)))
    files.sort()
    filename_queue = tf.train.string_input_producer(
        files, shuffle=False, num_epochs=1)
    eval_data = reader.prepare_reader(filename_queue)
    return tf.train.batch(
        eval_data,
        batch_size=batch_size,
        capacity=3 * batch_size,
        allow_smaller_final_batch=True,
        enqueue_many=True) 
Example #11
Source File: eval.py    From youtube-8m with Apache License 2.0
def get_input_evaluation_tensors(reader,
                                 data_pattern,
                                 batch_size=1024):

  logging.info("Using batch size of " + str(batch_size) + " for evaluation.")
  with tf.name_scope("eval_input"):
    files = gfile.Glob(data_pattern)
    if not files:
      raise IOError("Unable to find the evaluation files.")
    logging.info("number of evaluation files: " + str(len(files)))
    files.sort()
    filename_queue = tf.train.string_input_producer(
        files, shuffle=False, num_epochs=1)
    eval_data = reader.prepare_reader(filename_queue)
    return tf.train.batch(
        eval_data,
        batch_size=batch_size,
        capacity=3 * batch_size,
        allow_smaller_final_batch=True,
        enqueue_many=True) 
Example #12
Source File: main.py    From training_results_v0.5 with Apache License 2.0
def validate(
        working_dir: 'tf.estimator working directory',
        *tf_record_dirs: 'Directories where holdout data are',
        checkpoint_name: 'Which checkpoint to evaluate (None=latest)'=None,
        validate_name: 'Name for validation set (i.e., selfplay or human)'=None):
    qmeas.start_time('validate')
    tf_records = []
    with timer("Building lists of holdout files"):
        for record_dir in tf_record_dirs:
            tf_records.extend(gfile.Glob(os.path.join(record_dir, '*.zz')))

    first_record = os.path.basename(tf_records[0])
    last_record = os.path.basename(tf_records[-1])
    with timer("Validating from {} to {}".format(first_record, last_record)):
        dual_net.validate(
            working_dir, tf_records, checkpoint_name=checkpoint_name,
            name=validate_name)
    qmeas.stop_time('validate') 
Example #13
Source File: main.py    From training_results_v0.5 with Apache License 2.0
def validate(
        working_dir: 'tf.estimator working directory',
        *tf_record_dirs: 'Directories where holdout data are',
        checkpoint_name: 'Which checkpoint to evaluate (None=latest)'=None,
        validate_name: 'Name for validation set (i.e., selfplay or human)'=None):
    qmeas.start_time('validate')
    tf_records = []
    with timer("Building lists of holdout files"):
        for record_dir in tf_record_dirs:
            tf_records.extend(gfile.Glob(os.path.join(record_dir, '*.zz')))

    first_record = os.path.basename(tf_records[0])
    last_record = os.path.basename(tf_records[-1])
    with timer("Validating from {} to {}".format(first_record, last_record)):
        dual_net.validate(
            working_dir, tf_records, checkpoint_name=checkpoint_name,
            name=validate_name)
    qmeas.stop_time('validate') 
Example #14
Source File: controller.py    From Python-Reinforcement-Learning-Projects with MIT License
def train():
    model_version, model_name = get_latest_model()
    logger.info("Training on gathered game data, initializing from {}".format(model_name))
    new_model_name = generate(model_version + 1)
    logger.info("New model will be {}".format(new_model_name))
    save_file = os.path.join(PATHS.MODELS_DIR, new_model_name)

    try:
        logger.info("Getting tf_records")
        tf_records = sorted(gfile.Glob(os.path.join(PATHS.TRAINING_CHUNK_DIR, '*.tfrecord.zz')))
        tf_records = tf_records[
                     -1 * (GLOBAL_PARAMETER_STORE.WINDOW_SIZE // GLOBAL_PARAMETER_STORE.EXAMPLES_PER_RECORD):]

        print("Training from:", tf_records[0], "to", tf_records[-1])

        with timer("Training"):
            network.train(PATHS.ESTIMATOR_WORKING_DIR, tf_records, model_version+1)
            network.export_latest_checkpoint_model(PATHS.ESTIMATOR_WORKING_DIR, save_file)

    except:
        logger.info("Got an error training")
        logging.exception("Train error") 
Example #15
Source File: inputs.py    From ffn with Apache License 2.0
def create_filename_queue(coordinates_file_pattern, shuffle=True):
  """Creates a queue for reading coordinates from coordinate file.

  Args:
    coordinates_file_pattern: File pattern for TFRecords of
                              input examples of the form of a glob
                              pattern or path@shards.
    shuffle: Whether to shuffle the coordinate file list. Note that the expanded
             coordinates_file_pattern is not guaranteed to be sorted
             alphabetically.

  Returns:
    Tensorflow queue with coordinate filenames
  """
  m = re.search(r'@(\d{1,})', coordinates_file_pattern)
  if m:
    num_shards = int(m.group(1))
    coord_file_list = [
      re.sub(r'@(\d{1,})', '-%.5d-of-%.5d' % (i, num_shards),
             coordinates_file_pattern)
      for i in range(num_shards)]
  else:
    coord_file_list = gfile.Glob(coordinates_file_pattern)
  return tf.train.string_input_producer(coord_file_list, shuffle=shuffle) 
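The path@shards form handled above can be illustrated in isolation; this small standalone snippet (with an assumed pattern) applies the same regular-expression expansion that create_filename_queue uses:

import re

pattern = 'coordinates@3'  # hypothetical sharded spec
m = re.search(r'@(\d{1,})', pattern)
if m:
    num_shards = int(m.group(1))
    shard_files = [
        re.sub(r'@(\d{1,})', '-%.5d-of-%.5d' % (i, num_shards), pattern)
        for i in range(num_shards)]
    print(shard_files)
    # ['coordinates-00000-of-00003', 'coordinates-00001-of-00003',
    #  'coordinates-00002-of-00003']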
Example #16
Source File: cbt_models.py    From training with Apache License 2.0
def get_model_data(glob, existing):
    """Read all model meta filenames and extract per model metadata."""

    globbed = sorted(gfile.Glob(glob))

    skipped = 0
    model_data = []
    for model_path in tqdm(globbed):
        assert model_path.lower().endswith(".meta"), model_path
        model_run, model_num, model_name = parse_model_components(model_path)
        row_name = MODEL_PREFIX.format(run=model_run, num=model_name)

        if row_name in existing:
            skipped += 1
            continue

        metadata = (
            (MODEL_NAME, model_name),
            (b"model_num", model_num),
            (b"run", model_run),
            (b"parent", ""),
            (b"tag", ""),
            (b"tool", "cbt_models_backfill_to_cbt"),
            (b"trained_date", ""),
        )
        model_data.append((row_name, metadata))

    print("Read {} Models, {} new, {} existing".format(
        len(globbed), len(model_data), skipped))
    return model_data 
Example #17
Source File: eval_custom.py    From youtube8mchallenge with Apache License 2.0
def get_input_evaluation_tensors(reader,
                                 data_pattern,
                                 batch_size=1024,
                                 num_readers=1):
    """Creates the section of the graph which reads the evaluation data.

    Args:
      reader: A class which parses the training data.
      data_pattern: A 'glob' style path to the data files.
      batch_size: How many examples to process at a time.
      num_readers: How many I/O threads to use.

    Returns:
      A tuple containing the features tensor, labels tensor, and optionally a
      tensor containing the number of frames per video. The exact dimensions
      depend on the reader being used.

    Raises:
      IOError: If no files matching the given pattern were found.
    """
    logging.info("Using batch size of " + str(batch_size) + " for evaluation.")
    with tf.name_scope("eval_input"):
        files = gfile.Glob(data_pattern)
        if not files:
            raise IOError("Unable to find the evaluation files.")
        logging.info("number of evaluation files: " + str(len(files)))
        filename_queue = tf.train.string_input_producer(
            files, shuffle=False, num_epochs=1)
        eval_data = [
            reader.prepare_reader(filename_queue) for _ in range(num_readers)
            ]
        return tf.train.batch_join(
            eval_data,
            batch_size=batch_size,
            capacity=3 * batch_size,
            allow_smaller_final_batch=True,
            enqueue_many=True) 
Example #18
Source File: convert_prediction_from_json_to_csv.py    From youtube8mchallenge with Apache License 2.0
def main(unused_argv):
  logging.set_verbosity(tf.logging.INFO)

  if not FLAGS.json_prediction_files_pattern:
    raise ValueError(
        "The flag --json_prediction_files_pattern must be specified.")

  if not FLAGS.csv_output_file:
    raise ValueError("The flag --csv_output_file must be specified.")

  logging.info("Looking for prediction files with pattern: %s", 
               FLAGS.json_prediction_files_pattern)

  file_paths = gfile.Glob(FLAGS.json_prediction_files_pattern)  
  logging.info("Found files: %s", file_paths)

  logging.info("Writing submission file to: %s", FLAGS.csv_output_file)
  with gfile.Open(FLAGS.csv_output_file, "w+") as output_file:
    output_file.write(get_csv_header())

    for file_path in file_paths:
      logging.info("processing file: %s", file_path)

      with gfile.Open(file_path) as input_file:

        for line in input_file: 
          json_data = json.loads(line)
          output_file.write(to_csv_row(json_data))

    output_file.flush()
  logging.info("done") 
Example #19
Source File: inference_gpu.py    From youtube8mchallenge with Apache License 2.0
def get_input_data_tensors(reader, data_pattern, batch_size, num_readers=1):
  """Creates the section of the graph which reads the input data.

  Args:
    reader: A class which parses the input data.
    data_pattern: A 'glob' style path to the data files.
    batch_size: How many examples to process at a time.
    num_readers: How many I/O threads to use.

  Returns:
    A tuple containing the features tensor, labels tensor, and optionally a
    tensor containing the number of frames per video. The exact dimensions
    depend on the reader being used.

  Raises:
    IOError: If no files matching the given pattern were found.
  """
  with tf.name_scope("input"):
    files = gfile.Glob(data_pattern)
    if not files:
      raise IOError("Unable to find input files. data_pattern='" +
                    data_pattern + "'")
    logging.info("number of input files: " + str(len(files)))
    filename_queue = tf.train.string_input_producer(
        files, num_epochs=1, shuffle=False)
    examples_and_labels = [reader.prepare_reader(filename_queue)
                           for _ in range(num_readers)]

    video_id_batch, video_batch, unused_labels, num_frames_batch = (
        tf.train.batch_join(examples_and_labels,
                            batch_size=batch_size,
                            allow_smaller_final_batch=True,
                            enqueue_many=True))
    return video_id_batch, video_batch, num_frames_batch 
Example #20
Source File: inference.py    From youtube-8m with Apache License 2.0
def get_input_data_tensors(reader, data_pattern, batch_size, num_readers=1):
  """Creates the section of the graph which reads the input data.

  Args:
    reader: A class which parses the input data.
    data_pattern: A 'glob' style path to the data files.
    batch_size: How many examples to process at a time.
    num_readers: How many I/O threads to use.

  Returns:
    A tuple containing the features tensor, labels tensor, and optionally a
    tensor containing the number of frames per video. The exact dimensions
    depend on the reader being used.

  Raises:
    IOError: If no files matching the given pattern were found.
  """
  with tf.name_scope("input"):
    files = gfile.Glob(data_pattern)
    if not files:
      raise IOError("Unable to find input files. data_pattern='" +
                    data_pattern + "'")
    logging.info("number of input files: " + str(len(files)))
    filename_queue = tf.train.string_input_producer(files,
                                                    num_epochs=1,
                                                    shuffle=False)
    examples_and_labels = [
        reader.prepare_reader(filename_queue) for _ in range(num_readers)
    ]

    input_data_dict = (tf.train.batch_join(examples_and_labels,
                                           batch_size=batch_size,
                                           allow_smaller_final_batch=True,
                                           enqueue_many=True))
    video_id_batch = input_data_dict["video_ids"]
    video_batch = input_data_dict["video_matrix"]
    num_frames_batch = input_data_dict["num_frames"]
    return video_id_batch, video_batch, num_frames_batch 
Example #21
Source File: convert_prediction_from_json_to_csv.py    From youtube-8m with Apache License 2.0
def main(unused_argv):
  logging.set_verbosity(tf.logging.INFO)

  if not FLAGS.json_prediction_files_pattern:
    raise ValueError(
        "The flag --json_prediction_files_pattern must be specified.")

  if not FLAGS.csv_output_file:
    raise ValueError("The flag --csv_output_file must be specified.")

  logging.info("Looking for prediction files with pattern: %s",
               FLAGS.json_prediction_files_pattern)

  file_paths = gfile.Glob(FLAGS.json_prediction_files_pattern)
  logging.info("Found files: %s", file_paths)

  logging.info("Writing submission file to: %s", FLAGS.csv_output_file)
  with gfile.Open(FLAGS.csv_output_file, "w+") as output_file:
    output_file.write(get_csv_header())

    for file_path in file_paths:
      logging.info("processing file: %s", file_path)

      with gfile.Open(file_path) as input_file:

        for line in input_file:
          json_data = json.loads(line)
          output_file.write(to_csv_row(json_data))

    output_file.flush()
  logging.info("done") 
Example #22
Source File: loop_train_eval.py    From training_results_v0.5 with Apache License 2.0
def get_models():
    """Finds all models, returning a list of model number and names
    sorted increasing.

    Returns: [(13, 000013-modelname), (17, 000017-modelname), ...etc]
    """
    all_models = gfile.Glob(os.path.join(MODELS_DIR, '*.meta'))
    model_filenames = [os.path.basename(m) for m in all_models]
    model_numbers_names = sorted([
        (shipname.detect_model_num(m), shipname.detect_model_name(m))
        for m in model_filenames])
    return model_numbers_names 
Example #23
Source File: selfplay_worker.py    From training_results_v0.5 with Apache License 2.0
def rl_loop():
    """Run the reinforcement learning loop

    This tries to create a realistic way to run the reinforcement learning with
    all default parameters.
    """

    if goparams.DUMMY_MODEL:
        # monkeypatch the hyperparams so that we get a quickly executing network.
        dual_net.get_default_hyperparams = lambda **kwargs: {
            'k': 8, 'fc_width': 16, 'num_shared_layers': 1, 'l2_strength': 1e-4, 'momentum': 0.9}

        dual_net.TRAIN_BATCH_SIZE = 16
        dual_net.EXAMPLES_PER_GENERATION = 64

        #monkeypatch the shuffle buffer size so we don't spin forever shuffling up positions.
        preprocessing.SHUFFLE_BUFFER_SIZE = 1000

    _, model_name = get_latest_model()
    network = selfplay_laod_model(model_name)

    if sys.argv[3]=='worker':
        selfplay_dir = os.path.join(SELFPLAY_DIR, model_name)
    else:
        selfplay_dir = SELFPLAY_BACKUP_DIR

    def count_games():
      # returns number of games in the selfplay directory
      if not os.path.exists(selfplay_dir):
        # directory not existing implies no games have been played yet
        return 0
      return len(gfile.Glob(os.path.join(selfplay_dir, '*.zz')))

    while count_games() < goparams.MAX_GAMES_PER_GENERATION and not os.path.isfile("PK_FLAG"):
      selfplay_cache_model(network, model_name, verbose=0)

    print('Stopping selfplay after finding {} games played.'.format(count_games())) 
Example #24
Source File: selfplay_worker.py    From training_results_v0.5 with Apache License 2.0
def get_models():
    """Finds all models, returning a list of model number and names
    sorted increasing.

    Returns: [(13, 000013-modelname), (17, 000017-modelname), ...etc]
    """
    all_models = gfile.Glob(os.path.join(MODELS_DIR, '*.meta'))
    model_filenames = [os.path.basename(m) for m in all_models]
    model_numbers_names = sorted([
        (shipname.detect_model_num(m), shipname.detect_model_name(m))
        for m in model_filenames])
    return model_numbers_names 
Example #25
Source File: loop_selfplay.py    From training_results_v0.5 with Apache License 2.0
def get_models():
    """Finds all models, returning a list of model number and names
    sorted increasing.

    Returns: [(13, 000013-modelname), (17, 000017-modelname), ...etc]
    """
    all_models = gfile.Glob(os.path.join(MODELS_DIR, '*.meta'))
    model_filenames = [os.path.basename(m) for m in all_models]
    model_numbers_names = sorted([
        (shipname.detect_model_num(m), shipname.detect_model_name(m))
        for m in model_filenames])
    return model_numbers_names 
Example #26
Source File: inference.py    From Youtube-8M-WILLOW with Apache License 2.0
def get_input_data_tensors(reader, data_pattern, batch_size, num_readers=1):
  """Creates the section of the graph which reads the input data.

  Args:
    reader: A class which parses the input data.
    data_pattern: A 'glob' style path to the data files.
    batch_size: How many examples to process at a time.
    num_readers: How many I/O threads to use.

  Returns:
    A tuple containing the features tensor, labels tensor, and optionally a
    tensor containing the number of frames per video. The exact dimensions
    depend on the reader being used.

  Raises:
    IOError: If no files matching the given pattern were found.
  """
  with tf.name_scope("input"):
    files = gfile.Glob(data_pattern)
    if not files:
      raise IOError("Unable to find input files. data_pattern='" +
                    data_pattern + "'")
    logging.info("number of input files: " + str(len(files)))
    filename_queue = tf.train.string_input_producer(
        files, num_epochs=1, shuffle=False)
    examples_and_labels = [reader.prepare_reader(filename_queue)
                           for _ in range(num_readers)]

    video_id_batch, video_batch, unused_labels, num_frames_batch = (
        tf.train.batch_join(examples_and_labels,
                            batch_size=batch_size,
                            allow_smaller_final_batch = True,
                            enqueue_many=True))
    return video_id_batch, video_batch, num_frames_batch 
Example #27
Source File: main.py    From training_results_v0.5 with Apache License 2.0
def train(
        working_dir: 'tf.estimator working directory.',
        chunk_dir: 'Directory where gathered training chunks are.',
        model_save_path: 'Where to export the completed generation.',
        generation_num: 'Which generation you are training.'=0):
    qmeas.start_time('train')
    tf_records = sorted(gfile.Glob(os.path.join(chunk_dir, '*.tfrecord.zz')))
    tf_records = tf_records[-1 * (WINDOW_SIZE // EXAMPLES_PER_RECORD):]

    print("Training from:", tf_records[0], "to", tf_records[-1])

    with timer("Training"):
        dual_net.train(working_dir, tf_records, generation_num)
        dual_net.export_model(working_dir, model_save_path)
    qmeas.stop_time('train') 
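The slice over tf_records keeps only the newest training chunks; a toy version with assumed constants makes the arithmetic concrete:

# With 100 sorted chunk files and WINDOW_SIZE // EXAMPLES_PER_RECORD == 50,
# the slice below keeps the 50 most recently written chunks.
WINDOW_SIZE = 500000
EXAMPLES_PER_RECORD = 10000
tf_records = ['chunk-%03d.tfrecord.zz' % i for i in range(100)]
recent = tf_records[-1 * (WINDOW_SIZE // EXAMPLES_PER_RECORD):]
print(len(recent), recent[0], recent[-1])
# 50 chunk-050.tfrecord.zz chunk-099.tfrecord.zz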
Example #28
Source File: training_curve.py    From training_results_v0.5 with Apache License 2.0
def get_model_paths(model_dir):
    '''Returns all model paths in the model_dir.'''
    all_models = gfile.Glob(os.path.join(model_dir, '*.meta'))
    model_filenames = [os.path.basename(m) for m in all_models]
    model_numbers_names = [
        (shipname.detect_model_num(m), shipname.detect_model_name(m))
        for m in model_filenames]
    model_names = sorted(model_numbers_names)
    return [os.path.join(model_dir, name[1]) for name in model_names] 
Example #29
Source File: fsdb.py    From training with Apache License 2.0
def get_models():
    """Finds all models, returning a list of model number and names
    sorted increasing.

    Returns: [(13, 000013-modelname), (17, 000017-modelname), ...etc]
    """
    all_models = gfile.Glob(os.path.join(models_dir(), '*.meta'))
    model_filenames = [os.path.basename(m) for m in all_models]
    model_numbers_names = sorted([
        (shipname.detect_model_num(m), shipname.detect_model_name(m))
        for m in model_filenames])
    return model_numbers_names 
Example #30
Source File: loop_train_eval.py    From training_results_v0.5 with Apache License 2.0
def get_models():
    """Finds all models, returning a list of model number and names
    sorted increasing.

    Returns: [(13, 000013-modelname), (17, 000017-modelname), ...etc]
    """
    all_models = gfile.Glob(os.path.join(MODELS_DIR, '*.meta'))
    model_filenames = [os.path.basename(m) for m in all_models]
    model_numbers_names = sorted([
        (shipname.detect_model_num(m), shipname.detect_model_name(m))
        for m in model_filenames])
    return model_numbers_names