Python tensorflow.gfile.Open() Examples

The following are 30 code examples of tensorflow.gfile.Open(), collected from open-source projects. The source file, project, and license are noted above each example. You may also want to check out the other functions and classes available in the tensorflow.gfile module.
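For context, gfile.Open is the TensorFlow 1.x file API: it mirrors the built-in open() but also understands paths on any filesystem TensorFlow is aware of, such as gs:// buckets on Google Cloud Storage. A minimal, self-contained sketch (the local path is only an illustration; a gs:// path you can write to would behave the same way):

from tensorflow import gfile

# Write text; gfile.Open handles local paths and gs:// paths identically.
with gfile.Open('/tmp/gfile_demo.txt', 'w') as f:
  f.write('hello gfile\n')

# Read it back; 'r' yields text, 'rb' yields bytes.
with gfile.Open('/tmp/gfile_demo.txt', 'r') as f:
  print(f.read())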
Example #1
Source File: dsprites.py    From disentanglement_lib with Apache License 2.0
def __init__(self, latent_factor_indices=None):
    # By default, all factors (including shape) are considered ground truth
    # factors.
    if latent_factor_indices is None:
      latent_factor_indices = list(range(6))
    self.latent_factor_indices = latent_factor_indices
    self.data_shape = [64, 64, 1]
    # Load the data so that we can sample from it.
    with gfile.Open(DSPRITES_PATH, "rb") as data_file:
      # Data was saved originally using python2, so we need to set the encoding.
      data = np.load(data_file, encoding="latin1", allow_pickle=True)
      self.images = np.array(data["imgs"])
      self.factor_sizes = np.array(
          data["metadata"][()]["latents_sizes"], dtype=np.int64)
    self.full_factor_sizes = [1, 3, 6, 40, 32, 32]
    self.factor_bases = np.prod(self.factor_sizes) / np.cumprod(
        self.factor_sizes)
    self.state_space = util.SplitDiscreteStateSpace(self.factor_sizes,
                                                    self.latent_factor_indices) 
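A note on the factor_bases line above: with factor_sizes [1, 3, 6, 40, 32, 32], np.prod(...) / np.cumprod(...) gives the mixed-radix place values [737280, 245760, 40960, 1024, 32, 1], i.e. each base is the product of the sizes of all later factors, so a tuple of per-factor indices maps to a single row of the flattened dSprites image array via a dot product. A small standalone sketch of that conversion (the factor values are arbitrary and this is not a method of the class itself):

import numpy as np

factor_sizes = np.array([1, 3, 6, 40, 32, 32], dtype=np.int64)
factor_bases = np.prod(factor_sizes) / np.cumprod(factor_sizes)
# factor_bases == [737280., 245760., 40960., 1024., 32., 1.]

factors = np.array([0, 2, 5, 10, 3, 7])          # one index per latent factor
flat_index = int(np.dot(factors, factor_bases))  # -> 706663
# data["imgs"][flat_index] would be the image rendered with exactly these factor values.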
Example #2
Source File: vocabulary.py    From transformer-xl with Apache License 2.0
def encode_file(self, path, ordered=False, verbose=False, add_eos=True,
          add_double_eos=False):
    if verbose: print('encoding file {} ...'.format(path))
    assert exists(path)
    encoded = []
    with open(path, 'r') as f:
      for idx, line in enumerate(f):
        if verbose and idx > 0 and idx % 500000 == 0:
          print('  line {}'.format(idx))
        symbols = self.tokenize(line, add_eos=add_eos,
          add_double_eos=add_double_eos)
        encoded.append(self.convert_to_nparray(symbols))

    if ordered:
      encoded = np.concatenate(encoded)

    return encoded 
Example #3
Source File: utils.py    From cloudml-samples with Apache License 2.0
def read_df_from_gcs(file_pattern):
  """Read data from Google Cloud Storage, split into train and validation sets.

  Assume that the data on GCS is in csv format without header.
  The column names will be provided through metadata

  Args:
    file_pattern: (string) pattern of the files containing training data.
    For example: [gs://bucket/folder_name/prefix]

  Returns:
    pandas.DataFrame
  """

  # Download the files to local /tmp/ folder
  df_list = []

  for filepath in gfile.Glob(file_pattern):
    with gfile.Open(filepath, 'r') as f:
      # Assume there is no header
      df_list.append(pd.read_csv(f, names=metadata.CSV_COLUMNS))

  data_df = pd.concat(df_list)

  return data_df 
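A hedged usage sketch for the function above (the bucket and prefix are placeholders, and metadata.CSV_COLUMNS is defined elsewhere in the sample):

# Hypothetical call; any pattern that gfile.Glob can expand will work.
train_df = read_df_from_gcs('gs://my-bucket/census/train-*.csv')
print(train_df.shape)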
Example #4
Source File: reference_implementation.py    From training with Apache License 2.0
async def train(state, tf_records):  # coroutine: the body uses await
  """Run training and write a new model to the fsdb models_dir.

  Args:
    state: the RL loop State instance.
    tf_records: a list of paths to TensorFlow records to train on.
  """

  model_path = os.path.join(fsdb.models_dir(), state.train_model_name)
  await run(
      'python3', 'train.py', *tf_records,
      '--flagfile={}'.format(os.path.join(FLAGS.flags_dir, 'train.flags')),
      '--work_dir={}'.format(fsdb.working_dir()),
      '--export_path={}'.format(model_path),
      '--training_seed={}'.format(state.seed),
      '--freeze=true')
  # Append the time elapsed from when the RL was started to when this model
  # was trained.
  elapsed = time.time() - state.start_time
  timestamps_path = os.path.join(fsdb.models_dir(), 'train_times.txt')
  with gfile.Open(timestamps_path, 'a') as f:
    print('{:.3f} {}'.format(elapsed, state.train_model_name), file=f) 
Example #5
Source File: reference_implementation.py    From training with Apache License 2.0
async def run(*cmd):  # coroutine: the body uses await
  """Run the given subprocess command in a coroutine.

  Args:
    *cmd: the command to run and its arguments.

  Returns:
    The output that the command wrote to stdout as a list of strings, one line
    per element (stderr output is piped to stdout).

  Raises:
    RuntimeError: if the command returns a non-zero result.
  """

  stdout = await checked_run(*cmd)

  log_path = os.path.join(FLAGS.base_dir, get_cmd_name(cmd) + '.log')
  with gfile.Open(log_path, 'a') as f:
    f.write(expand_cmd_str(cmd))
    f.write('\n')
    f.write(stdout)
    f.write('\n')

  # Split stdout into lines.
  return stdout.split('\n') 
Example #6
Source File: alias_generator.py    From training with Apache License 2.0
def run_real_data():
    print("Starting on real data.")
    metadata_path = "{}_train_metadata.pkl".format(_PREFIX)
    with Open(metadata_path, "rb") as f:
        train_metadata = pickle.load(f)
    num_items = train_metadata.num_cols
    print("num_items:", num_items)

    st = timeit.default_timer()
    sampler_cache = _PREFIX + "cached_sampler.pkl"
    if os.path.exists(sampler_cache):
      print("Using cache: {}".format(sampler_cache))
      with open(sampler_cache, "rb") as f:
        sampler, pos_users, pos_items = pickle.load(f)
    else:
      sampler, pos_users, pos_items = process_data(num_items=num_items, min_items_per_user=1, iter_fn=iter_data)
      with open(sampler_cache, "wb") as f:
        pickle.dump([sampler, pos_users, pos_items], f, pickle.HIGHEST_PROTOCOL)
    preproc_time = timeit.default_timer() - st
    num_users = len(sampler.num_regions)
    print("num_users:", num_users)
    print("Preprocessing complete: {:.1f} sec".format(preproc_time))
    print()

    _ = profile_sampler(sampler=sampler, batch_size=int(1e6), num_batches=1000, num_users=num_users) 
Example #7
Source File: vocabulary.py    From transformer-xl-chinese with Apache License 2.0
def count_file(self, path, verbose=False, add_eos=False):
        if verbose: print('counting file {} ...'.format(path))
        assert exists(path)

        sents = []
        with open(path, 'r') as f:
            for idx, line in enumerate(f):
                if verbose and idx > 0 and idx % 500000 == 0:
                    print('  line {}'.format(idx))
                symbols = self.tokenize(line, add_eos=True)
                self.counter.update(symbols)
                sents.append(symbols)

        return sents

    # update the tokens in the counter
Example #8
Source File: utils.py    From professional-services with Apache License 2.0
def read_df_from_gcs(file_pattern):
  """Read data from Google Cloud Storage, split into train and validation sets.

  Assume that the data on GCS is in csv format without header.
  The column names will be provided through metadata

  Args:
    file_pattern: (string) pattern of the files containing training data.
    For example: [gs://bucket/folder_name/prefix]

  Returns:
    pandas.DataFrame
  """

  # Download the files to local /tmp/ folder
  df_list = []

  for filepath in gfile.Glob(file_pattern):
    with gfile.Open(filepath, 'r') as f:
      # Assume there is no header
      df_list.append(pd.read_csv(f, names=metadata.CSV_COLUMNS))

  data_df = pd.concat(df_list)

  return data_df 
Example #9
Source File: vocabulary.py    From transformer-xl-chinese with Apache License 2.0
def encode_file(self, path, ordered=False, verbose=False,
                    add_double_eos=False):
        if verbose: print('encoding file {} ...'.format(path))
        assert exists(path)
        encoded = []
        with open(path, 'r') as f:
            for idx, line in enumerate(f):
                if verbose and idx > 0 and idx % 500000 == 0:
                    print('  line {}'.format(idx))
                symbols = self.tokenize(line, add_eos=True, add_double_eos=add_double_eos)

                encoded.append(self.convert_to_nparray(symbols))

        if ordered:
            encoded = np.concatenate(encoded)

        return encoded

Example #10
Source File: old_vocabulary.py    From transformer-xl-chinese with Apache License 2.0
def count_file(self, path, verbose=False, add_eos=False):
        if verbose: print('counting file {} ...'.format(path))
        assert exists(path)

        sents = []
        with open(path, 'r') as f:
            for idx, line in enumerate(f):
                if verbose and idx > 0 and idx % 500000 == 0:
                    print('  line {}'.format(idx))
                symbols = self.tokenize(line, add_eos=add_eos)
                self.counter.update(symbols)
                sents.append(symbols)

        return sents

    # update the tokens in the counter
Example #11
Source File: aggregate_results.py    From disentanglement_lib with Apache License 2.0
def aggregate_results_to_json(result_file_pattern, output_path):
  """Aggregates all the results files in the pattern into a single JSON file.

  Args:
    result_file_pattern: String with glob pattern to all the result files that
      should be aggregated (e.g. /tmp/*/results/aggregate/evaluation.json).
    output_path: String with path to output json file (e.g. /tmp/results.json).
  """
  logging.info("Loading the results.")
  model_results = _get(result_file_pattern)
  logging.info("Saving the aggregated results.")
  with gfile.Open(output_path, "w") as f:
    model_results.to_json(path_or_buf=f) 
Example #12
Source File: storage.py    From ffn with Apache License 2.0
def load_origins(segmentation_dir, corner):
  target_path = get_existing_subvolume_path(segmentation_dir, corner, False)
  if target_path is None:
    raise ValueError('Segmentation not found: %s, %s' % (segmentation_dir,
                                                         corner))

  with gfile.Open(target_path, 'rb') as f:
    data = np.load(f)
    return data['origins'].item() 
Example #13
Source File: deep_edge_trainer.py    From asymproj_edge_dnn with Apache License 2.0
def InFile(suffix):
  """Opens file `ModelFileName(suffix)` for reading."""
  return gfile.Open(ModelFileName(suffix)) 
Example #14
Source File: deep_edge_trainer.py    From asymproj_edge_dnn with Apache License 2.0
def OutFile(suffix):
  """Opens file `ModelFileName(suffix)` for writing."""
  return gfile.Open(ModelFileName(suffix), 'w') 
Example #15
Source File: deep_edge_trainer.py    From asymproj_edge_dnn with Apache License 2.0
def __init__(self, positive_pairs_file, negative_pairs_file):
    self.pos_data = numpy.load(gfile.Open(positive_pairs_file))
    self.neg_data = numpy.load(gfile.Open(negative_pairs_file)) 
Example #16
Source File: deep_edge_trainer.py    From asymproj_edge_dnn with Apache License 2.0
def __init__(self, train_negatives_file):
    train_negatives_arr = numpy.load(gfile.Open(train_negatives_file))
    self.negatives_dict = collections.defaultdict(list)
    for n1, n2 in train_negatives_arr:
      self.negatives_dict[n1].append(n2)
      # self.negatives_dict[n2].append(n1) 
Example #17
Source File: deep_edge_trainer.py    From asymproj_edge_dnn with Apache License 2.0
def next_pairs_array(self):
    arr = numpy.load(gfile.Open(self.train_npy_files[self.next_idx]))
    indices = list(range(len(arr)))  # list() so random.shuffle works under Python 3
    random.shuffle(indices)
    arr = arr[indices]
    self.next_idx = (self.next_idx + 1) % len(self.train_npy_files)
    return arr 
Example #18
Source File: utils.py    From professional-services with Apache License 2.0
def dump_object(object_to_dump, output_path):
  """Pickle the object and save to the output_path.

  Args:
    object_to_dump: Python object to be pickled
    output_path: (string) output path which can be Google Cloud Storage

  Returns:
    None
  """

  if not gfile.Exists(output_path):
    gfile.MakeDirs(os.path.dirname(output_path))
  with gfile.Open(output_path, 'w') as wf:
    joblib.dump(object_to_dump, wf) 
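The Exists/MakeDirs/Open pattern above works for any artifact that should end up on GCS. Below is a minimal standalone variant (not the sample's own code) that pickles a plain Python object; it opens the file in binary mode since pickle writes bytes, and the output path is just a placeholder:

import os
import pickle

from tensorflow import gfile

def dump_pickle(obj, output_path):
  """Pickle obj to output_path, which may be a local or gs:// path."""
  output_dir = os.path.dirname(output_path)
  if not gfile.Exists(output_dir):
    gfile.MakeDirs(output_dir)
  with gfile.Open(output_path, 'wb') as f:  # binary mode for the pickled bytes
    pickle.dump(obj, f)

dump_pickle({'step': 100, 'loss': 0.25}, '/tmp/ckpt/state.pkl')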
Example #19
Source File: preprocess.py    From deepmass with Apache License 2.0
def main(unused_argv):

  # Get one-hot encoding.
  mol_weights = pd.Series(_MOL_WEIGHTS)
  alphabet = [k for k in mol_weights.keys() if not k.startswith(_GROUP)]
  alphabet = sorted(alphabet)
  one_hot_encoding = pd.get_dummies(alphabet).astype(int).to_dict(orient='list')

  with gfile.Open(FLAGS.input_data) as inputf:
    input_data = pd.read_csv(inputf, sep=',')
  input_data.rename(
      columns={FLAGS.sequence_col: _MOD_SEQUENCE,
               FLAGS.charge_col: _CHARGE,
               FLAGS.fragmentation_col: _FRAGMENTATION,
               FLAGS.analyzer_col: _MASS_ANALYZER},
      inplace=True)

  metadata, _ = preprocess_peptides(input_data, FLAGS.clean_peptides)
  metadata = metadata.reset_index()

  check_inputs(metadata, alphabet)

  
  json_inputs = generate_json_inputs(metadata, one_hot_encoding)
  with gfile.Open(
      os.path.join(FLAGS.output_data_dir, 'input.json'), 'w') as outf:
    for json_input in json_inputs:
      outf.write(json.dumps(json_input) + '\n')
  with gfile.Open(
      os.path.join(FLAGS.output_data_dir, 'metadata.tsv'), 'w') as outf:
    metadata.to_csv(outf, sep='\t') 
Example #20
Source File: convert_prediction_from_json_to_csv.py    From Y8M with Apache License 2.0
def main(unused_argv):
  logging.set_verbosity(tf.logging.INFO)

  if not FLAGS.json_prediction_files_pattern:
    raise ValueError(
        "The flag --json_prediction_files_pattern must be specified.")

  if not FLAGS.csv_output_file:
    raise ValueError("The flag --csv_output_file must be specified.")

  logging.info("Looking for prediction files with pattern: %s", 
               FLAGS.json_prediction_files_pattern)

  file_paths = gfile.Glob(FLAGS.json_prediction_files_pattern)  
  logging.info("Found files: %s", file_paths)

  logging.info("Writing submission file to: %s", FLAGS.csv_output_file)
  with gfile.Open(FLAGS.csv_output_file, "w+") as output_file:
    output_file.write(get_csv_header())

    for file_path in file_paths:
      logging.info("processing file: %s", file_path)

      with gfile.Open(file_path) as input_file:

        for line in input_file: 
          json_data = json.loads(line)
          output_file.write(to_csv_row(json_data))

    output_file.flush()
  logging.info("done") 
Example #21
Source File: convert_prediction_from_json_to_csv.py    From Y8M with Apache License 2.0
def main(unused_argv):
  logging.set_verbosity(tf.logging.INFO)

  if not FLAGS.json_prediction_files_pattern:
    raise ValueError(
        "The flag --json_prediction_files_pattern must be specified.")

  if not FLAGS.csv_output_file:
    raise ValueError("The flag --csv_output_file must be specified.")

  logging.info("Looking for prediction files with pattern: %s", 
               FLAGS.json_prediction_files_pattern)

  file_paths = gfile.Glob(FLAGS.json_prediction_files_pattern)  
  logging.info("Found files: %s", file_paths)

  logging.info("Writing submission file to: %s", FLAGS.csv_output_file)
  with gfile.Open(FLAGS.csv_output_file, "w+") as output_file:
    output_file.write(get_csv_header())

    for file_path in file_paths:
      logging.info("processing file: %s", file_path)

      with gfile.Open(file_path) as input_file:

        for line in input_file: 
          json_data = json.loads(line)
          output_file.write(to_csv_row(json_data))

    output_file.flush()
  logging.info("done") 
Example #22
Source File: convert_prediction_from_json_to_csv.py    From Y8M with Apache License 2.0
def main(unused_argv):
  logging.set_verbosity(tf.logging.INFO)

  if not FLAGS.json_prediction_files_pattern:
    raise ValueError(
        "The flag --json_prediction_files_pattern must be specified.")

  if not FLAGS.csv_output_file:
    raise ValueError("The flag --csv_output_file must be specified.")

  logging.info("Looking for prediction files with pattern: %s", 
               FLAGS.json_prediction_files_pattern)

  file_paths = gfile.Glob(FLAGS.json_prediction_files_pattern)  
  logging.info("Found files: %s", file_paths)

  logging.info("Writing submission file to: %s", FLAGS.csv_output_file)
  with gfile.Open(FLAGS.csv_output_file, "w+") as output_file:
    output_file.write(get_csv_header())

    for file_path in file_paths:
      logging.info("processing file: %s", file_path)

      with gfile.Open(file_path) as input_file:

        for line in input_file: 
          json_data = json.loads(line)
          output_file.write(to_csv_row(json_data))

    output_file.flush()
  logging.info("done") 
Example #23
Source File: selector_keras.py    From active-qa with Apache License 2.0
def load(self, name):
    checkpoint_path_json, checkpoint_path_h5 = self._get_checkpoint_paths(name)
    with gfile.Open(checkpoint_path_json, 'r') as json_file:
      loaded_model_json = json_file.read()
    model = model_from_json(loaded_model_json)
    gfile.Copy(checkpoint_path_h5, '/tmp/tmp_model_weights.h5')
    model.load_weights('/tmp/tmp_model_weights.h5')
    logging.info('Loaded model from disk.')
    return model 
Example #24
Source File: selector_keras.py    From active-qa with Apache License 2.0
def save(self, name):
    checkpoint_path_json, checkpoint_path_h5 = self._get_checkpoint_paths(name)
    model_json = self.model.to_json()
    with gfile.Open(checkpoint_path_json, 'w') as json_file:
      json_file.write(model_json)
    self.model.save_weights('/tmp/tmp_model_weights.h5')
    gfile.Copy('/tmp/tmp_model_weights.h5', checkpoint_path_h5) 
Example #25
Source File: dsprites.py    From disentanglement_lib with Apache License 2.0
def __init__(self, latent_factor_indices=None):
    DSprites.__init__(self, latent_factor_indices)
    self.data_shape = [64, 64, 3]
    with gfile.Open(SCREAM_PATH, "rb") as f:
      scream = PIL.Image.open(f)
      scream.thumbnail((350, 274, 3))
      self.scream = np.array(scream) * 1. / 255. 
Example #26
Source File: convert_prediction_from_json_to_csv.py    From youtube8mchallenge with Apache License 2.0
def main(unused_argv):
  logging.set_verbosity(tf.logging.INFO)

  if not FLAGS.json_prediction_files_pattern:
    raise ValueError(
        "The flag --json_prediction_files_pattern must be specified.")

  if not FLAGS.csv_output_file:
    raise ValueError("The flag --csv_output_file must be specified.")

  logging.info("Looking for prediction files with pattern: %s", 
               FLAGS.json_prediction_files_pattern)

  file_paths = gfile.Glob(FLAGS.json_prediction_files_pattern)  
  logging.info("Found files: %s", file_paths)

  logging.info("Writing submission file to: %s", FLAGS.csv_output_file)
  with gfile.Open(FLAGS.csv_output_file, "w+") as output_file:
    output_file.write(get_csv_header())

    for file_path in file_paths:
      logging.info("processing file: %s", file_path)

      with gfile.Open(file_path) as input_file:

        for line in input_file: 
          json_data = json.loads(line)
          output_file.write(to_csv_row(json_data))

    output_file.flush()
  logging.info("done") 
Example #27
Source File: vocabulary.py    From transformer-xl with Apache License 2.0
def _build_from_file(self, vocab_file):
    self.idx2sym = []
    self.sym2idx = OrderedDict()

    with open(vocab_file, 'r') as f:
      for line in f:
        symb = line.strip().split()[0]
        self.add_symbol(symb)
    self.unk_idx = self.sym2idx['<UNK>'] 
Example #28
Source File: vocabulary.py    From transformer-xl with Apache License 2.0
def count_file(self, path, verbose=False, add_eos=False):
    if verbose: print('counting file {} ...'.format(path))
    assert exists(path)

    sents = []
    with open(path, 'r') as f:
      for idx, line in enumerate(f):
        if verbose and idx > 0 and idx % 500000 == 0:
          print('  line {}'.format(idx))
        symbols = self.tokenize(line, add_eos=add_eos)
        self.counter.update(symbols)
        sents.append(symbols)

    return sents 
Example #29
Source File: old_vocabulary.py    From transformer-xl-chinese with Apache License 2.0
def _build_from_file(self, vocab_file):
        self.idx2sym = []
        self.sym2idx = OrderedDict()

        with open(vocab_file, 'r') as f:
            for line in f:
                symb = line.strip().split()[0]
                self.add_symbol(symb)
        self.unk_idx = self.sym2idx['<UNK>'] 
Example #30
Source File: cars3d.py    From disentanglement_lib with Apache License 2.0
def _load_mesh(filename):
  """Parses a single source file and rescales contained images."""
  with gfile.Open(os.path.join(CARS3D_PATH, filename), "rb") as f:
    mesh = np.einsum("abcde->deabc", sio.loadmat(f)["im"])
  flattened_mesh = mesh.reshape((-1,) + mesh.shape[2:])
  rescaled_mesh = np.zeros((flattened_mesh.shape[0], 64, 64, 3))
  for i in range(flattened_mesh.shape[0]):
    pic = PIL.Image.fromarray(flattened_mesh[i, :, :, :])
    pic.thumbnail((64, 64, 3), PIL.Image.ANTIALIAS)
    rescaled_mesh[i, :, :, :] = np.array(pic)
  return rescaled_mesh * 1. / 255