Python tensorflow.gfile.Open() Examples

The following are 30 code examples of tensorflow.gfile.Open(), collected from open-source projects. The source file, project, and license are noted above each example. You may also want to check out the other functions and classes available in the tensorflow.gfile module.
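For context, gfile.Open is the TensorFlow 1.x file API: it mirrors the built-in open() but also understands paths on any filesystem TensorFlow is aware of, such as gs:// buckets on Google Cloud Storage. A minimal, self-contained sketch (the local path is only an illustration; a gs:// path you can write to would behave the same way):

from tensorflow import gfile

# Write text; gfile.Open handles local paths and gs:// paths identically.
with gfile.Open('/tmp/gfile_demo.txt', 'w') as f:
  f.write('hello gfile\n')

# Read it back; 'r' yields text, 'rb' yields bytes.
with gfile.Open('/tmp/gfile_demo.txt', 'r') as f:
  print(f.read())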
Example #1
Source File: dsprites.py    From disentanglement_lib with Apache License 2.0
def __init__(self, latent_factor_indices=None):
    # By default, all factors (including shape) are considered ground truth
    # factors.
    if latent_factor_indices is None:
      latent_factor_indices = list(range(6))
    self.latent_factor_indices = latent_factor_indices
    self.data_shape = [64, 64, 1]
    # Load the data so that we can sample from it.
    with gfile.Open(DSPRITES_PATH, "rb") as data_file:
      # Data was saved originally using python2, so we need to set the encoding.
      data = np.load(data_file, encoding="latin1", allow_pickle=True)
      self.images = np.array(data["imgs"])
      self.factor_sizes = np.array(
          data["metadata"][()]["latents_sizes"], dtype=np.int64)
    self.full_factor_sizes = [1, 3, 6, 40, 32, 32]
    self.factor_bases = np.prod(self.factor_sizes) / np.cumprod(
        self.factor_sizes)
    self.state_space = util.SplitDiscreteStateSpace(self.factor_sizes,
                                                    self.latent_factor_indices) 
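A note on the factor_bases line above: with factor_sizes [1, 3, 6, 40, 32, 32], np.prod(...) / np.cumprod(...) gives the mixed-radix place values [737280, 245760, 40960, 1024, 32, 1], i.e. each base is the product of the sizes of all later factors, so a tuple of per-factor indices maps to a single row of the flattened dSprites image array via a dot product. A small standalone sketch of that conversion (the factor values are arbitrary and this is not a method of the class itself):

import numpy as np

factor_sizes = np.array([1, 3, 6, 40, 32, 32], dtype=np.int64)
factor_bases = np.prod(factor_sizes) / np.cumprod(factor_sizes)
# factor_bases == [737280., 245760., 40960., 1024., 32., 1.]

factors = np.array([0, 2, 5, 10, 3, 7])          # one index per latent factor
flat_index = int(np.dot(factors, factor_bases))  # -> 706663
# data["imgs"][flat_index] would be the image rendered with exactly these factor values.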
Example #2
Source File: vocabulary.py    From transformer-xl with Apache License 2.0
def encode_file(self, path, ordered=False, verbose=False, add_eos=True,
          add_double_eos=False):
    if verbose: print('encoding file {} ...'.format(path))
    assert exists(path)
    encoded = []
    with open(path, 'r') as f:
      for idx, line in enumerate(f):
        if verbose and idx > 0 and idx % 500000 == 0:
          print('  line {}'.format(idx))
        symbols = self.tokenize(line, add_eos=add_eos,
          add_double_eos=add_double_eos)
        encoded.append(self.convert_to_nparray(symbols))

    if ordered:
      encoded = np.concatenate(encoded)

    return encoded 
Example #3
Source File: utils.py    From cloudml-samples with Apache License 2.0
def read_df_from_gcs(file_pattern):
  """Read data from Google Cloud Storage, split into train and validation sets.

  Assume that the data on GCS is in csv format without header.
  The column names will be provided through metadata

  Args:
    file_pattern: (string) pattern of the files containing training data.
    For example: [gs://bucket/folder_name/prefix]

  Returns:
    pandas.DataFrame
  """

  # Download the files to local /tmp/ folder
  df_list = []

  for filepath in gfile.Glob(file_pattern):
    with gfile.Open(filepath, 'r') as f:
      # Assume there is no header
      df_list.append(pd.read_csv(f, names=metadata.CSV_COLUMNS))

  data_df = pd.concat(df_list)

  return data_df 
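A hedged usage sketch for the function above (the bucket and prefix are placeholders, and metadata.CSV_COLUMNS is defined elsewhere in the sample):

# Hypothetical call; any pattern that gfile.Glob can expand will work.
train_df = read_df_from_gcs('gs://my-bucket/census/train-*.csv')
print(train_df.shape)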
Example #4
Source File: reference_implementation.py    From training with Apache License 2.0
async def train(state, tf_records):  # coroutine: the body uses await
  """Run training and write a new model to the fsdb models_dir.

  Args:
    state: the RL loop State instance.
    tf_records: a list of paths to TensorFlow records to train on.
  """

  model_path = os.path.join(fsdb.models_dir(), state.train_model_name)
  await run(
      'python3', 'train.py', *tf_records,
      '--flagfile={}'.format(os.path.join(FLAGS.flags_dir, 'train.flags')),
      '--work_dir={}'.format(fsdb.working_dir()),
      '--export_path={}'.format(model_path),
      '--training_seed={}'.format(state.seed),
      '--freeze=true')
  # Append the time elapsed from when the RL was started to when this model
  # was trained.
  elapsed = time.time() - state.start_time
  timestamps_path = os.path.join(fsdb.models_dir(), 'train_times.txt')
  with gfile.Open(timestamps_path, 'a') as f:
    print('{:.3f} {}'.format(elapsed, state.train_model_name), file=f) 
Example #5
Source File: reference_implementation.py    From training with Apache License 2.0
async def run(*cmd):  # coroutine: the body uses await
  """Run the given subprocess command in a coroutine.

  Args:
    *cmd: the command to run and its arguments.

  Returns:
    The output that the command wrote to stdout as a list of strings, one line
    per element (stderr output is piped to stdout).

  Raises:
    RuntimeError: if the command returns a non-zero result.
  """

  stdout = await checked_run(*cmd)

  log_path = os.path.join(FLAGS.base_dir, get_cmd_name(cmd) + '.log')
  with gfile.Open(log_path, 'a') as f:
    f.write(expand_cmd_str(cmd))
    f.write('\n')
    f.write(stdout)
    f.write('\n')

  # Split stdout into lines.
  return stdout.split('\n') 
Example #6
Source File: alias_generator.py    From training with Apache License 2.0
def run_real_data():
    print("Starting on real data.")
    metadata_path = "{}_train_metadata.pkl".format(_PREFIX)
    with Open(metadata_path, "rb") as f:
        train_metadata = pickle.load(f)
    num_items = train_metadata.num_cols
    print("num_items:", num_items)

    st = timeit.default_timer()
    sampler_cache = _PREFIX + "cached_sampler.pkl"
    if os.path.exists(sampler_cache):
      print("Using cache: {}".format(sampler_cache))
      with open(sampler_cache, "rb") as f:
        sampler, pos_users, pos_items = pickle.load(f)
    else:
      sampler, pos_users, pos_items = process_data(num_items=num_items, min_items_per_user=1, iter_fn=iter_data)
      with open(sampler_cache, "wb") as f:
        pickle.dump([sampler, pos_users, pos_items], f, pickle.HIGHEST_PROTOCOL)
    preproc_time = timeit.default_timer() - st
    num_users = len(sampler.num_regions)
    print("num_users:", num_users)
    print("Preprocessing complete: {:.1f} sec".format(preproc_time))
    print()

    _ = profile_sampler(sampler=sampler, batch_size=int(1e6), num_batches=1000, num_users=num_users) 
Example #7
Source File: vocabulary.py    From transformer-xl-chinese with Apache License 2.0
def count_file(self, path, verbose=False, add_eos=False):
        if verbose: print('counting file {} ...'.format(path))
        assert exists(path)

        sents = []
        with open(path, 'r') as f:
            for idx, line in enumerate(f):
                if verbose and idx > 0 and idx % 500000 == 0:
                    print('  line {}'.format(idx))
                symbols = self.tokenize(line, add_eos=True)
                self.counter.update(symbols)
                sents.append(symbols)

        return sents

    # update the tokens in the counter
Example #8
Source File: utils.py    From professional-services with Apache License 2.0
def read_df_from_gcs(file_pattern):
  """Read data from Google Cloud Storage, split into train and validation sets.

  Assume that the data on GCS is in csv format without header.
  The column names will be provided through metadata

  Args:
    file_pattern: (string) pattern of the files containing training data.
    For example: [gs://bucket/folder_name/prefix]

  Returns:
    pandas.DataFrame
  """

  # Download the files to local /tmp/ folder
  df_list = []

  for filepath in gfile.Glob(file_pattern):
    with gfile.Open(filepath, 'r') as f:
      # Assume there is no header
      df_list.append(pd.read_csv(f, names=metadata.CSV_COLUMNS))

  data_df = pd.concat(df_list)

  return data_df 
Example #9
Source File: vocabulary.py    From transformer-xl-chinese with Apache License 2.0
def encode_file(self, path, ordered=False, verbose=False,
                    add_double_eos=False):
        if verbose: print('encoding file {} ...'.format(path))
        assert exists(path)
        encoded = []
        with open(path, 'r') as f:
            for idx, line in enumerate(f):
                if verbose and idx > 0 and idx % 500000 == 0:
                    print('  line {}'.format(idx))
                symbols = self.tokenize(line, add_eos=True, add_double_eos=add_double_eos)

                encoded.append(self.convert_to_nparray(symbols))

        if ordered:
            encoded = np.concatenate(encoded)

        return encoded

Example #10
Source File: old_vocabulary.py    From transformer-xl-chinese with Apache License 2.0
def count_file(self, path, verbose=False, add_eos=False):
        if verbose: print('counting file {} ...'.format(path))
        assert exists(path)

        sents = []
        with open(path, 'r') as f:
            for idx, line in enumerate(f):
                if verbose and idx > 0 and idx % 500000 == 0:
                    print('  line {}'.format(idx))
                symbols = self.tokenize(line, add_eos=add_eos)
                self.counter.update(symbols)
                sents.append(symbols)

        return sents

    # update the tokens in the counter
Example #11
Source File: aggregate_results.py    From disentanglement_lib with Apache License 2.0
def aggregate_results_to_json(result_file_pattern, output_path):
  """Aggregates all the results files in the pattern into a single JSON file.

  Args:
    result_file_pattern: String with glob pattern to all the result files that
      should be aggregated (e.g. /tmp/*/results/aggregate/evaluation.json).
    output_path: String with path to output json file (e.g. /tmp/results.json).
  """
  logging.info("Loading the results.")
  model_results = _get(result_file_pattern)
  logging.info("Saving the aggregated results.")
  with gfile.Open(output_path, "w") as f:
    model_results.to_json(path_or_buf=f) 
Example #12
Source File: storage.py    From ffn with Apache License 2.0
def load_origins(segmentation_dir, corner):
  target_path = get_existing_subvolume_path(segmentation_dir, corner, False)
  if target_path is None:
    raise ValueError('Segmentation not found: %s, %s' % (segmentation_dir,
                                                         corner))

  with gfile.Open(target_path, 'rb') as f:
    data = np.load(f)
    return data['origins'].item() 
Example #13
Source File: deep_edge_trainer.py    From asymproj_edge_dnn with Apache License 2.0
def InFile(suffix):
  """Opens file `ModelFileName(suffix)` for reading."""
  return gfile.Open(ModelFileName(suffix)) 
Example #14
Source File: deep_edge_trainer.py    From asymproj_edge_dnn with Apache License 2.0
def OutFile(suffix):
  """Opens file `ModelFileName(suffix)` for writing."""
  return gfile.Open(ModelFileName(suffix), 'w') 
Example #15
Source File: deep_edge_trainer.py    From asymproj_edge_dnn with Apache License 2.0
def __init__(self, positive_pairs_file, negative_pairs_file):
    self.pos_data = numpy.load(gfile.Open(positive_pairs_file))
    self.neg_data = numpy.load(gfile.Open(negative_pairs_file)) 
Example #16
Source File: deep_edge_trainer.py    From asymproj_edge_dnn with Apache License 2.0
def __init__(self, train_negatives_file):
    train_negatives_arr = numpy.load(gfile.Open(train_negatives_file))
    self.negatives_dict = collections.defaultdict(list)
    for n1, n2 in train_negatives_arr:
      self.negatives_dict[n1].append(n2)
      # self.negatives_dict[n2].append(n1) 
Example #17
Source File: deep_edge_trainer.py    From asymproj_edge_dnn with Apache License 2.0
def next_pairs_array(self):
    arr = numpy.load(gfile.Open(self.train_npy_files[self.next_idx]))
    indices = list(range(len(arr)))  # list() so random.shuffle works under Python 3
    random.shuffle(indices)
    arr = arr[indices]
    self.next_idx = (self.next_idx + 1) % len(self.train_npy_files)
    return arr 
Example #18
Source File: utils.py    From professional-services with Apache License 2.0
def dump_object(object_to_dump, output_path):
  """Pickle the object and save to the output_path.

  Args:
    object_to_dump: Python object to be pickled
    output_path: (string) output path which can be Google Cloud Storage

  Returns:
    None
  """

  if not gfile.Exists(output_path):
    gfile.MakeDirs(os.path.dirname(output_path))
  with gfile.Open(output_path, 'w') as wf:
    joblib.dump(object_to_dump, wf) 
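The Exists/MakeDirs/Open pattern above works for any artifact that should end up on GCS. Below is a minimal standalone variant (not the sample's own code) that pickles a plain Python object; it opens the file in binary mode since pickle writes bytes, and the output path is just a placeholder:

import os
import pickle

from tensorflow import gfile

def dump_pickle(obj, output_path):
  """Pickle obj to output_path, which may be a local or gs:// path."""
  output_dir = os.path.dirname(output_path)
  if not gfile.Exists(output_dir):
    gfile.MakeDirs(output_dir)
  with gfile.Open(output_path, 'wb') as f:  # binary mode for the pickled bytes
    pickle.dump(obj, f)

dump_pickle({'step': 100, 'loss': 0.25}, '/tmp/ckpt/state.pkl')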
Example #19
Source File: preprocess.py    From deepmass with Apache License 2.0
def main(unused_argv):

  # Get one-hot encoding.
  mol_weights = pd.Series(_MOL_WEIGHTS)
  alphabet = [k for k in mol_weights.keys() if not k.startswith(_GROUP)]
  alphabet = sorted(alphabet)
  one_hot_encoding = pd.get_dummies(alphabet).astype(int).to_dict(orient='list')

  with gfile.Open(FLAGS.input_data) as inputf:
    input_data = pd.read_csv(inputf, sep=',')
  input_data.rename(
      columns={FLAGS.sequence_col: _MOD_SEQUENCE,
               FLAGS.charge_col: _CHARGE,
               FLAGS.fragmentation_col: _FRAGMENTATION,
               FLAGS.analyzer_col: _MASS_ANALYZER},
      inplace=True)

  metadata, _ = preprocess_peptides(input_data, FLAGS.clean_peptides)
  metadata = metadata.reset_index()

  check_inputs(metadata, alphabet)

  
  json_inputs = generate_json_inputs(metadata, one_hot_encoding)
  with gfile.Open(
      os.path.join(FLAGS.output_data_dir, 'input.json'), 'w') as outf:
    for json_input in json_inputs:
      outf.write(json.dumps(json_input) + '\n')
  with gfile.Open(
      os.path.join(FLAGS.output_data_dir, 'metadata.tsv'), 'w') as outf:
    metadata.to_csv(outf, sep='\t') 
Example #20
Source File: convert_prediction_from_json_to_csv.py    From Y8M with Apache License 2.0
def main(unused_argv):
  logging.set_verbosity(tf.logging.INFO)

  if not FLAGS.json_prediction_files_pattern:
    raise ValueError(
        "The flag --json_prediction_files_pattern must be specified.")

  if not FLAGS.csv_output_file:
    raise ValueError("The flag --csv_output_file must be specified.")

  logging.info("Looking for prediction files with pattern: %s", 
               FLAGS.json_prediction_files_pattern)

  file_paths = gfile.Glob(FLAGS.json_prediction_files_pattern)  
  logging.info("Found files: %s", file_paths)

  logging.info("Writing submission file to: %s", FLAGS.csv_output_file)
  with gfile.Open(FLAGS.csv_output_file, "w+") as output_file:
    output_file.write(get_csv_header())

    for file_path in file_paths:
      logging.info("processing file: %s", file_path)

      with gfile.Open(file_path) as input_file:

        for line in input_file: 
          json_data = json.loads(line)
          output_file.write(to_csv_row(json_data))

    output_file.flush()
  logging.info("done") 
Example #21
Source File: convert_prediction_from_json_to_csv.py    From Y8M with Apache License 2.0
def main(unused_argv):
  logging.set_verbosity(tf.logging.INFO)

  if not FLAGS.json_prediction_files_pattern:
    raise ValueError(
        "The flag --json_prediction_files_pattern must be specified.")

  if not FLAGS.csv_output_file:
    raise ValueError("The flag --csv_output_file must be specified.")

  logging.info("Looking for prediction files with pattern: %s", 
               FLAGS.json_prediction_files_pattern)

  file_paths = gfile.Glob(FLAGS.json_prediction_files_pattern)  
  logging.info("Found files: %s", file_paths)

  logging.info("Writing submission file to: %s", FLAGS.csv_output_file)
  with gfile.Open(FLAGS.csv_output_file, "w+") as output_file:
    output_file.write(get_csv_header())

    for file_path in file_paths:
      logging.info("processing file: %s", file_path)

      with gfile.Open(file_path) as input_file:

        for line in input_file: 
          json_data = json.loads(line)
          output_file.write(to_csv_row(json_data))

    output_file.flush()
  logging.info("done") 
Example #22
Source File: convert_prediction_from_json_to_csv.py    From Y8M with Apache License 2.0
def main(unused_argv):
  logging.set_verbosity(tf.logging.INFO)

  if not FLAGS.json_prediction_files_pattern:
    raise ValueError(
        "The flag --json_prediction_files_pattern must be specified.")

  if not FLAGS.csv_output_file:
    raise ValueError("The flag --csv_output_file must be specified.")

  logging.info("Looking for prediction files with pattern: %s", 
               FLAGS.json_prediction_files_pattern)

  file_paths = gfile.Glob(FLAGS.json_prediction_files_pattern)  
  logging.info("Found files: %s", file_paths)

  logging.info("Writing submission file to: %s", FLAGS.csv_output_file)
  with gfile.Open(FLAGS.csv_output_file, "w+") as output_file:
    output_file.write(get_csv_header())

    for file_path in file_paths:
      logging.info("processing file: %s", file_path)

      with gfile.Open(file_path) as input_file:

        for line in input_file: 
          json_data = json.loads(line)
          output_file.write(to_csv_row(json_data))

    output_file.flush()
  logging.info("done") 
Example #23
Source File: selector_keras.py    From active-qa with Apache License 2.0
def load(self, name):
    checkpoint_path_json, checkpoint_path_h5 = self._get_checkpoint_paths(name)
    with gfile.Open(checkpoint_path_json, 'r') as json_file:
      loaded_model_json = json_file.read()
    model = model_from_json(loaded_model_json)
    gfile.Copy(checkpoint_path_h5, '/tmp/tmp_model_weights.h5')
    model.load_weights('/tmp/tmp_model_weights.h5')
    logging.info('Loaded model from disk.')
    return model 
Example #24
Source File: selector_keras.py    From active-qa with Apache License 2.0
def save(self, name):
    checkpoint_path_json, checkpoint_path_h5 = self._get_checkpoint_paths(name)
    model_json = self.model.to_json()
    with gfile.Open(checkpoint_path_json, 'w') as json_file:
      json_file.write(model_json)
    self.model.save_weights('/tmp/tmp_model_weights.h5')
    gfile.Copy('/tmp/tmp_model_weights.h5', checkpoint_path_h5) 
Example #25
Source File: dsprites.py    From disentanglement_lib with Apache License 2.0
def __init__(self, latent_factor_indices=None):
    DSprites.__init__(self, latent_factor_indices)
    self.data_shape = [64, 64, 3]
    with gfile.Open(SCREAM_PATH, "rb") as f:
      scream = PIL.Image.open(f)
      scream.thumbnail((350, 274, 3))
      self.scream = np.array(scream) * 1. / 255. 
Example #26
Source File: convert_prediction_from_json_to_csv.py    From youtube8mchallenge with Apache License 2.0
def main(unused_argv):
  logging.set_verbosity(tf.logging.INFO)

  if not FLAGS.json_prediction_files_pattern:
    raise ValueError(
        "The flag --json_prediction_files_pattern must be specified.")

  if not FLAGS.csv_output_file:
    raise ValueError("The flag --csv_output_file must be specified.")

  logging.info("Looking for prediction files with pattern: %s", 
               FLAGS.json_prediction_files_pattern)

  file_paths = gfile.Glob(FLAGS.json_prediction_files_pattern)  
  logging.info("Found files: %s", file_paths)

  logging.info("Writing submission file to: %s", FLAGS.csv_output_file)
  with gfile.Open(FLAGS.csv_output_file, "w+") as output_file:
    output_file.write(get_csv_header())

    for file_path in file_paths:
      logging.info("processing file: %s", file_path)

      with gfile.Open(file_path) as input_file:

        for line in input_file: 
          json_data = json.loads(line)
          output_file.write(to_csv_row(json_data))

    output_file.flush()
  logging.info("done") 
Example #27
Source File: vocabulary.py    From transformer-xl with Apache License 2.0
def _build_from_file(self, vocab_file):
    self.idx2sym = []
    self.sym2idx = OrderedDict()

    with open(vocab_file, 'r') as f:
      for line in f:
        symb = line.strip().split()[0]
        self.add_symbol(symb)
    self.unk_idx = self.sym2idx['<UNK>'] 
Example #28
Source File: vocabulary.py    From transformer-xl with Apache License 2.0
def count_file(self, path, verbose=False, add_eos=False):
    if verbose: print('counting file {} ...'.format(path))
    assert exists(path)

    sents = []
    with open(path, 'r') as f:
      for idx, line in enumerate(f):
        if verbose and idx > 0 and idx % 500000 == 0:
          print('  line {}'.format(idx))
        symbols = self.tokenize(line, add_eos=add_eos)
        self.counter.update(symbols)
        sents.append(symbols)

    return sents 
Example #29
Source File: old_vocabulary.py    From transformer-xl-chinese with Apache License 2.0
def _build_from_file(self, vocab_file):
        self.idx2sym = []
        self.sym2idx = OrderedDict()

        with open(vocab_file, 'r') as f:
            for line in f:
                symb = line.strip().split()[0]
                self.add_symbol(symb)
        self.unk_idx = self.sym2idx['<UNK>'] 
Example #30
Source File: cars3d.py    From disentanglement_lib with Apache License 2.0
def _load_mesh(filename):
  """Parses a single source file and rescales contained images."""
  with gfile.Open(os.path.join(CARS3D_PATH, filename), "rb") as f:
    mesh = np.einsum("abcde->deabc", sio.loadmat(f)["im"])
  flattened_mesh = mesh.reshape((-1,) + mesh.shape[2:])
  rescaled_mesh = np.zeros((flattened_mesh.shape[0], 64, 64, 3))
  for i in range(flattened_mesh.shape[0]):
    pic = PIL.Image.fromarray(flattened_mesh[i, :, :, :])
    pic.thumbnail((64, 64, 3), PIL.Image.ANTIALIAS)
    rescaled_mesh[i, :, :, :] = np.array(pic)
  return rescaled_mesh * 1. / 255