Python tensorflow.example() Examples

The following are 15 code examples of tensorflow.example(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module tensorflow, or try the search function.

Example #1

Source File: feature_transforms.py From pydatalab with Apache License 2.0

6 votes

def read_vocab_file(file_path):
  """Loads a vocab file into memory.

  Args:
    file_path: Path of the vocab file. Each line is in the form
      "token,example_count"; the file is read without a header row.

  Returns:
    Two parallel lists: the vocab tokens (kept as strings) and the example
    counts (converted to ints).
  """
  read_options = {
      'header': None,
      'names': ['vocab', 'count'],
      # Read every column as str so numerical categories are not converted.
      'dtype': str,
      # Keep tokens such as 'NA' as literal strings instead of NaN.
      'na_filter': False,
  }
  with file_io.FileIO(file_path, 'r') as vocab_file:
    table = pd.read_csv(vocab_file, **read_options)

  tokens = table['vocab'].tolist()
  counts = table['count'].astype(int).tolist()
  return tokens, counts

Example #2

Source File: test_training.py From pydatalab with Apache License 2.0

6 votes

def _run_training_transform(self, problem_type, model_type, extra_args=None):
    """Runs training starting with transformed tf.example files.

    Builds a `python -m trainer.task` command line, logs it at debug level and
    executes it through the shell after changing into CODE_PATH.

    Args:
      problem_type: 'regression' or 'classification'
      model_type: 'linear' or 'dnn'
      extra_args: optional list of strings to pass to the trainer. Defaults to
        no extra arguments.

    Raises:
      subprocess.CalledProcessError: if the command exits with a non-zero
        status.
    """
    # Use None as the default so no mutable list object is shared by calls.
    extra_args = list(extra_args) if extra_args else []

    cmd = ['cd %s && ' % CODE_PATH,
           'python -m trainer.task',
           '--train=' + os.path.join(self._transform_output, 'features_train*'),
           '--eval=' + os.path.join(self._transform_output, 'features_eval*'),
           '--job-dir=' + self._train_output,
           '--analysis=' + self._analysis_output,
           '--model=%s_%s' % (model_type, problem_type),
           '--train-batch-size=100',
           '--eval-batch-size=50',
           '--max-steps=' + str(self._max_steps)] + extra_args
    command_line = ' '.join(cmd)

    self._logger.debug('Running subprocess: %s \n\n', command_line)
    # shell=True is needed because the command relies on `cd ... &&`.
    subprocess.check_call(command_line, shell=True)

Example #3

Source File: transform.py From pydatalab with Apache License 2.0

5 votes

def serialize_example(transformed_json_data, info_dict):
  """Builds a tf.train.Example from transformed data and serializes it.

  Args:
    transformed_json_data: dict mapping each feature name to its transformed
      values.
    info_dict: dict with the same keys as transformed_json_data; each value
      has a 'dtype' entry (tf.int64, tf.float32 or tf.string). Presumably the
      output of the transformed-feature-info helper in feature_transforms.

  Returns:
    The serialized tf.example version of transformed_json_data.

  Raises:
    ValueError: if the two dicts do not have the same keys, or if a feature
      has a dtype other than the three supported ones.
  """
  import six
  import tensorflow as tf

  def _int64_feature(values):
    return tf.train.Feature(int64_list=tf.train.Int64List(value=values))

  def _float_feature(values):
    return tf.train.Feature(float_list=tf.train.FloatList(value=values))

  def _bytes_feature(values):
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=values))

  data_keys = sorted(six.iterkeys(transformed_json_data))
  info_keys = sorted(six.iterkeys(info_dict))
  if data_keys != info_keys:
    raise ValueError('Keys do not match %s, %s' % (list(six.iterkeys(transformed_json_data)),
                     list(six.iterkeys(info_dict))))

  features = {}
  for name, info in six.iteritems(info_dict):
    dtype = info['dtype']
    if dtype == tf.int64:
      build = _int64_feature
    elif dtype == tf.float32:
      build = _float_feature
    elif dtype == tf.string:
      build = _bytes_feature
    else:
      raise ValueError('Unsupported data type %s' % info['dtype'])
    features[name] = build(transformed_json_data[name])

  proto = tf.train.Example(features=tf.train.Features(feature=features))
  return proto.SerializeToString()

Example #4

Source File: embedding_bert_intent_estimator_classifier.py From rasa_nlu_gq with Apache License 2.0

5 votes

def _check_tensorflow():
        """Raise ImportError when the module-level name `tf` is None."""
        if tf is not None:
            return
        raise ImportError(
            'Failed to import `tensorflow`. '
            'Please install `tensorflow`. '
            'For example with `pip install tensorflow`.')

Example #5

Source File: embedding_bert_intent_estimator_classifier.py From rasa_nlu_gq with Apache License 2.0

5 votes

def _create_intent_dict(training_data):
        """Map every distinct intent to an index.

        Intents are read with ``.get("intent")`` from each entry of
        ``training_data.intent_examples``; indices follow the sorted order of
        the distinct intents, starting at 0.
        """
        ordered_intents = sorted(
            {sample.get("intent") for sample in training_data.intent_examples})
        return dict(zip(ordered_intents, range(len(ordered_intents))))

Example #6

Source File: embedding_bert_intent_estimator_classifier.py From rasa_nlu_gq with Apache License 2.0

5 votes

def persist(self, file_name: Text, model_dir: Text) -> "Dict[Text, Any]":
        """Persist this model into the passed directory.

        Exports the estimator as a SavedModel below `model_dir` and pickles
        `inv_intent_dict` and `encoded_all_intents` into the same directory.

        Args:
            file_name: prefix of the two pickle file names; also returned in
                the metadata.
            model_dir: directory receiving the exported model and the pickles.

        Returns:
            The metadata necessary to load the model again:
            `{"classifier_file": None}` when no estimator exists, otherwise
            `{"file": file_name}`.
        """
        if self.estimator is None:
            return {"classifier_file": None}

        # build feature spec for tf.example parsing
        feature_spec = tf.feature_column.make_parse_example_spec(self.feature_columns)
        # build tf.example parser
        serving_input_receiver_fn = tf.estimator.export.build_parsing_serving_input_receiver_fn(feature_spec)
        # export tf model; the returned export path is not used by this method
        self.estimator.export_savedmodel(model_dir, serving_input_receiver_fn)

        with io.open(os.path.join(
                model_dir,
                file_name + "_inv_intent_dict.pkl"), 'wb') as f:
            pickle.dump(self.inv_intent_dict, f)
        with io.open(os.path.join(
                model_dir,
                file_name + "_encoded_all_intents.pkl"), 'wb') as f:
            pickle.dump(self.encoded_all_intents, f)

        return {"file": file_name}

Example #7

Source File: tagging_data_lib.py From models with Apache License 2.0

5 votes

def add_word_and_label_id(self, word, label_id):
    """Appends `word` to `self.words` and `label_id` to `self.label_ids`."""
    # The two lists are kept parallel: entry i of each belongs together.
    self.label_ids.append(label_id)
    self.words.append(word)

Example #8

Source File: tagging_data_lib.py From models with Apache License 2.0

5 votes

def _read_one_file(file_name, label_list):
  """Reads one file and returns a list of `InputExample` instances.

  Every non-empty line holds a token, optionally followed by a tab and a
  label (train/dev sets have the label, the test set does not). An empty line
  ends the current sentence.

  Args:
    file_name: path of the file to read; opened with `tf.io.gfile.GFile`.
    label_list: labels in id order; a label's position is used as its id.

  Returns:
    A list with one `InputExample` per non-empty sentence. Sentence ids are
    assigned consecutively starting at 0.

  Raises:
    AssertionError: if a non-empty line has more than two tab-separated items.
    KeyError: if a label is not contained in `label_list`.
  """
  # The context manager closes the file handle even if reading fails.
  with tf.io.gfile.GFile(file_name, "r") as reader:
    lines = reader.readlines()

  examples = []
  label_id_map = {label: i for i, label in enumerate(label_list)}
  sentence_id = 0
  example = InputExample(sentence_id=sentence_id)
  for line in lines:
    line = line.strip("\n")
    if line:
      # The format is: <token>\t<label> for train/dev set and <token> for test.
      items = line.split("\t")
      assert len(items) in (1, 2), (
          "Expected '<token>' or '<token><TAB><label>', got: %r" % line)
      token = items[0].strip()

      # Assign a dummy label_id for test set
      label_id = label_id_map[items[1].strip()] if len(items) == 2 else 0
      example.add_word_and_label_id(token, label_id)
    elif example.words:
      # Empty line indicates a new sentence; consecutive empty lines do not
      # produce empty examples.
      examples.append(example)
      sentence_id += 1
      example = InputExample(sentence_id=sentence_id)

  # The last sentence may not be followed by an empty line.
  if example.words:
    examples.append(example)
  return examples

Example #9

Source File: tagging_data_lib.py From models with Apache License 2.0

5 votes

def _tokenize_example(example, max_length, tokenizer, text_preprocessing=None):
  """Tokenizes words and breaks long example into short ones.

  Args:
    example: object with `words`, `label_ids` and `sentence_id` attributes.
    max_length: maximum number of tokens per produced example, including the
      two slots reserved for the [CLS] and [SEP] tokens.
    tokenizer: object whose `tokenize(word)` returns a list of subwords.
    text_preprocessing: optional callable applied to each word before it is
      tokenized.

  Returns:
    A list of `InputExample` instances that share the sentence id of
    `example` and each hold at most `max_length - 2` subwords.

  Raises:
    ValueError: if `example` has words and any of its label ids is negative.
  """
  # The labels do not change while tokenizing, so validate them once instead
  # of once per word. The `example.words` guard keeps the previous behavior
  # of not validating an example that has nothing to tokenize.
  if example.words and any(x < 0 for x in example.label_ids):
    raise ValueError("Unexpected negative label_id: %s" % example.label_ids)

  # Needs additional [CLS] and [SEP] tokens.
  max_length = max_length - 2
  new_examples = []
  new_example = InputExample(sentence_id=example.sentence_id)
  for i, word in enumerate(example.words):
    if text_preprocessing:
      word = text_preprocessing(word)
    subwords = tokenizer.tokenize(word)
    # A non-empty word that yields no subwords, or more than fit into one
    # example, is represented by the unknown token.
    if (not subwords or len(subwords) > max_length) and word:
      subwords = [_UNK_TOKEN]

    if len(subwords) + len(new_example.words) > max_length:
      # Start a new example.
      new_examples.append(new_example)
      new_example = InputExample(sentence_id=example.sentence_id)

    for j, subword in enumerate(subwords):
      # Use the real label for the first subword, and pad label for
      # the remainings.
      subword_label = example.label_ids[i] if j == 0 else _PADDING_LABEL_ID
      new_example.add_word_and_label_id(subword, subword_label)

  if new_example.words:
    new_examples.append(new_example)

  return new_examples

Example #10

Source File: tagging_data_lib.py From models with Apache License 2.0

5 votes

def _convert_single_example(example, max_seq_length, tokenizer):
  """Builds a `tf.train.Example` from one `InputExample` instance.

  The words are wrapped in "[CLS]" / "[SEP]" tokens, converted to ids by the
  tokenizer and padded with zeros up to `max_seq_length`; label ids get the
  padding label at the same positions. Sequences that are already longer
  than `max_seq_length` are not shortened here.
  """
  def _int64_feature(values):
    return tf.train.Feature(int64_list=tf.train.Int64List(value=list(values)))

  tokens = ["[CLS]"] + list(example.words) + ["[SEP]"]
  input_ids = tokenizer.convert_tokens_to_ids(tokens)
  label_ids = (
      [_PADDING_LABEL_ID] + list(example.label_ids) + [_PADDING_LABEL_ID])

  unpadded_length = len(input_ids)
  segment_ids = [0] * unpadded_length
  input_mask = [1] * unpadded_length

  # Pad up to the sequence length.
  num_padding = max(0, max_seq_length - unpadded_length)
  input_ids.extend([0] * num_padding)
  input_mask.extend([0] * num_padding)
  segment_ids.extend([0] * num_padding)
  label_ids.extend([_PADDING_LABEL_ID] * num_padding)

  features = collections.OrderedDict([
      ("input_ids", _int64_feature(input_ids)),
      ("input_mask", _int64_feature(input_mask)),
      ("segment_ids", _int64_feature(segment_ids)),
      ("label_ids", _int64_feature(label_ids)),
      ("sentence_id", _int64_feature([example.sentence_id])),
  ])

  return tf.train.Example(features=tf.train.Features(feature=features))

Example #11

Source File: embedding_bert_intent_estimator_classifier.py From rasa_nlu_gq with Apache License 2.0

4 votes

def process(self, message, **kwargs):
        # type: (Message, **Any) -> None
        """Return the most likely intent and its similarity to the input.

        Reads the "text_features" of `message`, wraps them in a serialized
        tf.example, runs the predictor and stores the results on the message
        as "intent" and "intent_ranking". When there is no predictor, or the
        message has no features, the intent is
        `{"name": None, "confidence": 0.0}` and the ranking is empty.

        The ranking holds the `INTENT_RANKING_LENGTH` highest-scoring
        intents, ordered by descending confidence.
        """

        intent = {"name": None, "confidence": 0.0}
        intent_ranking = []

        if self.predictor is None:
            logger.error("There is no trained tf.session: "
                         "component is either not trained or "
                         "didn't receive enough training data")

        else:
            X = message.get("text_features").tolist()

            # without any input features there is nothing to predict, so the
            # predictor is not invoked and the defaults above are kept
            if len(X) > 0:
                # convert input x to tf.feature with float feature spec
                feature = {
                    'a_in': tf.train.Feature(float_list=tf.train.FloatList(value=X))
                }
                # build tf.example for prediction
                example = tf.train.Example(
                    features=tf.train.Features(
                        feature=feature
                    )
                )
                # serialize tf.example to string
                examples = [example.SerializeToString()]

                # Make predictions.
                result_dict = self.predictor({'inputs': examples})
                result_score_list = result_dict['scores'][0]
                max_score = np.max(result_score_list)
                max_index = np.argmax(result_score_list)

                intent = {
                    "name": self.inv_intent_dict[max_index], "confidence": float(max_score)
                }

                # Rank all intents first and truncate afterwards; truncating
                # before sorting would keep the first N indices rather than
                # the N best scores and could drop the predicted intent.
                scored_intents = [{"name": self.inv_intent_dict[intent_idx],
                                   "confidence": float(score)}
                                  for intent_idx, score in enumerate(result_score_list)]
                scored_intents.sort(key=lambda s: s['confidence'], reverse=True)
                intent_ranking = scored_intents[:INTENT_RANKING_LENGTH]

        message.set("intent", intent, add_to_output=True)
        message.set("intent_ranking", intent_ranking, add_to_output=True)
Example #12

Source File: parse_sdf_utils.py From deep-molecular-massspec with Apache License 2.0

4 votes

def dict_to_tfexample(mol_dict):
  """Copies molecule info from a dictionary into a new tf.train.Example.

  Args:
    mol_dict : dictionary containing molecule info, keyed by the
      fmap_constants names and by CircularFingerprintKey objects.

  Returns:
    example : tf.example containing mol_dict info.
  """
  example = tf.train.Example()
  features = example.features.feature

  def _floats(key):
    return features[key].float_list.value

  def _ints(key):
    return features[key].int64_list.value

  def _byte_strings(key):
    return features[key].bytes_list.value

  _floats(fmap_constants.ATOM_WEIGHTS).extend(
      mol_dict[fmap_constants.ATOM_WEIGHTS])
  _ints(fmap_constants.ATOM_IDS).extend(mol_dict[fmap_constants.ATOM_IDS])
  _ints(fmap_constants.ADJACENCY_MATRIX).extend(
      mol_dict[fmap_constants.ADJACENCY_MATRIX])
  # The molecule weight is a single value, hence append instead of extend.
  _floats(fmap_constants.MOLECULE_WEIGHT).append(
      mol_dict[fmap_constants.MOLECULE_WEIGHT])
  _floats(fmap_constants.DENSE_MASS_SPEC).extend(
      mol_dict[fmap_constants.DENSE_MASS_SPEC])

  # Text fields are stored as single UTF-8 encoded byte strings.
  text_keys = (
      fmap_constants.INCHIKEY,
      fmap_constants.MOLECULAR_FORMULA,
      fmap_constants.NAME,
      fmap_constants.SMILES,
  )
  for text_key in text_keys:
    _byte_strings(text_key).append(mol_dict[text_key].encode('utf-8'))

  # The ground-truth index is optional.
  if fmap_constants.INDEX_TO_GROUND_TRUTH_ARRAY in mol_dict:
    _ints(fmap_constants.INDEX_TO_GROUND_TRUTH_ARRAY).append(
        mol_dict[fmap_constants.INDEX_TO_GROUND_TRUTH_ARRAY])

  # One float feature per (length, radius, type) fingerprint combination;
  # the feature name is the string form of the fingerprint key.
  for fp_len in ms_constants.NUM_CIRCULAR_FP_BITS_LIST:
    for rad in ms_constants.CIRCULAR_FP_RADII_LIST:
      for fp_type in fmap_constants.FP_TYPE_LIST:
        fp_key = ms_constants.CircularFingerprintKey(fp_type, fp_len, rad)
        _floats(str(fp_key)).extend(mol_dict[fp_key])

  return example

Example #13

Source File: parse_sdf_utils.py From deep-molecular-massspec with Apache License 2.0

4 votes

def write_dicts_to_example(mol_list,
                           record_path_name,
                           max_atoms,
                           max_mass_spec_peak_loc,
                           true_library_array_path_name=None):
  """Writes one serialized tf.example per molecule into a TFRecord file.

  Each molecule is converted with make_mol_dict and dict_to_tfexample and
  written to a ZLIB-compressed record file.

  Args:
    mol_list : list of rdkit.Mol objects
    record_path_name : file name for storing tf record
    max_atoms : max. number of atoms to consider in a molecule; passed on to
      make_mol_dict.
    max_mass_spec_peak_loc : largest mass/charge ratio to allow in a spectra;
      also the number of columns of the saved spectra array.
    true_library_array_path_name: path for storing np.array of true spectra.
      When it is empty or None no array is saved.

  Returns:
    Nothing. Side effects:
    - Writes a tf.Record with an example for each molecule in mol_list.
    - Writes np.array (len(mol_list), max_mass_spec_peak_loc) to
      true_library_array_path_name if it is defined; in that case every
      example also stores its row index into that array.
  """
  options = tf.python_io.TFRecordOptions(
      tf.python_io.TFRecordCompressionType.ZLIB)

  save_spectra = bool(true_library_array_path_name)
  spectra_matrix = None
  if save_spectra:
    spectra_matrix = np.zeros((len(mol_list), max_mass_spec_peak_loc))

  def _build_mol_dict(row, molecule):
    entry = make_mol_dict(molecule, max_atoms, max_mass_spec_peak_loc)
    if save_spectra:
      # Record the row index in the example and keep the dense spectrum.
      entry[fmap_constants.INDEX_TO_GROUND_TRUTH_ARRAY] = row
      spectra_matrix[row, :] = entry[fmap_constants.DENSE_MASS_SPEC]
    return entry

  with tf.python_io.TFRecordWriter(record_path_name, options) as writer:
    for row, molecule in enumerate(mol_list):
      record = dict_to_tfexample(_build_mol_dict(row, molecule))
      writer.write(record.SerializeToString())

  if save_spectra:
    with tf.gfile.Open(true_library_array_path_name, 'w') as f:
      np.save(f, spectra_matrix)

Example #14

Source File: model_agnostic_predict.py From model-analysis with Apache License 2.0

4 votes

def __new__(cls, label_keys: List[Text], prediction_keys: List[Text],
              feature_spec: Dict[Text, Any]):
    """Creates a ModelAgnosticConfig instance.

    The config describes how Features, Predictions and Labels are obtained
    from input Examples when doing ModelAgnostic evaluation (model evaluation
    without the training eval saved model).

    Args:
      label_keys: A list of Text, the keys in the input examples which should be
        treated as labels. Must not be empty.
      prediction_keys: A list of Text, the keys in the input examples which
        should be treated as predictions. Must not be empty.
      feature_spec: A dict defining how to parse the example, of the form
        "key" -> FixedLenFeature or VarLenFeature. Must not be empty and must
        contain every label key and every prediction key.

    Returns:
      A ModelAgnosticConfig instance.

    Raises:
      ValueError: The inputs supplied are not properly defined, i.e. one of
        them is empty or a label/prediction key is missing from feature_spec.
    """
    # Presence checks, in the order: labels, predictions, feature spec.
    if not label_keys:
      raise ValueError('ModelAgnosticConfig must have label keys set.')
    if not prediction_keys:
      raise ValueError('ModelAgnosticConfig must have prediction keys set.')
    if not feature_spec:
      raise ValueError('ModelAgnosticConfig must have feature_spec set.')

    # Every prediction key, then every label key, needs a parsing spec.
    for prediction_key in prediction_keys:
      if prediction_key in feature_spec:
        continue
      raise ValueError(
          'Prediction key %s not defined in feature_spec.' % prediction_key)
    for label_key in label_keys:
      if label_key in feature_spec:
        continue
      raise ValueError(
          'Label key %s not defined in feature_spec.' % label_key)

    base = super(ModelAgnosticConfig, cls)
    return base.__new__(
        cls,
        label_keys=label_keys,
        prediction_keys=prediction_keys,
        feature_spec=feature_spec)

Example #15

Source File: model_agnostic_predict.py From model-analysis with Apache License 2.0

4 votes

def get_fpls_from_examples(self, input_example_bytes_list: List[bytes]
                            ) -> List[Any]:
    """Generates FPLs from serialized examples using a ModelAgnostic graph.

    Args:
      input_example_bytes_list: A list of serialized tf.example protos to be
        parsed by the graph.

    Returns:
      A list of FeaturesPredictionsLabels generated from the input examples,
      one per example, with `input_ref` set to the example's position.

    Raises:
      ValueError: If the parsed feature keys do not all hold the same number
        of examples.
    """
    # Call the graph via the created session callable _get_features_fn and
    # get the tensor representation of the features.
    features = self._get_features_fn(input_example_bytes_list)
    split_features = {}
    num_examples = 0

    # Split the features by the example keys. Also verify that each example
    # key has the same number of total examples.
    for key, value in features.items():
      split_features[key] = util.split_tensor_value(value)
      num_split = len(split_features[key])
      if num_examples == 0:
        num_examples = num_split
      elif num_examples != num_split:
        # Both placeholders must be filled from one tuple; formatting with
        # `key` alone would fail with a TypeError instead of this ValueError.
        raise ValueError(
            'Different keys unexpectedly had different number of '
            'examples. Key %s unexpectedly had %s elements.' %
            (key, num_split))

    # Sort out the examples into individual FPLs: one example -> one FPL.
    # Sort them into Features, Predictions, or Labels according to the input
    # config.
    result = []
    for i in range(num_examples):
      labels = {}
      predictions = {}
      # Every key is a feature; label and prediction keys are additionally
      # copied into their own dicts.
      example_features = {}
      for key in split_features:
        if key in self._config.label_keys:
          labels[key] = {encoding.NODE_SUFFIX: split_features[key][i]}
        if key in self._config.prediction_keys:
          predictions[key] = {encoding.NODE_SUFFIX: split_features[key][i]}
        example_features[key] = {encoding.NODE_SUFFIX: split_features[key][i]}

      result.append(
          types.FeaturesPredictionsLabels(
              input_ref=i,
              features=example_features,
              predictions=predictions,
              labels=labels))

    return result