Python tensorflow.regex_replace() Examples

The following are 3 code examples of tensorflow.regex_replace(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module tensorflow, or try the search function.
Example #1
Source File: input_fn.py    From professional-services with Apache License 2.0 6 votes vote down vote up
def parse_raw_text(sentence):
  """Turns a text tensor into a sparse sequence of word tokens.

  Every match of `_CHAR_TO_FILTER_OUT` in the input is replaced by a single
  space, and the result is split with `tf.string_split` using its default
  delimiter.

  Args:
    sentence: `tf.string`, with text record to split.

  Returns:
    Dictionary with two entries: `constants.TOKENS`, mapping to the
    `SparseTensor` of tokens, and `constants.SEQUENCE_LENGTH`, mapping to the
    result of `get_sparse_tensor_size` on that same sparse tensor (a
    one-dimensional integer `Tensor`).

  """
  # Blank out unwanted characters first so they act as token separators.
  filtered = tf.regex_replace(
      sentence, _CHAR_TO_FILTER_OUT, ' ', replace_global=True)
  words = tf.string_split(filtered)
  return {
      constants.TOKENS: words,
      constants.SEQUENCE_LENGTH: get_sparse_tensor_size(words),
  }
Example #2
Source File: logistic_regression.py    From tf-encrypted with Apache License 2.0 5 votes vote down vote up
def provide_data(self):
        """Build the text-line input pipeline and return one batch tensor.

        Lines are read from `self.local_data_file`, split on
        `self.field_delim`, converted to float32, and NaN entries are replaced
        by the matching entry of `self.data_schema.field_defaults`. The
        dataset repeats indefinitely and is batched by `self.batch_size`.

        Returns:
            The next batch from a one-shot iterator, reshaped to
            `[self.batch_size, self.data_schema.field_num]`.
        """
        # Build the NA alternation once instead of on every `decode` call.
        # An empty pattern (no NA markers configured) has to be skipped: an
        # empty regex matches at every position, so a global replace would
        # splice "nan" between all characters of every field and make the
        # numeric conversion below fail.
        # NOTE(review): the alternation is neither escaped nor anchored, so a
        # marker that occurs inside a valid field is rewritten as well --
        # confirm the configured na_values are safe for that.
        na_pattern = "|".join(self.na_values)

        def decode(line):
            """Split one text line into a 1-D float32 tensor of fields."""
            fields = tf.string_split([line], self.field_delim).values
            if self.index:  # Skip index
                fields = fields[1:]
            if na_pattern:
                # Map NA markers to "nan" so string_to_number yields NaN.
                fields = tf.regex_replace(fields, na_pattern, "nan")
            fields = tf.string_to_number(fields, tf.float32)
            return fields

        def fill_na(fields, fill_values):
            """Replace NaN entries of `fields` with those of `fill_values`."""
            fields = tf.where(tf.is_nan(fields), fill_values, fields)
            return fields

        dataset = tf.data.TextLineDataset(self.local_data_file)
        if self.header:  # Skip header
            dataset = dataset.skip(1)
        dataset = (
            dataset.map(decode)
            .map(lambda x: fill_na(x, self.data_schema.field_defaults))
            .repeat()
            .batch(self.batch_size)
        )

        iterator = dataset.make_one_shot_iterator()
        batch = iterator.get_next()
        # Pin the static shape; repeat() above keeps every batch full.
        batch = tf.reshape(batch, [self.batch_size, self.data_schema.field_num])
        return batch
Example #3
Source File: metrics.py    From BERT with Apache License 2.0 4 votes vote down vote up
def word_error_rate(raw_predictions,
                    labels,
                    lookup=None,
                    weights_fn=common_layers.weights_nonzero):
  """Computes the word error rate between predictions and labels.

  Both inputs are decoded to space-separated words, and the summed
  (un-normalized) edit distance is divided by the number of reference words.

  Args:
    raw_predictions: The raw predictions; the argmax over the last axis is
      taken and axes 2 and 3 of the result are squeezed away.
    labels: The actual labels; axes 2 and 3 are squeezed away.
    lookup: A tf.constant mapping indices to output tokens. When None, the
      ids are treated as ascii+2 encoded characters.
    weights_fn: Weighting function. Must be `common_layers.weights_nonzero`.

  Returns:
    A tuple `(word_error_rate, reference_length)`, where `reference_length`
    is the float32 count of reference words.

  Raises:
    ValueError: If `weights_fn` is not `common_layers.weights_nonzero`.
  """

  def _decode_tokens(ids, vocab):
    """Maps token ids to words via `vocab`, cutting at the first <EOS>."""
    pieces = tf.gather(vocab, tf.cast(ids, tf.int32))
    sentence = tf.reduce_join(pieces, axis=1)
    truncated = tf.regex_replace(sentence, b"<EOS>.*", b"")
    # Underscores stand in for word boundaries in the joined string.
    spaced = tf.regex_replace(truncated, b"_", b" ")
    return tf.string_split(spaced, " ")

  def _decode_characters(ids, vocab):
    """Convert ascii+2 encoded codes to string-tokens."""
    shifted = tf.subtract(ids, 2)
    clipped = tf.clip_by_value(shifted, 0, 255)
    # The bitcast adds a trailing byte axis; index 0 of it is selected below.
    as_bytes = tf.bitcast(clipped, tf.uint8)
    chars = tf.gather(vocab, tf.cast(as_bytes, tf.int32))[:, :, 0]
    sentence = tf.reduce_join(chars, axis=1)
    without_nul = tf.regex_replace(sentence, b"\0", b"")
    return tf.string_split(without_nul, " ")

  character_mode = lookup is None
  if character_mode:
    # No vocabulary supplied: fall back to a 256-entry character table.
    lookup = tf.constant([chr(code) for code in range(256)])
  convert_fn = _decode_characters if character_mode else _decode_tokens

  if weights_fn is not common_layers.weights_nonzero:
    raise ValueError("Only weights_nonzero can be used for this metric.")

  with tf.variable_scope("word_error_rate", values=[raw_predictions, labels]):
    predicted_ids = tf.argmax(raw_predictions, axis=-1)
    predicted_ids = tf.squeeze(predicted_ids, axis=(2, 3))
    label_ids = tf.squeeze(labels, axis=(2, 3))

    reference = convert_fn(label_ids, lookup)
    hypothesis = convert_fn(predicted_ids, lookup)

    per_example = tf.edit_distance(hypothesis, reference, normalize=False)
    distance = tf.reduce_sum(per_example)

    word_count = tf.size(reference.values, out_type=tf.int32)
    reference_length = tf.cast(word_count, dtype=tf.float32)

    return distance / reference_length, reference_length