Python tensorflow.sequence_mask() Examples

The following are 30 code examples of tensorflow.sequence_mask(). You can go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module tensorflow, or try the search function.
Example #1
Source File: trf_bert_ebm_residual_estimator.py    From BERT with Apache License 2.0
def token_seq_truncted(token_seq, finished_index, max_length):
	seq_shape = bert_utils.get_shape_list(token_seq, expected_rank=[2,3])
	batch_size = seq_shape[0]
	token_seq = token_seq[:, :max_length]

	# Append one finished token to every row so that a match always exists.
	token_seq = tf.concat([token_seq, finished_index*tf.cast(tf.ones((batch_size, 1)), tf.int32)], axis=-1)

	token_seq = tf.cast(token_seq, tf.int32)
	seq_shape = bert_utils.get_shape_list(token_seq, expected_rank=[2,3])

	# Position of the finished token per row; non-matching positions get the
	# sequence length, e.g. [[5, 5, 2, 5, 4], [0, 5, 2, 3, 5], [5, 1, 5, 5, 5]].
	match_indices = tf.where(
		tf.equal(finished_index, token_seq),
		x=tf.range(seq_shape[1]) * tf.ones_like(token_seq),
		y=(seq_shape[1]) * tf.ones_like(token_seq))

	finished_pos = tf.reduce_min(match_indices, axis=1)
	sequence_mask = tf.sequence_mask(finished_pos+1, maxlen=seq_shape[1])

	# Zero out everything after the first finished token.
	token_seq = tf.cast(sequence_mask, tf.float32) * tf.cast(token_seq, tf.float32)

	return tf.cast(token_seq, tf.int32)
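The key trick above is finding the first occurrence of the finished token with tf.where/tf.reduce_min and turning that position into a prefix mask. A minimal standalone sketch of the same idea on a hypothetical toy batch (assuming TF 2.x eager execution, not the repository's own test code):

import tensorflow as tf

# Toy batch: 0 plays the role of the finished token (e.g. an EOS/[SEP] id).
token_seq = tf.constant([[7, 8, 0, 3],
                         [5, 6, 9, 0]], dtype=tf.int32)
finished_index = 0
seq_len = tf.shape(token_seq)[1]

# Index of the first finished token per row; rows without a match get seq_len.
match = tf.where(tf.equal(token_seq, finished_index),
                 x=tf.range(seq_len) * tf.ones_like(token_seq),
                 y=seq_len * tf.ones_like(token_seq))
finished_pos = tf.reduce_min(match, axis=1)                 # [2, 3]

# Keep everything up to and including the finished token, zero the rest.
mask = tf.sequence_mask(finished_pos + 1, maxlen=seq_len)   # boolean [2, 4]
truncated = token_seq * tf.cast(mask, tf.int32)             # [[7, 8, 0, 0], [5, 6, 9, 0]]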
Example #2
Source File: sig.py    From DeepXi with Mozilla Public License 2.0
def example(self, s, d, s_len, d_len, snr):
		"""
		Compute example for Deep Xi, i.e. observation (noisy-speech STMS)
		and target (mapped a priori SNR).

		Argument/s:
			s - clean speech (dtype=tf.int32).
			d - noise (dtype=tf.int32).
			s_len - clean-speech length without padding (samples).
			d_len - noise length without padding (samples).
			snr - SNR level.

		Returns:
			x_STMS - noisy-speech short-time magnitude spectrum.
			xi_bar - mapped a priori SNR.
			n_frames - number of time-domain frames.
		"""
		s_STMS, d_STMS, x_STMS, n_frames = self.mix(s, d, s_len, d_len, snr)
		mask = tf.expand_dims(tf.cast(tf.sequence_mask(n_frames), tf.float32), 2)
		xi_bar = tf.multiply(self.xi_bar(s_STMS, d_STMS), mask)
		return x_STMS, xi_bar, n_frames 
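The `mask` line is doing the sequence-related work here: padded time frames must not contribute to the mapped a priori SNR target. A minimal sketch of just that masking step with hypothetical shapes (not DeepXi's actual feature pipeline):

import tensorflow as tf

# Toy shapes: 2 utterances padded to 5 time frames, 3 frequency bins.
x_stms = tf.random.uniform([2, 5, 3])
n_frames = tf.constant([3, 5])

# Boolean [batch, frames] mask, cast and expanded to [batch, frames, 1] so it
# broadcasts over the frequency axis; the padded frames are zeroed out.
mask = tf.expand_dims(tf.cast(tf.sequence_mask(n_frames, maxlen=5), tf.float32), 2)
masked_stms = x_stms * mask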
Example #3
Source File: nn_module.py    From tensorflow-XNN with MIT License
def embed_subword(x, size, dim, sequence_length, seed=0, mask_zero=False, maxlen=None):
    # std = np.sqrt(2 / dim)
    std = 0.001
    minval = -std
    maxval = std
    emb = tf.Variable(tf.random_uniform([size, dim], minval, maxval, dtype=tf.float32, seed=seed))
    # None * max_seq_len * max_word_len * embed_dim
    out = tf.nn.embedding_lookup(emb, x)
    if mask_zero:
        # word_len: None * max_seq_len
        # mask: shape=None * max_seq_len * max_word_len
        mask = tf.sequence_mask(sequence_length, maxlen)
        mask = tf.expand_dims(mask, axis=-1)
        mask = tf.cast(mask, tf.float32)
        out = out * mask
    # None * max_seq_len * embed_dim
    # according to facebook subword paper, it's sum
    out = tf.reduce_sum(out, axis=2)
    return out 
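With `mask_zero=True`, padded subword slots are zeroed before the sum so they cannot leak into the word vector. A minimal sketch of that pattern with toy shapes (names such as `emb_table` are made up for illustration):

import tensorflow as tf

# Toy input: 1 sequence of 2 words, each padded to 4 subword ids.
subword_ids = tf.constant([[[3, 5, 0, 0],
                            [7, 0, 0, 0]]])                  # [1, 2, 4]
word_len = tf.constant([[2, 1]])                             # true subword counts, [1, 2]

emb_table = tf.random.uniform([10, 6])                       # toy embedding table
vecs = tf.nn.embedding_lookup(emb_table, subword_ids)        # [1, 2, 4, 6]

# sequence_mask accepts 2-D lengths, giving a [1, 2, 4] mask over subword slots.
mask = tf.cast(tf.sequence_mask(word_len, maxlen=4), tf.float32)
word_vecs = tf.reduce_sum(vecs * tf.expand_dims(mask, -1), axis=2)   # [1, 2, 6]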
Example #4
Source File: normalisation.py    From DeepXi with Mozilla Public License 2.0
def SeqLayerNorm(input, seq_len, centre=True, scale=True): # layer norm for 3D tensor.
	mask = tf.cast(tf.expand_dims(tf.sequence_mask(seq_len), 2), tf.float32) # convert mask to float.
	input_dim = input.get_shape().as_list()[-1] # get number of input dimensions.
	den = tf.multiply(tf.reduce_sum(mask, axis=1, keepdims=True), input_dim) # number of unmasked elements per example.
	mean = tf.divide(tf.reduce_sum(tf.multiply(input, mask), axis=[1, 2], keepdims=True), den) # masked mean over time and feature axes.
	var = tf.divide(tf.reduce_sum(tf.multiply(tf.square(tf.subtract(input, mean)), mask), axis=[1, 2],
	 	keepdims=True), den) # masked variance over time and feature axes.
	if centre:
		beta = tf.get_variable("beta", input_dim, dtype=tf.float32,  
			initializer=tf.constant_initializer(0.0), trainable=True)
	else: beta = tf.constant(np.zeros(input_dim), name="beta", dtype=tf.float32)
	if scale:
		gamma = tf.get_variable("Gamma", input_dim, dtype=tf.float32,  
			initializer=tf.constant_initializer(1.0), trainable=True)
	else: gamma = tf.constant(np.ones(input_dim), name="Gamma", dtype=tf.float32)
	norm = tf.nn.batch_normalization(input, mean, var, offset=beta, scale=gamma, 
		variance_epsilon = 1e-12) # normalise batch.
	norm = tf.multiply(norm, mask)
	return norm 
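The statistics above are masked statistics: padded frames are excluded from both the mean and the variance. The core of that computation, stripped of the learnable beta/gamma, as a sketch with hypothetical shapes:

import tensorflow as tf

# Toy [batch, time, feat] input; the second example is padded after 2 frames.
x = tf.random.uniform([2, 4, 3])
seq_len = tf.constant([4, 2])

mask = tf.cast(tf.expand_dims(tf.sequence_mask(seq_len, maxlen=4), 2), tf.float32)  # [2, 4, 1]
den = tf.reduce_sum(mask, axis=[1, 2], keepdims=True) * 3.0      # valid elements per example
mean = tf.reduce_sum(x * mask, axis=[1, 2], keepdims=True) / den
var = tf.reduce_sum(tf.square(x - mean) * mask, axis=[1, 2], keepdims=True) / den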
Example #5
Source File: sequence_tagger.py    From OpenNMT-tf with MIT License
def update_metrics(self, metrics, predictions, labels):
    weights = tf.sequence_mask(
        labels["length"], maxlen=tf.shape(labels["tags"])[1], dtype=tf.float32)

    metrics["accuracy"].update_state(
        labels["tags_id"], predictions["tags_id"], sample_weight=weights)

    if self.tagging_scheme in ("bioes",):
      flag_fn = None
      if self.tagging_scheme == "bioes":
        flag_fn = flag_bioes_tags

      gold_flags, predicted_flags = tf.numpy_function(
          flag_fn,
          [labels["tags"], predictions["tags"], labels["length"]],
          [tf.bool, tf.bool])

      metrics["f1"].update_state(gold_flags, predicted_flags) 
Example #6
Source File: model.py    From nslt with Apache License 2.0
def _compute_loss(self, logits):
        """Compute optimization loss."""
        target_output = self.iterator.target_output

        if self.time_major:
            target_output = tf.transpose(target_output)

        max_time = self.get_max_time(target_output)

        crossent = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=target_output, logits=logits)

        target_weights = tf.sequence_mask(self.iterator.target_sequence_length, max_time, dtype=logits.dtype)

        if self.time_major:
            target_weights = tf.transpose(target_weights)

        loss = tf.reduce_sum(crossent * target_weights) / tf.to_float(self.batch_size)

        return loss 
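Ignoring the time-major transposes, the loss reduces to a weighted sum of per-token cross-entropies, with tf.sequence_mask supplying the weights. A batch-major sketch with toy shapes (not the nslt model itself):

import tensorflow as tf

# Toy batch: 2 target sequences padded to 3 steps, vocabulary of 5.
logits = tf.random.uniform([2, 3, 5])
targets = tf.constant([[1, 4, 0],
                       [2, 0, 0]])
target_len = tf.constant([2, 1])

crossent = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=targets, logits=logits)
# [[1, 1, 0], [1, 0, 0]]: padded steps contribute zero loss.
weights = tf.sequence_mask(target_len, maxlen=3, dtype=logits.dtype)
loss = tf.reduce_sum(crossent * weights) / 2.0               # normalise by batch size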
Example #7
Source File: ner_model.py    From robotreviewer with GNU General Public License v3.0
def add_loss_op(self):
        """Defines the loss"""
        if self.config.use_crf:
            log_likelihood, trans_params = tf.contrib.crf.crf_log_likelihood(
                    self.logits, self.labels, self.sequence_lengths)
            self.trans_params = trans_params # need to evaluate it for decoding
            self.loss = tf.reduce_mean(-log_likelihood)
        else:
            losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
                    logits=self.logits, labels=self.labels)
            mask = tf.sequence_mask(self.sequence_lengths)
            losses = tf.boolean_mask(losses, mask)
            self.loss = tf.reduce_mean(losses)

        # for tensorboard
        tf.summary.scalar("loss", self.loss) 
Example #8
Source File: tf_utils.py    From mipsqa with Apache License 2.0
def exp_mask(logits, mask, mask_is_length=True):
  """Exponential mask for logits.

  Logits cannot be masked with 0 (i.e. by multiplying a boolean mask)
  because exponentiating 0 gives 1. `exp_mask` instead adds a very large
  negative value to the `False` portion of `mask` so that the portion is
  effectively ignored when exponentiated, e.g. softmaxed.

  Args:
    logits: Arbitrary-rank logits tensor to be masked.
    mask: `boolean` type mask tensor.
      Could be same shape as logits (`mask_is_length=False`)
      or could be length tensor of the logits (`mask_is_length=True`).
    mask_is_length: `bool` value; whether `mask` is a length tensor (rather than a boolean mask).
  Returns:
    Masked logits with the same shape of `logits`.
  """
  if mask_is_length:
    mask = tf.sequence_mask(mask, maxlen=tf.shape(logits)[-1])
  return logits + (1.0 - tf.cast(mask, 'float')) * VERY_LARGE_NEGATIVE_VALUE 
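`VERY_LARGE_NEGATIVE_VALUE` is defined elsewhere in the module; a toy run of the length-mask branch might look like the following sketch (the constant's value here is an assumption):

import tensorflow as tf

VERY_LARGE_NEGATIVE_VALUE = -1e12   # assumed module-level constant

logits = tf.constant([[2.0, 1.0, 3.0]])
lengths = tf.constant([2])

mask = tf.sequence_mask(lengths, maxlen=3)                   # [[True, True, False]]
masked_logits = logits + (1.0 - tf.cast(mask, tf.float32)) * VERY_LARGE_NEGATIVE_VALUE
probs = tf.nn.softmax(masked_logits)                         # third position gets ~0 probability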
Example #9
Source File: rewards.py    From Counterfactual-StoryRW with MIT License
def _discount_reward_tensor_1d(reward, sequence_length,
                               discount=1., dtype=None):
    if sequence_length is None:
        raise ValueError('sequence_length must not be `None` for 1D reward.')

    batch_size = tf.shape(reward)[0]
    max_seq_length = tf.reduce_max(sequence_length)
    dtype = dtype or reward.dtype

    if discount == 1.:
        dmat = tf.ones(
            tf.concat([[batch_size], [max_seq_length]], 0), dtype=dtype)
    else:
        mask = tf.sequence_mask(sequence_length, dtype=dtype)
        mask = tf.concat([mask[:, 1:], tf.zeros_like(mask[:, -1:])], axis=1)
        # Make each row = [discount, ..., discount, 1, ..., 1]
        dmat = mask * discount + (1 - mask)
        dmat = tf.cumprod(dmat, axis=1, reverse=True)

    disc_reward = dmat * tf.expand_dims(reward, -1)
    disc_reward = mask_sequences(
        disc_reward, sequence_length, dtype=dtype, tensor_rank=2)

    return disc_reward 
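The shifted mask turns the scalar discount into a per-step factor, and the reversed cumulative product folds it into decreasing powers of the discount. A worked toy case of just the masking/discount step:

import tensorflow as tf

sequence_length = tf.constant([2, 4])
discount = 0.9

mask = tf.sequence_mask(sequence_length, dtype=tf.float32)   # [[1, 1, 0, 0], [1, 1, 1, 1]]
mask = tf.concat([mask[:, 1:], tf.zeros_like(mask[:, -1:])], axis=1)
# Each row becomes [discount, ..., discount, 1, ..., 1].
dmat = mask * discount + (1 - mask)
dmat = tf.math.cumprod(dmat, axis=1, reverse=True)
# dmat row 0: [0.9, 1, 1, 1]; row 1: [0.729, 0.81, 0.9, 1]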
Example #10
Source File: tfutil.py    From inferbeddings with MIT License
def mask_3d(sequences, sequence_lengths, mask_value, dimension=2):
    """
    Given a batch of matrices, each with shape m x n, mask the values in each
    row after the positions indicated in sentence_sizes.
    This function is supposed to mask the last columns in the raw attention
    matrix (e_{i, j}) in cases where the sentence2 is smaller than the
    maximum.
    :param sequences: tensor with shape (batch_size, m, n)
    :param sequence_lengths: tensor with shape (batch_size) containing the sentence sizes that
        should be limited
    :param mask_value: scalar value to assign to items after sentence size
    :param dimension: over which dimension to mask values
    :return: a tensor with the same shape as `values`
    """
    if dimension == 1:
        sequences = tf.transpose(sequences, [0, 2, 1])
    time_steps1, time_steps2 = tf.shape(sequences)[1], tf.shape(sequences)[2]
    ones = tf.ones_like(sequences, dtype=tf.int32)
    pad_values = mask_value * tf.cast(ones, tf.float32)
    mask = tf.sequence_mask(sequence_lengths, time_steps2)
    # mask is (batch_size, sentence2_size). we have to tile it for 3d
    mask3d = tf.tile(tf.expand_dims(mask, 1), (1, time_steps1, 1))
    masked = tf.where(mask3d, sequences, pad_values)
    return tf.transpose(masked, [0, 2, 1]) if dimension == 1 else masked 
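Because the mask produced by tf.sequence_mask is 2-D, it has to be tiled across the rows of each attention matrix before tf.where can apply it. A minimal sketch with hypothetical shapes:

import tensorflow as tf

# Attention scores for one sentence pair: 2 x 4 entries, but sentence2 really
# contains only 3 tokens.
scores = tf.zeros([1, 2, 4])
sent2_len = tf.constant([3])

mask = tf.sequence_mask(sent2_len, 4)                        # [1, 4]
mask3d = tf.tile(tf.expand_dims(mask, 1), (1, 2, 1))         # [1, 2, 4]
masked = tf.where(mask3d, scores, -1e9 * tf.ones_like(scores))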
Example #11
Source File: triples2seq.py    From Zeroshot-QuestionGeneration with MIT License
def __create_loss(self):

        print('Creating loss...')
        start = time.time()

        self.decoder_logits = tf.identity(self.decoder_outputs_train.rnn_output, name="decoder_logits")
        self.decoder_pred = tf.argmax(self.decoder_logits, axis=-1, name="decoder_pred")

        # mask the sequence so the loss is computed only over the real (unpadded) time steps
        mask = tf.sequence_mask(self.decoder_inputs_length_train, maxlen=self.decoder_max_length, dtype=tf.float32,
                                name="masks")

        # Control loss dimensions with `average_across_timesteps` and `average_across_batch`
        self.loss = tf.contrib.seq2seq.sequence_loss(logits=self.decoder_logits,
                                                     targets=self.decoder_targets_train,
                                                     average_across_timesteps=False,
                                                     average_across_batch=False,
                                                     weights=mask,
                                                     name="batch_loss")

        print('Building loss in: ', time.time() - start, ' secs') 
Example #12
Source File: attention_test.py    From shortest-path with The Unlicense
def test_softmax_masking2(self):

        max_len = 3
        axis = 1
        logits = tf.zeros([max_len, max_len])
        seq_len = [1,2,3]
        mask = tf.sequence_mask(seq_len, max_len)

        r = softmax_with_masking(logits, mask, axis)
        r = np.array(r)

        expected = np.array([
            [1.0,0.0,0],
            [0.5,0.5,0],
            [1.0/3.0, 1.0/3.0, 1.0/3.0],
        ])

        np.testing.assert_almost_equal(r, expected) 
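The function under test, softmax_with_masking, is not reproduced on this page. A minimal implementation consistent with the expected values in these tests might look like the following sketch (an assumption, not the project's actual code):

import tensorflow as tf

def softmax_with_masking(logits, mask, axis):
    # Exponentiate, zero out masked positions, then renormalise so that
    # masked entries receive exactly zero probability.
    exp = tf.exp(logits) * tf.cast(mask, logits.dtype)
    return exp / tf.reduce_sum(exp, axis=axis, keepdims=True)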
Example #13
Source File: tripletext2seq.py    From Zeroshot-QuestionGeneration with MIT License
def __create_loss(self):

        print('Creating loss...')
        start = time.time()

        self.decoder_logits = tf.identity(self.decoder_outputs_train.rnn_output, name="decoder_logits")
        self.decoder_pred = tf.argmax(self.decoder_logits, axis=-1, name="decoder_pred")

        # mask the sequence so the loss is computed only over the real (unpadded) time steps
        mask = tf.sequence_mask(self.decoder_inputs_length_train, maxlen=self.decoder_max_length, dtype=tf.float32,
                                name="masks")

        # Control loss dimensions with `average_across_timesteps` and `average_across_batch`
        self.loss = tf.contrib.seq2seq.sequence_loss(logits=self.decoder_logits,
                                                     targets=self.decoder_targets_train,
                                                     average_across_timesteps=False,
                                                     average_across_batch=False,
                                                     weights=mask,
                                                     name="batch_loss")

        print('Building loss in: ', time.time() - start, ' secs') 
Example #14
Source File: model.py    From OpenNMT-tf with MIT License
def score(self, features, labels):
    outputs, _ = self(features, labels=labels)
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels["ids_out"], outputs["logits"])
    weights = tf.sequence_mask(labels["length"], dtype=cross_entropy.dtype)
    masked_cross_entropy = cross_entropy * weights
    scores = tf.reduce_sum(masked_cross_entropy, axis=1)
    results = {
        "cross_entropy": cross_entropy,
        "score": scores,
        "tokens": labels["tokens"],
        "length": self.decoder_inputter.get_length(labels, ignore_special_tokens=True)
    }
    if "attention" in outputs:
      results["attention"] = outputs["attention"]
    return results 
Example #15
Source File: loss_modules.py    From ludwig with Apache License 2.0
def cross_entropy_sequence_loss(logits, targets, sequence_length):
    """Calculates the per-example cross-entropy loss for a sequence of logits and
      masks out all losses passed the sequence length.
    Args:
      logits: Logits of shape `[B, T, vocab_size]`
      targets: Target classes of shape `[B, T]`
      sequence_length: An int32 tensor of shape `[B]` corresponding
        to the length of each input
    Returns:
      A tensor of shape [T, B] that contains the loss per example, per time step.
    """
    with tf.compat.v1.variable_scope('sequence_loss'):
        losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=targets)
        # Mask out the losses we don't care about
        loss_mask = tf.sequence_mask(
            tf.cast(sequence_length, tf.int32),
            tf.cast(tf.shape(targets)[1], tf.int32)
        )
        losses = losses * tf.cast(loss_mask, tf.float32)
        return losses 
Example #16
Source File: attention_test.py    From shortest-path with The Unlicense
def test_softmax_masking(self):

        max_len = 3
        axis = 1
        logits = tf.eye(max_len)
        seq_len = [1,2,2]
        mask = tf.sequence_mask(seq_len, max_len)

        r = softmax_with_masking(logits, mask, axis)
        r = np.array(r)

        d = math.exp(1) + math.exp(0)

        expected = np.array([
            [1,0,0],
            [math.exp(0)/d, math.exp(1)/d,0],
            [0.5, 0.5, 0],
        ])

        np.testing.assert_almost_equal(r, expected) 
Example #17
Source File: nn_module.py    From BERT with Apache License 2.0
def embed_subword(x, size, dim, sequence_length, seed=0, mask_zero=False, maxlen=None):
    # std = np.sqrt(2 / dim)
    std = 0.001
    minval = -std
    maxval = std
    emb = tf.Variable(tf.random_uniform([size, dim], minval, maxval, dtype=tf.float32, seed=seed))
    # None * max_seq_len * max_word_len * embed_dim
    out = tf.nn.embedding_lookup(emb, x)
    if mask_zero:
        # word_len: None * max_seq_len
        # mask: shape=None * max_seq_len * max_word_len
        mask = tf.sequence_mask(sequence_length, maxlen)
        mask = tf.expand_dims(mask, axis=-1)
        mask = tf.cast(mask, tf.float32)
        out = out * mask
    # None * max_seq_len * embed_dim
    # according to facebook subword paper, it's sum
    out = tf.reduce_sum(out, axis=2)
    return out 
Example #18
Source File: area_attention.py    From BERT with Apache License 2.0
def lengths_to_area_mask(feature_length, length, max_area_size):
  """Generates a non-padding mask for areas based on lengths.

  Args:
    feature_length: a tensor of [batch_size]
    length: the length of the batch
    max_area_size: the maximum area size considered
  Returns:
    mask: a tensor in shape of [batch_size, num_areas]
  """

  paddings = tf.cast(tf.expand_dims(
      tf.logical_not(
          tf.sequence_mask(feature_length, maxlen=length)), 2), tf.float32)
  _, _, area_sum, _, _ = compute_area_features(paddings,
                                               max_area_width=max_area_size)
  mask = tf.squeeze(tf.logical_not(tf.cast(area_sum, tf.bool)), [2])
  return mask 
Example #19
Source File: trf_bert_ebm_gpt_estimator.py    From BERT with Apache License 2.0
def token_seq_truncted(token_seq, finished_index, max_length): 
	seq_shape = bert_utils.get_shape_list(token_seq, expected_rank=[2,3])
	batch_size = seq_shape[0]
	token_seq = token_seq[:, :max_length]

	token_seq = tf.concat([token_seq, finished_index*tf.cast(tf.ones((batch_size, 1)), tf.int32)], axis=-1)

	token_seq = tf.cast(token_seq, tf.int32)
	seq_shape = bert_utils.get_shape_list(token_seq, expected_rank=[2,3])
	match_indices = tf.where(  # e.g. [[5, 5, 2, 5, 4], [0, 5, 2, 3, 5], [5, 1, 5, 5, 5]]
		tf.equal(finished_index, token_seq),
		x=tf.range(seq_shape[1]) * tf.ones_like(token_seq),
		y=(seq_shape[1]) * tf.ones_like(token_seq))

	finished_pos = tf.reduce_min(match_indices, axis=1)
	sequence_mask = tf.sequence_mask(finished_pos+1, maxlen=seq_shape[1])

	token_seq = tf.cast(sequence_mask, tf.float32) * tf.cast(token_seq, tf.float32)

	return tf.cast(token_seq, tf.int32)
Example #20
Source File: trf_bert_ebm_gpt.py    From BERT with Apache License 2.0
def token_seq_truncted(token_seq, finished_index, max_length): 
	seq_shape = bert_utils.get_shape_list(token_seq, expected_rank=[2,3])
	batch_size = seq_shape[0]
	token_seq = token_seq[:, :max_length]

	token_seq = tf.concat([token_seq, finished_index*tf.cast(tf.ones((batch_size, 1)), tf.int32)], axis=-1)

	token_seq = tf.cast(token_seq, tf.int32)
	seq_shape = bert_utils.get_shape_list(token_seq, expected_rank=[2,3])
	match_indices = tf.where(  # e.g. [[5, 5, 2, 5, 4], [0, 5, 2, 3, 5], [5, 1, 5, 5, 5]]
		tf.equal(finished_index, token_seq),
		x=tf.range(seq_shape[1]) * tf.ones_like(token_seq),
		y=(seq_shape[1]) * tf.ones_like(token_seq))

	finished_pos = tf.reduce_min(match_indices, axis=1)
	sequence_mask = tf.sequence_mask(finished_pos+1, maxlen=seq_shape[1])

	token_seq = tf.cast(sequence_mask, tf.float32) * tf.cast(token_seq, tf.float32)

	return tf.cast(token_seq, tf.int32)
Example #21
Source File: networks.py    From bangla-tts with GNU General Public License v3.0
def Attention(Q, K, V, mononotic_attention=False, prev_max_attentions=None):
    '''
    Args:
      Q: Queries. (B, T/r, d)
      K: Keys. (B, N, d)
      V: Values. (B, N, d)
      mononotic_attention: A boolean. At training, it is False.
      prev_max_attentions: (B,). At training, it is set to None.

    Returns:
      R: [Context Vectors; Q]. (B, T/r, 2d)
      alignments: (B, N, T/r)
      max_attentions: (B, T/r)
    '''
    A = tf.matmul(Q, K, transpose_b=True) * tf.rsqrt(tf.to_float(d))
    if mononotic_attention:  # for inference
        key_masks = tf.sequence_mask(prev_max_attentions, max_N)
        reverse_masks = tf.sequence_mask(max_N - attention_win_size - prev_max_attentions, max_N)[:, ::-1]
        masks = tf.logical_or(key_masks, reverse_masks)
        masks = tf.tile(tf.expand_dims(masks, 1), [1, max_T, 1])
        paddings = tf.ones_like(A) * (-2 ** 32 + 1)  # (B, T/r, N)
        A = tf.where(tf.equal(masks, False), A, paddings)
    A = tf.nn.softmax(A) # (B, T/r, N)
    max_attentions = tf.argmax(A, -1)  # (B, T/r)
    R = tf.matmul(A, V)
    R = tf.concat((R, Q), -1)

    alignments = tf.transpose(A, [0, 2, 1]) # (B, N, T/r)

    return R, alignments, max_attentions 
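During inference the two sequence masks carve out a sliding window of attendable keys just ahead of the previously attended position. A toy sketch of only that windowing step (the hyperparameter values are made up):

import tensorflow as tf

# Toy values: keys padded to N = 6, attention window of 3, previous max
# attention at key position 1.
max_N, attention_win_size = 6, 3
prev_max_attentions = tf.constant([1])

key_masks = tf.sequence_mask(prev_max_attentions, max_N)                  # [[T, F, F, F, F, F]]
reverse_masks = tf.sequence_mask(max_N - attention_win_size - prev_max_attentions, max_N)[:, ::-1]  # [[F, F, F, F, T, T]]
masks = tf.logical_or(key_masks, reverse_masks)
# True marks keys *outside* the window, so only keys 1..3 remain attendable.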
Example #22
Source File: encoder.py    From OpenNMT-tf with MIT License
def build_mask(self, inputs, sequence_length=None, dtype=tf.bool):
    """Builds a boolean mask for :obj:`inputs`."""
    if sequence_length is None:
      return None
    return tf.sequence_mask(sequence_length, maxlen=tf.shape(inputs)[1], dtype=dtype) 
Example #23
Source File: bert_seq_sample_utils.py    From BERT with Apache License 2.0
def get_finised_pos_v1(token_seq, finished_index, max_length): 
	seq_shape = bert_utils.get_shape_list(token_seq, expected_rank=[2,3])
	match_indices = tf.where(  # e.g. [[5, 5, 2, 5, 4], [0, 5, 2, 3, 5], [5, 1, 5, 5, 5]]
		tf.equal(finished_index, token_seq),
		x=tf.range(seq_shape[1]) * tf.ones_like(token_seq),
		y=(seq_shape[1]) * tf.ones_like(token_seq))

	finished_pos = tf.reduce_min(match_indices, axis=1)
	sequence_mask = tf.sequence_mask(finished_pos+1, maxlen=max_length)
	return sequence_mask 
Example #24
Source File: bert_seq_sample_utils.py    From BERT with Apache License 2.0
def get_finised_pos(token_seq, finished_index, max_length): 
	tmp_indices = tf.where(tf.equal(token_seq, int(finished_index)))
	finished_pos = tf.segment_min(tmp_indices[:, 1], tmp_indices[:, 0])
	sequence_mask = tf.sequence_mask(finished_pos+1, maxlen=max_length)
	return tf.cast(sequence_mask, tf.int32) 
Example #25
Source File: losses.py    From OpenNMT-tf with MIT License
def cross_entropy_sequence_loss(logits,
                                labels,
                                sequence_length,
                                label_smoothing=0.0,
                                average_in_time=False,
                                training=None):
  """Computes the cross entropy loss of sequences.

  Args:
    logits: The unscaled probabilities.
    labels: The true labels.
    sequence_length: The length of each sequence.
    label_smoothing: The label smoothing value.
    average_in_time: If ``True``, also average the loss in the time dimension.
    training: Compute training loss.

  Returns:
    A tuple (cumulated loss, loss normalizer, token-level normalizer).
  """
  batch_size = tf.shape(logits)[0]
  max_time = tf.shape(logits)[1]

  cross_entropy = _softmax_cross_entropy(logits, labels, label_smoothing, training)
  weights = tf.sequence_mask(
      sequence_length, maxlen=max_time, dtype=cross_entropy.dtype)
  loss = tf.reduce_sum(cross_entropy * weights)
  loss_token_normalizer = tf.reduce_sum(weights)

  if average_in_time or not training:
    loss_normalizer = loss_token_normalizer
  else:
    loss_normalizer = tf.cast(batch_size, loss.dtype)

  return loss, loss_normalizer, loss_token_normalizer 
Example #26
Source File: bert_seq_utils.py    From BERT with Apache License 2.0
def get_finised_pos(token_seq, finished_index, max_length): 
	tmp_indices = tf.where(tf.equal(token_seq, int(finished_index)))
	finished_pos = tf.segment_min(tmp_indices[:, 1], tmp_indices[:, 0])
	sequence_mask = tf.sequence_mask(finished_pos+1, maxlen=max_length)
	return tf.cast(sequence_mask, tf.int32) 
Example #27
Source File: bert_seq_tpu_utils.py    From BERT with Apache License 2.0
def get_finised_pos_v1(token_seq, finished_index, max_length): 
	seq_shape = bert_utils.get_shape_list(token_seq, expected_rank=[2,3])
	match_indices = tf.where(  # e.g. [[5, 5, 2, 5, 4], [0, 5, 2, 3, 5], [5, 1, 5, 5, 5]]
		tf.equal(finished_index, token_seq),
		x=tf.range(seq_shape[1]) * tf.ones_like(token_seq),
		y=(seq_shape[1]) * tf.ones_like(token_seq))

	finished_pos = tf.reduce_min(match_indices, axis=1)
	sequence_mask = tf.sequence_mask(finished_pos+1, maxlen=max_length)
	return tf.cast(sequence_mask, tf.int32) 
Example #28
Source File: char_embedding_utils.py    From BERT with Apache License 2.0
def lstm_char_embedding(char_token, char_lengths, char_embedding, 
    config, is_training=True, reuse=None):
    dropout_rate = tf.cond(is_training, 
                        lambda:config.dropout_rate, 
                        lambda:0.0)

    with tf.variable_scope(config.scope+"_lstm_char_embedding_layer", reuse=reuse):
        char_dim = char_embedding.get_shape()[-1]
        input_shape = tf.shape(char_token)
        batch_size = input_shape[0]
        question_len = input_shape[1]
        char_len = input_shape[2]

        in_question_char_repres = tf.nn.embedding_lookup(char_embedding, char_token)
        in_question_char_repres = tf.reshape(in_question_char_repres, shape=[-1, char_len, char_dim])
        question_char_lengths = tf.reshape(char_lengths, [-1])
        quesiton_char_mask = tf.sequence_mask(question_char_lengths, char_len, dtype=tf.float32)  # [batch_size*question_len, q_char_len]
        in_question_char_repres = tf.multiply(in_question_char_repres, tf.expand_dims(quesiton_char_mask, axis=-1))

        (question_char_outputs_fw, question_char_outputs_bw, _) = layer_utils.my_lstm_layer(in_question_char_repres, config.char_lstm_dim,
                input_lengths=question_char_lengths,scope_name="char_lstm", reuse=reuse,
                is_training=is_training, dropout_rate=dropout_rate, use_cudnn=config.use_cudnn)
        question_char_outputs_fw = layer_utils.collect_final_step_of_lstm(question_char_outputs_fw, question_char_lengths - 1)
        question_char_outputs_bw = question_char_outputs_bw[:, 0, :]
        question_char_outputs = tf.concat(axis=1, values=[question_char_outputs_fw, question_char_outputs_bw])
        question_char_outputs = tf.reshape(question_char_outputs, [batch_size, question_len, 2*config.char_lstm_dim])

        return question_char_outputs 
Example #29
Source File: bert_seq_tpu_utils.py    From BERT with Apache License 2.0
def get_finised_pos(token_seq, finished_index, max_length): 
	tmp_indices = tf.where(tf.equal(token_seq, int(finished_index)))
	finished_pos = tf.segment_min(tmp_indices[:, 1], tmp_indices[:, 0])
	sequence_mask = tf.sequence_mask(finished_pos+1, maxlen=max_length)
	return tf.cast(sequence_mask, tf.int32) 
Example #30
Source File: MaskLayer.py    From NPNs with GNU General Public License v3.0
def __call__(self, m,seq_len):
        with tf.variable_scope(self.scope) as scope:
            self.check_reuse(scope)
            
            max_length = int(m.get_shape()[1])
            seq_len_mask = tf.sequence_mask(seq_len,maxlen = max_length, dtype = m.dtype)
            rank = m.get_shape().ndims
            extra_ones = tf.ones(rank - 2, dtype=tf.int32)
            seq_len_mask = tf.reshape(seq_len_mask, tf.concat((tf.shape(seq_len_mask), extra_ones), 0))
            if not self.mask_from_right:
                seq_len_mask = 1-seq_len_mask
            return m * seq_len_mask - ((seq_len_mask - 1) * self.mask_value)
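For the default mask_from_right behaviour, the layer amounts to keeping the valid prefix of each sequence and overwriting the padded tail with mask_value. A functional sketch of that equivalence (a hypothetical helper, assuming statically known tensor ranks; not the project's class):

import tensorflow as tf

def mask_tail(m, seq_len, mask_value=0.0):
    # Keep values inside each sequence and fill the padded tail with mask_value.
    mask = tf.sequence_mask(seq_len, maxlen=tf.shape(m)[1], dtype=m.dtype)
    while mask.shape.rank < m.shape.rank:
        mask = tf.expand_dims(mask, -1)
    return m * mask + (1 - mask) * mask_value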