Python tensorflow.compat.v1.reduce_mean() Examples

The following are 30 code examples of tensorflow.compat.v1.reduce_mean(). You can go to the original project or source file by following the link above each example. You may also want to check out all available functions/classes of the module tensorflow.compat.v1, or try the search function.
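Before the project examples, here is a minimal, hypothetical sketch (not taken from any of the projects below) showing what tf.compat.v1.reduce_mean() computes, assuming the usual TF1 compatibility import:

# A minimal sketch, not from the projects below; assumes TF1-style graph mode.
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

x = tf.constant([[1.0, 2.0], [3.0, 4.0]])
mean_all = tf.reduce_mean(x)                               # scalar: 2.5
mean_rows = tf.reduce_mean(x, axis=1)                      # shape [2]: [1.5, 3.5]
mean_keep = tf.reduce_mean(x, axis=[-1], keepdims=True)    # shape [2, 1]

with tf.Session() as sess:
  print(sess.run([mean_all, mean_rows, mean_keep]))

Most of the examples below use the same call to average per-example losses or metrics into a single scalar.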
Example #1
Source File: shuffle_network.py    From tensor2tensor with Apache License 2.0
def loss(self, logits, features):
    """Loss function for Neural Shuffle-Exchange network.

    We use a custom loss function because the default loss function doesn't
    take padding into account when calculating the loss. We assume that the
    output string is the same length as the input. If you need another type
    of output, please feel free to modify this.

    Args:
      logits: Logits from model
      features: Features, not in one-hot format

    Returns:
       tf.Tensor: Loss value
    """

    onehot_labels = tf.one_hot(features["targets"],
                               self._problem_hparams.vocab_size["targets"])
    cost_vector = tf.nn.softmax_cross_entropy_with_logits_v2(
        logits=logits, labels=onehot_labels)
    return tf.reduce_mean(cost_vector) 
Example #2
Source File: common_layers_test.py    From tensor2tensor with Apache License 2.0
def testDmlLoss(self, batch, height, width, num_mixtures, reduce_sum):
    channels = 3
    pred = tf.random_normal([batch, height, width, num_mixtures * 10])
    labels = tf.random_uniform([batch, height, width, channels],
                               minval=0, maxval=256, dtype=tf.int32)
    actual_loss_num, actual_loss_den = common_layers.dml_loss(
        pred=pred, labels=labels, reduce_sum=reduce_sum)
    actual_loss = actual_loss_num / actual_loss_den

    real_labels = common_layers.convert_rgb_to_symmetric_real(labels)
    expected_loss = common_layers.discretized_mix_logistic_loss(
        pred=pred, labels=real_labels) / channels
    if reduce_sum:
      expected_loss = tf.reduce_mean(expected_loss)

    actual_loss_val, expected_loss_val = self.evaluate(
        [actual_loss, expected_loss])
    self.assertAllClose(actual_loss_val, expected_loss_val) 
Example #3
Source File: common_layers.py    From tensor2tensor with Apache License 2.0
def double_discriminator(x, filters1=128, filters2=None,
                         kernel_size=8, strides=4, pure_mean=False):
  """A convolutional discriminator with 2 layers and concatenated output."""
  if filters2 is None:
    filters2 = 4 * filters1
  with tf.variable_scope("discriminator"):
    batch_size = shape_list(x)[0]
    net = layers().Conv2D(
        filters1, kernel_size, strides=strides, padding="SAME", name="conv1")(x)
    if pure_mean:
      net1 = tf.reduce_mean(net, [1, 2])
    else:
      net1 = mean_with_attention(net, "mean_with_attention1")
      # The result of this reshape is not assigned and therefore unused;
      # net1 remains the attention-weighted mean computed above.
      tf.reshape(net, [batch_size, -1])
    net = tf.nn.relu(net)
    net = layers().Conv2D(
        filters2, kernel_size, strides=strides, padding="SAME",
        name="conv2")(net)
    if pure_mean:
      net2 = tf.reduce_mean(net, [1, 2])
    else:
      net2 = mean_with_attention(net, "mean_with_attention2")
    return tf.concat([net1, net2], axis=-1) 
Example #4
Source File: metrics.py    From tensor2tensor with Apache License 2.0
def two_class_log_likelihood(predictions, labels, weights_fn=None):
  """Log-likelihood for two class classification with 0/1 labels.

  Args:
    predictions: A float valued tensor of shape [`batch_size`].  Each
      component should be between 0 and 1.
    labels: An int valued tensor of shape [`batch_size`].  Each component
      should either be 0 or 1.
    weights_fn: unused.

  Returns:
    A pair, with the average log likelihood in the first component.
  """
  del weights_fn
  float_predictions = tf.cast(tf.squeeze(predictions), dtype=tf.float64)
  batch_probs = tf.stack([1. - float_predictions, float_predictions], axis=-1)
  int_labels = tf.cast(tf.squeeze(labels), dtype=tf.int32)
  onehot_targets = tf.cast(tf.one_hot(int_labels, 2), dtype=tf.float64)
  chosen_probs = tf.einsum(
      "ij,ij->i", batch_probs, onehot_targets, name="chosen_probs")
  avg_log_likelihood = tf.reduce_mean(tf.log(chosen_probs))
  return avg_log_likelihood, tf.constant(1.0) 
Example #5
Source File: cycle_gan.py    From tensor2tensor with Apache License 2.0
def lossfn(real_input, fake_input, compress, hparams, lsgan, name):
  """Loss function."""
  eps = 1e-12
  with tf.variable_scope(name):
    d1 = discriminator(real_input, compress, hparams, "discriminator")
    d2 = discriminator(fake_input, compress, hparams, "discriminator",
                       reuse=True)
    if lsgan:
      dloss = tf.reduce_mean(
          tf.squared_difference(d1, 0.9)) + tf.reduce_mean(tf.square(d2))
      gloss = tf.reduce_mean(tf.squared_difference(d2, 0.9))
      loss = (dloss + gloss)/2
    else:  # cross_entropy
      dloss = -tf.reduce_mean(
          tf.log(d1 + eps)) - tf.reduce_mean(tf.log1p(eps - d2))
      gloss = -tf.reduce_mean(tf.log(d2 + eps))
      loss = (dloss + gloss)/2
    return loss 
Example #6
Source File: discretization.py    From tensor2tensor with Apache License 2.0
def vae(x, z_size, name=None):
  """Simple variational autoencoder without discretization.

  Args:
    x: Input to the discretization bottleneck.
    z_size: Number of bits, where discrete codes range from 1 to 2**z_size.
    name: Name for the bottleneck scope.

  Returns:
    Latent z, KL loss, mu, and log_sigma.
  """
  with tf.variable_scope(name, default_name="vae"):
    mu = tf.layers.dense(x, z_size, name="mu")
    log_sigma = tf.layers.dense(x, z_size, name="log_sigma")
    shape = common_layers.shape_list(x)
    epsilon = tf.random_normal([shape[0], shape[1], 1, z_size])
    z = mu + tf.exp(log_sigma / 2) * epsilon
    kl = 0.5 * tf.reduce_mean(
        tf.expm1(log_sigma) + tf.square(mu) - log_sigma, axis=-1)
    free_bits = z_size // 4
    kl_loss = tf.reduce_mean(tf.maximum(kl - free_bits, 0.0))
    return z, kl_loss, mu, log_sigma 
Example #7
Source File: model.py    From benchmarks with Apache License 2.0
def loss_function(self, inputs, build_network_result):
    """Returns the op to measure the loss of the model."""
    logits = build_network_result.logits
    _, labels = inputs
    # TODO(laigd): consider putting the aux logit in the Inception model,
    # which could call super.loss_function twice, once with the normal logits
    # and once with the aux logits.
    aux_logits = build_network_result.extra_info
    with tf.name_scope('xentropy'):
      mlperf.logger.log(key=mlperf.tags.MODEL_HP_LOSS_FN, value=mlperf.tags.CCE)
      cross_entropy = tf.losses.sparse_softmax_cross_entropy(
          logits=logits, labels=labels)
      loss = tf.reduce_mean(cross_entropy, name='xentropy_mean')
    if aux_logits is not None:
      with tf.name_scope('aux_xentropy'):
        aux_cross_entropy = tf.losses.sparse_softmax_cross_entropy(
            logits=aux_logits, labels=labels)
        aux_loss = 0.4 * tf.reduce_mean(aux_cross_entropy, name='aux_loss')
        loss = tf.add_n([loss, aux_loss])
    return loss 
Example #8
Source File: evolved_transformer_test.py    From tensor2tensor with Apache License 2.0
def _create_greedy_infer_model(self):
    """Creates model for greedy inference testing.

    Returns:
      model: A t2t model.
      features: A map of string to tensor.
    """
    model, features = get_model(transformer.transformer_tiny())

    out_logits, _ = model(features)
    out_logits = tf.squeeze(out_logits, axis=[2, 3])
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=tf.reshape(out_logits, [-1, VOCAB_SIZE]),
        labels=tf.reshape(features["targets"], [-1]))
    loss = tf.reduce_mean(loss)
    apply_grad = tf.train.AdamOptimizer(0.001).minimize(loss)

    with self.test_session():
      tf.global_variables_initializer().run()
      for _ in range(10):
        apply_grad.run()

    model.set_mode(tf.estimator.ModeKeys.PREDICT)

    return model, features 
Example #9
Source File: ssd_model.py    From benchmarks with Apache License 2.0
def loss_function(self, inputs, build_network_result):
    logits = build_network_result.logits

    # Unpack model output back to locations and confidence scores of predictions
    # Shape of pred_loc: [batch_size, NUM_SSD_BOXES, 4]
    # Shape of pred_label: [batch_size, NUM_SSD_BOXES, label_num]
    pred_loc, pred_label = tf.split(logits, [4, self.label_num], 2)

    # Shape of gt_loc: [batch_size, NUM_SSD_BOXES, 4]
    # Shape of gt_label: [batch_size, NUM_SSD_BOXES, 1]
    # Shape of num_gt: [batch_size]
    _, gt_loc, gt_label, num_gt = inputs
    gt_label = tf.cast(gt_label, tf.int32)

    box_loss = self._localization_loss(pred_loc, gt_loc, gt_label, num_gt)
    class_loss = self._classification_loss(pred_label, gt_label, num_gt)

    tf.summary.scalar('box_loss', tf.reduce_mean(box_loss))
    tf.summary.scalar('class_loss', tf.reduce_mean(class_loss))
    return class_loss + box_loss 
Example #10
Source File: utils.py    From lamb with Apache License 2.0
def layer_norm(x, reduction_indices, epsilon=1e-9, gain=None, bias=None,
               per_element=True, scope=None):
  """DOC."""
  reduction_indices = ensure_list(reduction_indices)
  mean = tf.reduce_mean(x, reduction_indices, keep_dims=True)
  variance = tf.reduce_mean(tf.squared_difference(x, mean),
                            reduction_indices, keep_dims=True)
  normalized = (x - mean) / tf.sqrt(variance + epsilon)
  dtype = x.dtype
  shape = x.get_shape().as_list()
  for i in six.moves.range(len(shape)):
    if i not in reduction_indices or not per_element:
      shape[i] = 1
  with tf.variable_scope(scope or 'layer_norm'):
    if gain is None:
      gain = tf.get_variable('gain', shape=shape, dtype=dtype,
                             initializer=tf.ones_initializer())
    if bias is None:
      bias = tf.get_variable('bias', shape=shape, dtype=dtype,
                             initializer=tf.zeros_initializer())
  return gain*normalized+bias 
Example #11
Source File: metrics_test.py    From tensor2tensor with Apache License 2.0
def testAccuracyTopKMetric(self):
    predictions = np.random.randint(1, 5, size=(12, 12, 12, 1))
    targets = np.random.randint(1, 5, size=(12, 12, 12, 1))
    expected = np.mean((predictions == targets).astype(float))
    with self.test_session() as session:
      predicted = tf.one_hot(predictions, depth=5, dtype=tf.float32)
      scores1, _ = metrics.padded_accuracy_topk(
          predicted, tf.constant(targets, dtype=tf.int32), k=1)
      scores2, _ = metrics.padded_accuracy_topk(
          predicted, tf.constant(targets, dtype=tf.int32), k=7)
      a1 = tf.reduce_mean(scores1)
      a2 = tf.reduce_mean(scores2)
      session.run(tf.global_variables_initializer())
      actual1, actual2 = session.run([a1, a2])
    self.assertAlmostEqual(actual1, expected)
    self.assertAlmostEqual(actual2, 1.0) 
Example #12
Source File: transformer_test.py    From tensor2tensor with Apache License 2.0
def _create_greedy_infer_model(self):
    """Creates model for greedy inference testing.

    Returns:
      model: A t2t model.
      features: A map of string to tensor.
    """
    model, features = get_model(transformer.transformer_small())

    out_logits, _ = model(features)
    out_logits = tf.squeeze(out_logits, axis=[2, 3])
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=tf.reshape(out_logits, [-1, VOCAB_SIZE]),
        labels=tf.reshape(features["targets"], [-1]))
    loss = tf.reduce_mean(loss)
    apply_grad = tf.train.AdamOptimizer(0.001).minimize(loss)

    with self.test_session():
      tf.global_variables_initializer().run()
      for _ in range(100):
        apply_grad.run()

    model.set_mode(tf.estimator.ModeKeys.PREDICT)

    return model, features 
Example #13
Source File: common_layers.py    From tensor2tensor with Apache License 2.0
def patch_discriminator(x, filters=64, filter_size=5, n=4,
                        name="patch_discrim"):
  """Patch descriminator."""
  with tf.variable_scope(name):
    x_shape = shape_list(x)
    spatial_dims = [x_shape[1] // 4, x_shape[2] // 4]
    x = tf.random_crop(x, [x_shape[0]] + spatial_dims + [x_shape[3]])
    for i in range(n):
      x = general_conv(
          x=x,
          num_filters=filters * 2**i,
          filter_size=filter_size,
          stride=2 if i != n - 1 else 1,
          stddev=0.02,
          padding="SAME",
          name="c%d" % i,
          do_norm="instance" if i != 0 else False,
          do_relu=i != n - 1,
          relufactor=0.2)
    x = tf.reduce_mean(x, [1, 2])
    return x 
Example #14
Source File: transformer_nat.py    From tensor2tensor with Apache License 2.0
def vq_nearest_neighbor(x, hparams):
  """Find the nearest element in means to elements in x."""
  bottleneck_size = 2**hparams.bottleneck_bits
  means = hparams.means
  x_norm_sq = tf.reduce_sum(tf.square(x), axis=-1, keepdims=True)
  means_norm_sq = tf.reduce_sum(tf.square(means), axis=-1, keepdims=True)
  scalar_prod = tf.matmul(x, means, transpose_b=True)
  dist = x_norm_sq + tf.transpose(means_norm_sq) - 2 * scalar_prod
  if hparams.bottleneck_kind == "em":
    x_means_idx = tf.multinomial(-dist, num_samples=hparams.num_samples)
    x_means_hot = tf.one_hot(
        x_means_idx, depth=bottleneck_size)
    x_means_hot = tf.reduce_mean(x_means_hot, axis=1)
  else:
    x_means_idx = tf.argmax(-dist, axis=-1)
    x_means_hot = tf.one_hot(x_means_idx, depth=bottleneck_size)
  x_means = tf.matmul(x_means_hot, means)
  e_loss = tf.reduce_mean(tf.squared_difference(x, tf.stop_gradient(x_means)))
  return x_means_hot, e_loss 
Example #15
Source File: base_vae.py    From tensor2tensor with Apache License 2.0
def get_kl_loss(self, means, log_vars, means_p=None, log_vars_p=None):
    """Get KL loss for all the predicted Gaussians."""
    kl_loss = 0.0
    if means_p is None:
      means_p = tf.unstack(tf.zeros_like(means))
    if log_vars_p is None:
      log_vars_p = tf.unstack(tf.zeros_like(log_vars))
    enumerated_inputs = enumerate(zip(means, log_vars, means_p, log_vars_p))
    if self.is_training and self.hparams.stochastic_model:
      for i, (mean, log_var, mean_p, log_var_p) in enumerated_inputs:
        kl_loss += common_layers.kl_divergence(mean, log_var, mean_p, log_var_p)
        tf.summary.histogram("posterior_mean_%d" % i, mean)
        tf.summary.histogram("posterior_log_var_%d" % i, log_var)
        tf.summary.histogram("prior_mean_%d" % i, mean_p)
        tf.summary.histogram("prior_log_var_%d" % i, log_var_p)
      tf.summary.scalar("kl_raw", tf.reduce_mean(kl_loss))

    beta = self.get_beta(kl_loss)
    # information capacity from "Understanding disentangling in beta-VAE"
    if self.hparams.information_capacity > 0.0:
      kl_loss = tf.abs(kl_loss - self.hparams.information_capacity)
    return beta * kl_loss 
Example #16
Source File: autoencoders.py    From tensor2tensor with Apache License 2.0
def bottleneck(self, x):
    hparams = self.hparams
    z_size = hparams.bottleneck_bits
    x_shape = common_layers.shape_list(x)
    with tf.variable_scope("vae"):
      mu = tf.layers.dense(x, z_size, name="mu")
      if hparams.mode != tf.estimator.ModeKeys.TRAIN:
        return mu, 0.0  # No sampling or kl loss on eval.
      log_sigma = tf.layers.dense(x, z_size, name="log_sigma")
      epsilon = tf.random_normal(x_shape[:-1] + [z_size])
      z = mu + tf.exp(log_sigma / 2) * epsilon
      kl = 0.5 * tf.reduce_mean(
          tf.expm1(log_sigma) + tf.square(mu) - log_sigma, axis=-1)
      free_bits = z_size // 4
      kl_loss = tf.reduce_mean(tf.maximum(kl - free_bits, 0.0))
    return z, kl_loss * hparams.kl_beta 
Example #17
Source File: similarity_transformer.py    From tensor2tensor with Apache License 2.0
def encode(self, features, input_key):
    hparams = self._hparams
    inputs = common_layers.flatten4d3d(features[input_key])

    (encoder_input, encoder_self_attention_bias, _) = (
        transformer.transformer_prepare_encoder(inputs, problem.SpaceID.EN_TOK,
                                                hparams))

    encoder_input = tf.nn.dropout(encoder_input,
                                  1.0 - hparams.layer_prepostprocess_dropout)
    encoder_output = transformer.transformer_encoder(
        encoder_input,
        encoder_self_attention_bias,
        hparams,
        nonpadding=transformer.features_to_nonpadding(features, input_key))

    encoder_output = tf.reduce_mean(encoder_output, axis=1)

    return encoder_output 
Example #18
Source File: rouge_test.py    From tensor2tensor with Apache License 2.0
def testRougeLMetricE2E(self):
    vocab_size = 4
    batch_size = 12
    seq_length = 12
    predictions = tf.one_hot(
        np.random.randint(vocab_size, size=(batch_size, seq_length, 1, 1)),
        depth=4,
        dtype=tf.float32)
    targets = np.random.randint(4, size=(12, 12, 1, 1))
    with self.test_session() as session:
      scores, _ = rouge.rouge_l_fscore(
          predictions,
          tf.constant(targets, dtype=tf.int32))
      a = tf.reduce_mean(scores)
      session.run(tf.global_variables_initializer())
      session.run(a) 
Example #19
Source File: t2t_model.py    From tensor2tensor with Apache License 2.0
def summarize_features(features, num_shards=1):
  """Generate summaries for features."""
  if not common_layers.should_generate_summaries():
    return

  with tf.name_scope("input_stats"):
    for (k, v) in sorted(six.iteritems(features)):
      if (isinstance(v, tf.Tensor) and (v.get_shape().ndims > 1) and
          (v.dtype != tf.string)):
        tf.summary.scalar("%s_batch" % k, tf.shape(v)[0] // num_shards)
        tf.summary.scalar("%s_length" % k, tf.shape(v)[1])
        nonpadding = tf.to_float(tf.not_equal(v, 0))
        nonpadding_tokens = tf.reduce_sum(nonpadding)
        tf.summary.scalar("%s_nonpadding_tokens" % k, nonpadding_tokens)
        tf.summary.scalar("%s_nonpadding_fraction" % k,
                          tf.reduce_mean(nonpadding)) 
Example #20
Source File: t2t_model.py    From tensor2tensor with Apache License 2.0
def average_sharded_losses(sharded_losses):
  """Average losses across datashards.

  Args:
    sharded_losses: list<dict<str loss_name, Tensor loss>>. The loss
      can be a single Tensor or a 2-tuple (numerator and denominator).

  Returns:
    losses: dict<str loss_name, Tensor avg_loss>
  """
  losses = {}
  for loss_name in sorted(sharded_losses[0]):
    all_shards = [shard_losses[loss_name] for shard_losses in sharded_losses]
    if isinstance(all_shards[0], tuple):
      sharded_num, sharded_den = zip(*all_shards)
      mean_loss = (
          tf.add_n(sharded_num) / tf.maximum(
              tf.cast(1.0, sharded_den[0].dtype), tf.add_n(sharded_den)))
    else:
      mean_loss = tf.reduce_mean(all_shards)

    losses[loss_name] = mean_loss
  return losses 
Example #21
Source File: common_layers.py    From tensor2tensor with Apache License 2.0
def layer_norm_compute(x, epsilon, scale, bias, layer_collection=None):
  """Layer norm raw computation."""

  # Save these before they get converted to tensors by the casting below
  params = (scale, bias)

  epsilon, scale, bias = [cast_like(t, x) for t in [epsilon, scale, bias]]
  mean = tf.reduce_mean(x, axis=[-1], keepdims=True)
  variance = tf.reduce_mean(
      tf.squared_difference(x, mean), axis=[-1], keepdims=True)
  norm_x = (x - mean) * tf.rsqrt(variance + epsilon)

  output = norm_x * scale + bias
  # Note: `params` and `layer_collection` are not used in this excerpt.

  return output
Example #22
Source File: metrics_test.py    From tensor2tensor with Apache License 2.0
def testTwoClassLogLikelihoodVersusOldImplementation(self):
    def alt_two_class_log_likelihood_impl(predictions, labels):
      float_labels = tf.cast(labels, dtype=tf.float64)
      float_predictions = tf.cast(tf.squeeze(predictions), dtype=tf.float64)
      # likelihood should be just p for class 1, and 1 - p for class 0.
      # signs is 1 for class 1, and -1 for class 0
      signs = 2 * float_labels - tf.ones_like(float_labels)
      # constant_term is 1 for class 0, and 0 for class 1.
      constant_term = tf.ones_like(float_labels) - float_labels
      likelihoods = constant_term + signs * float_predictions
      log_likelihoods = tf.log(likelihoods)
      avg_log_likelihood = tf.reduce_mean(log_likelihoods)
      return avg_log_likelihood
    predictions = np.random.rand(1, 10, 1)
    targets = np.random.randint(2, size=10)
    with self.test_session() as session:
      new_log_likelihood, _ = metrics.two_class_log_likelihood(
          predictions, targets)
      alt_log_likelihood = alt_two_class_log_likelihood_impl(
          predictions, targets)
      new_impl, alt_impl = session.run([new_log_likelihood, alt_log_likelihood])
    self.assertAlmostEqual(new_impl, alt_impl) 
Example #23
Source File: savp.py    From tensor2tensor with Apache License 2.0
def g_step(self, gen_frames, fake_logits_stop):
    """Performs the generator step in computing the GAN loss.

    Args:
      gen_frames: Generated frames
      fake_logits_stop: Logits corresponding to the generated frames as per
                        the discriminator. Assumed to have a stop-gradient term.
    Returns:
      gan_g_loss_pos_d: Loss.
      gan_g_loss_neg_d: -gan_g_loss_pos_d but with a stop gradient on generator.
    """
    hparam_to_gen_loss = {
        "least_squares": gan_losses.least_squares_generator_loss,
        "cross_entropy": gan_losses.modified_generator_loss,
        "wasserstein": gan_losses.wasserstein_generator_loss
    }

    fake_logits = self.discriminator(gen_frames)
    mean_fake_logits = tf.reduce_mean(fake_logits)
    tf.summary.scalar("mean_fake_logits", mean_fake_logits)

    # Generator loss.
    # Using gan_g_loss_pos_d updates the discriminator as well.
    # To avoid this add gan_g_loss_neg_d = -gan_g_loss_pos_d
    # but with stop gradient on the generator.
    # This makes sure that the net gradient on the discriminator is zero and
    # net-gradient on the generator is just due to the gan_g_loss_pos_d.
    generator_loss_func = hparam_to_gen_loss[self.hparams.gan_loss]
    gan_g_loss_pos_d = generator_loss_func(
        discriminator_gen_outputs=fake_logits, add_summaries=True)
    gan_g_loss_neg_d = -generator_loss_func(
        discriminator_gen_outputs=fake_logits_stop, add_summaries=True)
    return gan_g_loss_pos_d, gan_g_loss_neg_d 
Example #24
Source File: area_attention_test.py    From tensor2tensor with Apache License 2.0
def test2DAreaMax(self):
    batch_size = 256
    feature_len = 100
    memory_height = 10
    heads = 2
    key_len = 6
    depth = 128
    max_area_height = 3
    max_area_width = 3
    queries = tf.random_uniform([batch_size, heads, key_len, depth],
                                minval=-10.0, maxval=10.0)
    features = tf.random_uniform([batch_size, heads, feature_len, depth],
                                 minval=-10.0, maxval=10.0)
    target_values = tf.random_uniform([batch_size, heads, key_len, depth],
                                      minval=-0.2, maxval=0.2)
    keys = tf.layers.dense(features, units=depth)
    values = tf.layers.dense(features, units=depth)
    max_attention = area_attention.dot_product_area_attention(
        queries, keys, values,
        bias=None,
        area_key_mode="max",
        area_value_mode="max",
        name="max_key",
        max_area_width=max_area_width,
        max_area_height=max_area_height,
        memory_height=memory_height)
    max_gradients = tf.gradients(tf.reduce_mean(
        tf.pow(target_values - max_attention, 2)), features)
    with self.test_session() as session:
      session.run(tf.global_variables_initializer())
      result1, result2 = session.run([max_gradients, max_attention])
    self.assertFalse(np.any(np.logical_not(np.isfinite(result1))))
    self.assertFalse(np.any(np.logical_not(np.isfinite(result2)))) 
Example #25
Source File: evolved_transformer_test.py    From tensor2tensor with Apache License 2.0
def testBeamVsFast(self):
    model, features = get_model(transformer.transformer_tiny())

    decode_length = DECODE_LENGTH

    out_logits, _ = model(features)
    out_logits = tf.squeeze(out_logits, axis=[2, 3])
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=tf.reshape(out_logits, [-1, VOCAB_SIZE]),
        labels=tf.reshape(features["targets"], [-1]))
    loss = tf.reduce_mean(loss)
    apply_grad = tf.train.AdamOptimizer(0.001).minimize(loss)

    with self.test_session():
      tf.global_variables_initializer().run()
      for _ in range(10):
        apply_grad.run()

    model.set_mode(tf.estimator.ModeKeys.PREDICT)

    with tf.variable_scope(tf.get_variable_scope(), reuse=True):
      beam_result = model._beam_decode_slow(
          features, decode_length, beam_size=4, top_beams=1,
          alpha=1.0)["outputs"]

      fast_result = model._beam_decode(
          features, decode_length, beam_size=4, top_beams=1,
          alpha=1.0)["outputs"]

    with self.test_session():
      beam_res = beam_result.eval()
      fast_res = fast_result.eval()

    self.assertAllClose(beam_res, fast_res) 
Example #26
Source File: evolved_transformer_test.py    From tensor2tensor with Apache License 2.0
def testSlowVsFastNoInput(self):
    model, features = get_model(transformer.transformer_tiny(), has_input=False)

    decode_length = DECODE_LENGTH

    out_logits, _ = model(features)
    out_logits = tf.squeeze(out_logits, axis=[2, 3])
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=tf.reshape(out_logits, [-1, VOCAB_SIZE]),
        labels=tf.reshape(features["targets"], [-1]))
    loss = tf.reduce_mean(loss)
    apply_grad = tf.train.AdamOptimizer(0.001).minimize(loss)

    with self.test_session():
      tf.global_variables_initializer().run()
      for _ in range(10):
        apply_grad.run()

    model.set_mode(tf.estimator.ModeKeys.PREDICT)

    with tf.variable_scope(tf.get_variable_scope(), reuse=True):
      slow_result = model._slow_greedy_infer(features, decode_length)["outputs"]
      slow_result = tf.squeeze(slow_result, axis=[2, 3])

      fast_result = model._greedy_infer(features, decode_length)["outputs"]

    with self.test_session():
      slow_res = slow_result.eval()
      fast_res = fast_result.eval()

    self.assertEqual(slow_res.shape, (BATCH_SIZE, decode_length))
    self.assertAllClose(slow_res, fast_res) 
Example #27
Source File: transformer_test.py    From tensor2tensor with Apache License 2.0
def testSlowVsFast(self, get_model_fn=None, p=None):
    if get_model_fn:
      model, features = get_model_fn(param_overrides=p)
    else:
      model, features = get_model(transformer.transformer_small())

    decode_length = 3

    out_logits, _ = model(features)
    out_logits = tf.squeeze(out_logits, axis=[2, 3])
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=tf.reshape(out_logits, [-1, VOCAB_SIZE]),
        labels=tf.reshape(features["targets"], [-1]))
    loss = tf.reduce_mean(loss)
    apply_grad = tf.train.AdamOptimizer(0.001).minimize(loss)

    with self.test_session():
      tf.global_variables_initializer().run()
      for _ in range(100):
        apply_grad.run()

    model.set_mode(tf.estimator.ModeKeys.PREDICT)

    with tf.variable_scope(tf.get_variable_scope(), reuse=True):
      greedy_result = model._slow_greedy_infer(
          features, decode_length)["outputs"]
      greedy_result = tf.squeeze(greedy_result, axis=[2, 3])

      fast_result = model._greedy_infer(features, decode_length)["outputs"]

    with self.test_session():
      greedy_res = greedy_result.eval()
      fast_res = fast_result.eval()

    self.assertEqual(fast_res.shape, (BATCH_SIZE, INPUT_LENGTH + decode_length))
    self.assertAllClose(greedy_res, fast_res) 
Example #28
Source File: transformer_test.py    From tensor2tensor with Apache License 2.0
def testBeamVsFast(self):
    model, features = get_model(transformer.transformer_small())

    decode_length = 2

    out_logits, _ = model(features)
    out_logits = tf.squeeze(out_logits, axis=[2, 3])
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=tf.reshape(out_logits, [-1, VOCAB_SIZE]),
        labels=tf.reshape(features["targets"], [-1]))
    loss = tf.reduce_mean(loss)
    apply_grad = tf.train.AdamOptimizer(0.001).minimize(loss)

    with self.test_session():
      tf.global_variables_initializer().run()
      for _ in range(100):
        apply_grad.run()

    model.set_mode(tf.estimator.ModeKeys.PREDICT)

    with tf.variable_scope(tf.get_variable_scope(), reuse=True):
      beam_result = model._beam_decode_slow(
          features,
          decode_length,
          beam_size=4,
          top_beams=1,
          alpha=1.0)["outputs"]

      fast_result = model._beam_decode(
          features,
          decode_length,
          beam_size=4,
          top_beams=1,
          alpha=1.0)["outputs"]

    with self.test_session():
      beam_res = beam_result.eval()
      fast_res = fast_result.eval()

    self.assertAllClose(beam_res, fast_res) 
Example #29
Source File: slicenet.py    From tensor2tensor with Apache License 2.0
def slicenet_internal(inputs, targets, target_space, hparams, run_decoder=True):
  """The slicenet model, main step used for training."""
  with tf.variable_scope("slicenet"):
    # Project to hidden size if necessary
    if inputs.get_shape().as_list()[-1] != hparams.hidden_size:
      inputs = common_layers.conv_block(
          inputs,
          hparams.hidden_size, [((1, 1), (3, 3))],
          first_relu=False,
          padding="SAME",
          force2d=True)

    # Flatten inputs and encode.
    inputs = tf.expand_dims(common_layers.flatten4d3d(inputs), axis=2)
    inputs_mask = 1.0 - embedding_to_padding(inputs)
    inputs = common_layers.add_timing_signal(inputs)  # Add position info.
    target_space_emb = embed_target_space(target_space, hparams.hidden_size)
    extra_layers = int(hparams.num_hidden_layers * 1.5)
    inputs_encoded = multi_conv_res(
        inputs, "SAME", "encoder", extra_layers, hparams, mask=inputs_mask)
    if not run_decoder:
      return inputs_encoded
    # Do the middle part.
    decoder_start, similarity_loss = slicenet_middle(
        inputs_encoded, targets, target_space_emb, inputs_mask, hparams)
    # Decode.
    decoder_final = multi_conv_res(
        decoder_start,
        "LEFT",
        "decoder",
        hparams.num_hidden_layers,
        hparams,
        mask=inputs_mask,
        source=inputs_encoded)
    return decoder_final, tf.reduce_mean(similarity_loss) 
Example #30
Source File: transformer_test.py    From tensor2tensor with Apache License 2.0
def testSlowVsFastNoInput(self):
    model, features = get_model(
        transformer.transformer_small(), has_input=False)

    decode_length = 3

    out_logits, _ = model(features)
    out_logits = tf.squeeze(out_logits, axis=[2, 3])
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=tf.reshape(out_logits, [-1, VOCAB_SIZE]),
        labels=tf.reshape(features["targets"], [-1]))
    loss = tf.reduce_mean(loss)
    apply_grad = tf.train.AdamOptimizer(0.001).minimize(loss)

    with self.test_session():
      tf.global_variables_initializer().run()
      for _ in range(100):
        apply_grad.run()

    model.set_mode(tf.estimator.ModeKeys.PREDICT)

    with tf.variable_scope(tf.get_variable_scope(), reuse=True):
      slow_result = model._slow_greedy_infer(
          features, decode_length)["outputs"]
      slow_result = tf.squeeze(slow_result, axis=[2, 3])

      fast_result = model._greedy_infer(features, decode_length)["outputs"]

    with self.test_session():
      slow_res = slow_result.eval()
      fast_res = fast_result.eval()

    self.assertEqual(slow_res.shape, (BATCH_SIZE, decode_length))
    self.assertAllClose(slow_res, fast_res)