Python tensorflow.compat.v1.stop_gradient() Examples

The following are 30 code examples of tensorflow.compat.v1.stop_gradient(), collected from open-source projects. The source file and originating project are noted above each example. You may also want to check out the other available functions and classes of the tensorflow.compat.v1 module.
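Before the project examples, here is a minimal, self-contained sketch (not taken from any of the projects below, and assuming TF1-style graph execution via disable_v2_behavior) of what tf.compat.v1.stop_gradient() does: the forward value of the wrapped tensor is unchanged, but it is treated as a constant during backpropagation, so no gradient flows through it.

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

x = tf.constant(3.0)
y = x * x                            # dy/dx = 2x = 6
y_blocked = tf.stop_gradient(x) * x  # gradient flows only through the second factor, so d/dx = x = 3

grad_y = tf.gradients(y, x)[0]
grad_blocked = tf.gradients(y_blocked, x)[0]

with tf.Session() as sess:
  print(sess.run([grad_y, grad_blocked]))  # [6.0, 3.0]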
Example #1
Source File: transformer_nat.py    From tensor2tensor with Apache License 2.0
def vq_nearest_neighbor(x, hparams):
  """Find the nearest element in means to elements in x."""
  bottleneck_size = 2**hparams.bottleneck_bits
  means = hparams.means
  x_norm_sq = tf.reduce_sum(tf.square(x), axis=-1, keepdims=True)
  means_norm_sq = tf.reduce_sum(tf.square(means), axis=-1, keepdims=True)
  scalar_prod = tf.matmul(x, means, transpose_b=True)
  dist = x_norm_sq + tf.transpose(means_norm_sq) - 2 * scalar_prod
  if hparams.bottleneck_kind == "em":
    x_means_idx = tf.multinomial(-dist, num_samples=hparams.num_samples)
    x_means_hot = tf.one_hot(
        x_means_idx, depth=bottleneck_size)
    x_means_hot = tf.reduce_mean(x_means_hot, axis=1)
  else:
    x_means_idx = tf.argmax(-dist, axis=-1)
    x_means_hot = tf.one_hot(x_means_idx, depth=bottleneck_size)
  x_means = tf.matmul(x_means_hot, means)
  e_loss = tf.reduce_mean(tf.squared_difference(x, tf.stop_gradient(x_means)))
  return x_means_hot, e_loss 
Example #2
Source File: multi_head_dqn_agent.py    From batch_rl with Apache License 2.0
def _build_train_op(self):
    """Builds a training op.

    Returns:
      train_op: An op performing one step of training from replay data.
    """
    actions = self._replay.actions
    indices = tf.stack([tf.range(actions.shape[0]), actions], axis=-1)
    replay_chosen_q = tf.gather_nd(
        self._replay_net_outputs.q_heads, indices=indices)
    target = tf.stop_gradient(self._build_target_q_op())
    loss = tf.losses.huber_loss(
        target, replay_chosen_q, reduction=tf.losses.Reduction.NONE)
    q_head_losses = tf.reduce_mean(loss, axis=0)
    final_loss = tf.reduce_mean(q_head_losses)
    if self.summary_writer is not None:
      with tf.variable_scope('Losses'):
        tf.summary.scalar('HuberLoss', final_loss)
    return self.optimizer.minimize(final_loss) 
Example #3
Source File: autoencoders.py    From tensor2tensor with Apache License 2.0
def gumbel_sample(self, reconstr_gan):
    hparams = self.hparams
    is_training = hparams.mode == tf.estimator.ModeKeys.TRAIN
    vocab_size = self._problem_hparams.vocab_size["targets"]
    if hasattr(self._hparams, "vocab_divisor"):
      vocab_size += (-vocab_size) % self._hparams.vocab_divisor
    reconstr_gan = tf.nn.log_softmax(reconstr_gan)
    if is_training and hparams.gumbel_temperature > 0.0:
      gumbel_samples = discretization.gumbel_sample(
          common_layers.shape_list(reconstr_gan))
      gumbel_samples *= hparams.gumbel_noise_factor
      reconstr_gan += gumbel_samples
      reconstr_sample = latent_layers.multinomial_sample(
          reconstr_gan, temperature=hparams.gumbel_temperature)
      reconstr_gan = tf.nn.softmax(reconstr_gan / hparams.gumbel_temperature)
    else:
      reconstr_sample = tf.argmax(reconstr_gan, axis=-1)
      reconstr_gan = tf.nn.softmax(reconstr_gan / 0.1)  # Sharpen a bit.
    # Use 1-hot forward, softmax backward.
    reconstr_hot = tf.one_hot(reconstr_sample, vocab_size)
    reconstr_gan += reconstr_hot - tf.stop_gradient(reconstr_gan)
    return reconstr_gan 
Example #4
Source File: discretization.py    From tensor2tensor with Apache License 2.0
def tanh_discrete_bottleneck(x, bottleneck_bits, bottleneck_noise,
                             discretize_warmup_steps, mode):
  """Simple discretization through tanh, flip bottleneck_noise many bits."""
  x = tf.layers.dense(x, bottleneck_bits, name="tanh_discrete_bottleneck")
  d0 = tf.stop_gradient(2.0 * tf.to_float(tf.less(0.0, x))) - 1.0
  if mode == tf.estimator.ModeKeys.TRAIN:
    x += tf.truncated_normal(
        common_layers.shape_list(x), mean=0.0, stddev=0.2)
  x = tf.tanh(x)
  d = x + tf.stop_gradient(2.0 * tf.to_float(tf.less(0.0, x)) - 1.0 - x)
  if mode == tf.estimator.ModeKeys.TRAIN:
    noise = tf.random_uniform(common_layers.shape_list(x))
    noise = 2.0 * tf.to_float(tf.less(bottleneck_noise, noise)) - 1.0
    d *= noise
  d = common_layers.mix(d, x, discretize_warmup_steps,
                        mode == tf.estimator.ModeKeys.TRAIN)
  return d, d0 
Example #5
Source File: utils.py    From lamb with Apache License 2.0
def mask_from_lengths(lengths, max_length=None, dtype=None, name=None):
  """Convert a length scalar to a vector of binary masks.

  This function will convert a vector of lengths to a matrix of binary masks.
  E.g. [2, 4, 3] will become [[1, 1, 0, 0], [1, 1, 1, 1], [1, 1, 1, 0]]

  Args:
    lengths: a d-dimensional vector of integers corresponding to lengths.
    max_length: an optional (default: None) scalar-like or 0-dimensional tensor
      indicating the maximum length of the masks. If not provided, the maximum
      length will be inferred from the lengths vector.
    dtype: the dtype of the returned mask, if specified. If None, the dtype of
      the lengths will be used.
    name: a name for the operation (optional).

  Returns:
    A d x max_length tensor of binary masks (int32).
  """
  with tf.name_scope(name, 'mask_from_lengths'):
    dtype = lengths.dtype if dtype is None else dtype
    max_length = tf.reduce_max(lengths) if max_length is None else max_length
    indexes = tf.range(max_length, dtype=lengths.dtype)
    mask = tf.less(tf.expand_dims(indexes, 0), tf.expand_dims(lengths, 1))
    cast_mask = tf.cast(mask, dtype)
  return tf.stop_gradient(cast_mask) 
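For reference, the docstring's example input [2, 4, 3] can be reproduced with the same ops outside the lamb codebase; a minimal sketch (not part of lamb/utils.py, hypothetical values):

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

lengths = tf.constant([2, 4, 3])
indexes = tf.range(tf.reduce_max(lengths), dtype=lengths.dtype)
mask = tf.cast(tf.less(tf.expand_dims(indexes, 0), tf.expand_dims(lengths, 1)), tf.int32)

with tf.Session() as sess:
  print(sess.run(mask))  # [[1 1 0 0]
                         #  [1 1 1 1]
                         #  [1 1 1 0]]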
Example #6
Source File: discretization.py    From tensor2tensor with Apache License 2.0
def bit_to_int(x_bit, num_bits, base=2):
  """Turn x_bit representing numbers bitwise (lower-endian) to int tensor.

  Args:
    x_bit: Tensor containing numbers in a particular base to be converted to
      int.
    num_bits: Number of bits in the representation.
    base: Base of the representation.

  Returns:
    Integer representation of this number.
  """
  x_l = tf.stop_gradient(tf.to_int32(tf.reshape(x_bit, [-1, num_bits])))
  x_labels = [
      x_l[:, i] * tf.to_int32(base)**tf.to_int32(i) for i in range(num_bits)]
  res = sum(x_labels)
  return tf.to_int32(tf.reshape(res, common_layers.shape_list(x_bit)[:-1])) 
Example #7
Source File: shake_shake.py    From tensor2tensor with Apache License 2.0
def shake_shake_branch(x, output_filters, stride, rand_forward, rand_backward,
                       hparams):
  """Building a 2 branching convnet."""
  is_training = hparams.mode == tf.estimator.ModeKeys.TRAIN
  x = tf.nn.relu(x)
  x = tf.layers.conv2d(
      x,
      output_filters, (3, 3),
      strides=(stride, stride),
      padding="SAME",
      name="conv1")
  x = tf.layers.batch_normalization(x, training=is_training, name="bn1")
  x = tf.nn.relu(x)
  x = tf.layers.conv2d(x, output_filters, (3, 3), padding="SAME", name="conv2")
  x = tf.layers.batch_normalization(x, training=is_training, name="bn2")
  if is_training:
    x = x * rand_backward + tf.stop_gradient(x * rand_forward -
                                             x * rand_backward)
  else:
    x *= 1.0 / hparams.shake_shake_num_branches
  return x 
Example #8
Source File: vq_discrete.py    From tensor2tensor with Apache License 2.0
def bit_to_int(self, x_bit, num_bits, base=2):
    """Turn x_bit representing numbers bitwise (lower-endian) to int tensor.

    Args:
        x_bit: Tensor containing numbers in a particular base to be
            converted to int.
        num_bits: Number of bits in the representation.
        base: Base of the representation.

    Returns:
        Integer representation of this number.
    """
    x_l = tf.stop_gradient(tf.to_int32(tf.reshape(x_bit, [-1, num_bits])))
    # pylint: disable=g-complex-comprehension
    x_labels = [
        x_l[:, i] * tf.to_int32(base)**tf.to_int32(i) for i in range(num_bits)]
    res = sum(x_labels)
    return tf.to_int32(tf.reshape(res, common_layers.shape_list(x_bit)[:-1])) 
Example #9
Source File: vq_discrete.py    From tensor2tensor with Apache License 2.0
def embedding_lookup(self, x, means):
    """Compute nearest neighbors and loss for training the embeddings.

    Args:
        x: Batch of encoder continuous latent states sliced/projected into
            shape [-1, num_blocks, block_dim].
        means: Embedding means.

    Returns:
        The nearest neighbor in one hot form, the nearest neighbor itself,
        the commitment loss, embedding training loss.
    """
    x_means_hot = self.nearest_neighbor(x, means)
    x_means_hot_flat = tf.reshape(
        x_means_hot, [-1, self.hparams.num_blocks, self.hparams.block_v_size])
    x_means = tf.matmul(tf.transpose(x_means_hot_flat, perm=[1, 0, 2]), means)
    x_means = tf.transpose(x_means, [1, 0, 2])
    q_loss = tf.reduce_mean(
        tf.squared_difference(tf.stop_gradient(x), x_means))
    e_loss = tf.reduce_mean(
        tf.squared_difference(x, tf.stop_gradient(x_means)))
    return x_means_hot, x_means, q_loss, e_loss 
Example #10
Source File: base.py    From tensor2tensor with Apache License 2.0
def pixels_from_softmax(frame_logits, pure_sampling=False,
                        temperature=1.0, gumbel_noise_factor=0.2):
  """Given frame_logits from a per-pixel softmax, generate colors."""
  # If we're purely sampling, just sample each pixel.
  if pure_sampling or temperature == 0.0:
    return common_layers.sample_with_temperature(frame_logits, temperature)

  # Gumbel-sample from the pixel softmax and average by pixel values.
  pixel_range = tf.to_float(tf.range(256))
  for _ in range(len(frame_logits.get_shape().as_list()) - 1):
    pixel_range = tf.expand_dims(pixel_range, axis=0)

  frame_logits = tf.nn.log_softmax(frame_logits)
  gumbel_samples = discretization.gumbel_sample(
      common_layers.shape_list(frame_logits)) * gumbel_noise_factor

  frame = tf.nn.softmax((frame_logits + gumbel_samples) / temperature, axis=-1)
  result = tf.reduce_sum(frame * pixel_range, axis=-1)
  # Round on the forward pass, not on the backward one.
  return result + tf.stop_gradient(tf.round(result) - result) 
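The last line above is the common straight-through trick: result + tf.stop_gradient(tf.round(result) - result) evaluates to tf.round(result) in the forward pass, while the gradient with respect to result is the identity. A minimal sketch (hypothetical values, independent of tensor2tensor) verifying both properties:

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

x = tf.constant([0.2, 1.7, 2.6])
rounded_st = x + tf.stop_gradient(tf.round(x) - x)  # forward: round(x); backward: identity

grads = tf.gradients(tf.reduce_sum(rounded_st), x)[0]
with tf.Session() as sess:
  fwd, bwd = sess.run([rounded_st, grads])
  print(fwd)  # [0. 2. 3.]
  print(bwd)  # [1. 1. 1.]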
Example #11
Source File: archs.py    From compression with Apache License 2.0
def _get_moments(self, inputs):
    # Like tf.nn.moments but unbiased sample std. deviation.
    # Reduce over channels only.
    mean = tf.reduce_mean(inputs, [self.axis], keepdims=True, name="mean")
    variance = tf.reduce_sum(
        tf.squared_difference(inputs, tf.stop_gradient(mean)),
        [self.axis], keepdims=True, name="variance_sum")
    # Divide by N-1
    inputs_shape = tf.shape(inputs)
    counts = tf.reduce_prod([inputs_shape[ax] for ax in [self.axis]])
    variance /= (tf.cast(counts, tf.float32) - 1)
    return mean, variance 
Example #12
Source File: layers.py    From interval-bound-propagation with Apache License 2.0
def scale(self):
    self._ensure_is_connected()
    return tf.stop_gradient(self._gamma) if self._gamma is not None else None 
Example #13
Source File: layers.py    From interval-bound-propagation with Apache License 2.0
def bias(self):
    self._ensure_is_connected()
    return tf.stop_gradient(self._beta) if self._beta is not None else None 
Example #14
Source File: layers.py    From interval-bound-propagation with Apache License 2.0
def variance(self):
    self._ensure_is_connected()
    return tf.stop_gradient(self._variance) 
Example #15
Source File: model.py    From compression with Apache License 2.0
def _compute_discriminator_out(self,
                                 nodes: Nodes,
                                 create_summaries,
                                 gradients_to_generator=True
                                ) -> archs.DiscOutSplit:
    """Get discriminator outputs."""
    with tf.name_scope("disc"):
      input_image = nodes.input_image_scaled
      reconstruction = nodes.reconstruction_scaled

      if not gradients_to_generator:
        reconstruction = tf.stop_gradient(reconstruction)

      discriminator_in = tf.concat([input_image, reconstruction], axis=0)

      # Condition D.
      latent = tf.stop_gradient(nodes.latent_quantized)
      latent = tf.concat([latent, latent], axis=0)

      discriminator_in = (discriminator_in, latent)

      disc_out_all = self._discriminator(discriminator_in,
                                         training=self.training)

    d_real, d_fake = tf.split(disc_out_all.d_all, 2)
    d_real_logits, d_fake_logits = tf.split(disc_out_all.d_all_logits, 2)
    disc_out_split = archs.DiscOutSplit(d_real, d_fake,
                                        d_real_logits, d_fake_logits)

    if create_summaries:
      tf.summary.scalar("d_real", tf.reduce_mean(disc_out_split.d_real))
      tf.summary.scalar("d_fake", tf.reduce_mean(disc_out_split.d_fake))

    return disc_out_split 
Example #16
Source File: layers.py    From interval-bound-propagation with Apache License 2.0
def mean(self):
    self._ensure_is_connected()
    return tf.stop_gradient(self._mean) 
Example #17
Source File: archs.py    From compression with Apache License 2.0
def _quantize(inputs, mean):
  half = tf.constant(.5, dtype=tf.float32)
  outputs = inputs
  outputs -= mean
  # Rounding latents for the forward pass (straight-through).
  outputs = outputs + tf.stop_gradient(tf.math.floor(outputs + half) - outputs)
  outputs += mean
  return outputs 
Example #18
Source File: entropy_models.py    From compression with Apache License 2.0
def _logits_cumulative(self, inputs, stop_gradient):
    """Evaluate logits of the cumulative densities.

    Arguments:
      inputs: The values at which to evaluate the cumulative densities, expected
        to be a `Tensor` of shape `(channels, 1, batch)`.
      stop_gradient: Boolean. Whether to add `tf.stop_gradient` calls so
        that the gradient of the output with respect to the density model
        parameters is disconnected (the gradient with respect to `inputs` is
        left untouched).

    Returns:
      A `Tensor` of the same shape as `inputs`, containing the logits of the
      cumulative densities evaluated at the given inputs.
    """
    logits = inputs

    for i in range(len(self.filters) + 1):
      matrix = self._matrices[i]
      if stop_gradient:
        matrix = tf.stop_gradient(matrix)
      logits = tf.linalg.matmul(matrix, logits)

      bias = self._biases[i]
      if stop_gradient:
        bias = tf.stop_gradient(bias)
      logits += bias

      if i < len(self._factors):
        factor = self._factors[i]
        if stop_gradient:
          factor = tf.stop_gradient(factor)
        logits += factor * tf.math.tanh(logits)

    return logits 
Example #19
Source File: entropy_models.py    From compression with Apache License 2.0
def _likelihood(self, inputs):
    ndim, channel_axis, _, _ = self._get_input_dims()
    half = tf.constant(.5, dtype=self.dtype)

    # Convert to (channels, 1, batch) format by commuting channels to front
    # and then collapsing.
    order = list(range(ndim))
    order.pop(channel_axis)
    order.insert(0, channel_axis)
    inputs = tf.transpose(inputs, order)
    shape = tf.shape(inputs)
    inputs = tf.reshape(inputs, (shape[0], 1, -1))

    # Evaluate densities.
    # We can use the special rule below to only compute differences in the left
    # tail of the sigmoid. This increases numerical stability: sigmoid(x) is 1
    # for large x, 0 for small x. Subtracting two numbers close to 0 can be done
    # with much higher precision than subtracting two numbers close to 1.
    lower = self._logits_cumulative(inputs - half, stop_gradient=False)
    upper = self._logits_cumulative(inputs + half, stop_gradient=False)
    # Flip signs if we can move more towards the left tail of the sigmoid.
    sign = -tf.math.sign(tf.math.add_n([lower, upper]))
    sign = tf.stop_gradient(sign)
    likelihood = abs(
        tf.math.sigmoid(sign * upper) - tf.math.sigmoid(sign * lower))

    # Convert back to input tensor shape.
    order = list(range(1, ndim))
    order.insert(channel_axis, 0)
    likelihood = tf.reshape(likelihood, shape)
    likelihood = tf.transpose(likelihood, order)

    return likelihood 
Example #20
Source File: value_ops.py    From trfl with Apache License 2.0
def td_learning(v_tm1, r_t, pcont_t, v_t, name="TDLearning"):
  """Implements the TD(0)-learning loss as a TensorFlow op.

  The TD loss is `0.5` times the squared difference between `v_tm1` and
  the target `r_t + pcont_t * v_t`.

  See "Learning to Predict by the Methods of Temporal Differences" by Sutton.
  (https://link.springer.com/article/10.1023/A:1022633531479).

  Args:
    v_tm1: Tensor holding values at previous timestep, shape `[B]`.
    r_t: Tensor holding rewards, shape `[B]`.
    pcont_t: Tensor holding pcontinue values, shape `[B]`.
    v_t: Tensor holding values at current timestep, shape `[B]`.
    name: name to prefix ops created by this function.

  Returns:
    A namedtuple with fields:

    * `loss`: a tensor containing the batch of losses, shape `[B]`.
    * `extra`: a namedtuple with fields:
        * `target`: batch of target values for `v_tm1`, shape `[B]`.
        * `td_error`: batch of temporal difference errors, shape `[B]`.
  """
  # Rank and compatibility checks.
  base_ops.wrap_rank_shape_assert([[v_tm1, v_t, r_t, pcont_t]], [1], name)

  # TD(0)-learning op.
  with tf.name_scope(name, values=[v_tm1, r_t, pcont_t, v_t]):

    # Build target.
    target = tf.stop_gradient(r_t + pcont_t * v_t)

    # Temporal difference error and loss.
    # Loss is MSE scaled by 0.5, so the gradient is equal to the TD error.
    td_error = target - v_tm1
    loss = 0.5 * tf.square(td_error)
    return base_ops.LossOutput(loss, TDExtra(target, td_error)) 
Example #21
Source File: value_ops.py    From trfl with Apache License 2.0
def qv_max(v_tm1, r_t, pcont_t, q_t, name="QVMAX"):
  """Implements the QVMAX learning loss as a TensorFlow op.

  The QVMAX loss is `0.5` times the squared difference between `v_tm1` and
  the target `r_t + pcont_t * max q_t`, where `q_t` is separately learned
  through QV learning (c.f. `action_value_ops.qv_learning`).

  See "The QV Family Compared to Other Reinforcement Learning Algorithms" by
  Wiering and van Hasselt (2009).
  (http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.713.1931)

  Args:
    v_tm1: Tensor holding values at previous timestep, shape `[B]`.
    r_t: Tensor holding rewards, shape `[B]`.
    pcont_t: Tensor holding pcontinue values, shape `[B]`.
    q_t: Tensor of action values at current timestep, shape `[B, num_actions]`.
    name: name to prefix ops created by this function.

  Returns:
    A namedtuple with fields:

    * `loss`: a tensor containing the batch of losses, shape `[B]`.
    * `extra`: a namedtuple with fields:
        * `target`: batch of target values for `v_tm1`, shape `[B]`.
        * `td_error`: batch of temporal difference errors, shape `[B]`.
  """
  # Rank and compatibility checks.
  base_ops.wrap_rank_shape_assert([[v_tm1, r_t, pcont_t], [q_t]], [1, 2], name)

  # The QVMAX op.
  with tf.name_scope(name, values=[v_tm1, r_t, pcont_t, q_t]):

    # Build target.
    target = tf.stop_gradient(r_t + pcont_t * tf.reduce_max(q_t, axis=1))

    # Temporal difference error and loss.
    # Loss is MSE scaled by 0.5, so the gradient is equal to the TD error.
    td_error = target - v_tm1
    loss = 0.5 * tf.square(td_error)
    return base_ops.LossOutput(loss, TDExtra(target, td_error)) 
Example #22
Source File: dist_value_ops.py    From trfl with Apache License 2.0
def _slice_with_actions(embeddings, actions):
  """Slice a Tensor.

  Take embeddings of the form [batch_size, num_actions, embed_dim]
  and actions of the form [batch_size, 1], and return the sliced embeddings
  like embeddings[:, actions, :].

  Args:
    embeddings: Tensor of embeddings to index.
    actions: int Tensor to use as index into embeddings

  Returns:
    Tensor of embeddings indexed by actions
  """
  batch_size, num_actions = embeddings.get_shape()[:2]

  # Values are the 'values' in a sparse tensor we will be setting
  act_indx = tf.cast(actions, tf.int64)[:, None]
  values = tf.reshape(tf.cast(tf.ones(tf.shape(actions)), tf.bool), [-1])

  # Create a range for each index into the batch
  act_range = tf.range(0, batch_size, dtype=tf.int64)[:, None]
  # Combine this into coordinates with the action indices
  indices = tf.concat([act_range, act_indx], 1)

  actions_mask = tf.SparseTensor(indices, values, [batch_size, num_actions])
  actions_mask = tf.stop_gradient(
      tf.sparse_tensor_to_dense(actions_mask, default_value=False))
  sliced_emb = tf.boolean_mask(embeddings, actions_mask)
  return sliced_emb 
Example #23
Source File: policy_gradient_ops.py    From trfl with Apache License 2.0
def policy_gradient(policies, actions, action_values, policy_vars=None,
                    name="policy_gradient"):
  """Computes policy gradient losses for a batch of trajectories.

  See `policy_gradient_loss` for more information on expected inputs and usage.

  Args:
    policies: A distribution over a batch supporting a `log_prob` method, e.g.
        an instance of `tfp.distributions.Distribution`. For example, for
        a diagonal gaussian policy:
        `policies = tfp.distributions.MultivariateNormalDiag(mus, sigmas)`
    actions: An action batch Tensor used as the argument for `log_prob`. Has
        shape equal to the batch shape of the policies concatenated with the
        event shape of the policies (which may be scalar, in which case
        concatenation leaves shape just equal to batch shape).
    action_values: A Tensor containing estimates of the values of the `actions`.
        Has shape equal to the batch shape of the policies.
    policy_vars: An optional iterable of Tensors used by `policies`. If provided
        is used in scope checks. For the multivariate normal example above this
        would be `[mus, sigmas]`.
    name: Customises the name_scope for this op.

  Returns:
    loss: Tensor with same shape as `actions` containing the total loss for each
        element in the batch. Differentiable w.r.t the variables in `policies`
        only.
  """
  policy_vars = list(policy_vars) if policy_vars else list()
  with tf.name_scope(values=policy_vars + [actions, action_values], name=name):
    actions = tf.stop_gradient(actions)
    action_values = tf.stop_gradient(action_values)
    log_prob_actions = policies.log_prob(actions)
    # Prevent accidental broadcasting if possible at construction time.
    action_values.get_shape().assert_is_compatible_with(
        log_prob_actions.get_shape())
    return -tf.multiply(log_prob_actions, action_values) 
Example #24
Source File: learner.py    From meta-dataset with Apache License 2.0
def gradient_descent_step(loss,
                          variables,
                          stop_grads,
                          allow_grads_to_batch_norm_vars,
                          learning_rate,
                          get_update_ops=True):
  """Returns the updated vars after one step of gradient descent."""
  grads = tf.gradients(loss, variables)

  if stop_grads:
    grads = [tf.stop_gradient(dv) for dv in grads]

  def _apply_grads(variables, grads):
    """Applies gradients using SGD on a list of variables."""
    v_new, update_ops = [], []
    for (v, dv) in zip(variables, grads):
      if (not allow_grads_to_batch_norm_vars and
          ('offset' in v.name or 'scale' in v.name)):
        updated_value = v  # no update.
      else:
        updated_value = v - learning_rate * dv  # gradient descent update.
        if get_update_ops:
          update_ops.append(tf.assign(v, updated_value))
      v_new.append(updated_value)
    return v_new, update_ops

  updated_vars, update_ops = _apply_grads(variables, grads)
  return {'updated_vars': updated_vars, 'update_ops': update_ops} 
Example #25
Source File: cycle_gan.py    From tensor2tensor with Apache License 2.0
def discriminator(x, compress, hparams, name, reuse=None):
  with tf.variable_scope(name, reuse=reuse):
    x = tf.stop_gradient(2 * x) - x  # Reverse gradient.
    if compress:
      x = transformer_vae.compress(x, None, False, hparams, "compress")
    else:
      x = transformer_vae.residual_conv(x, 1, 3, hparams, "compress_rc")
    y = tf.reduce_mean(x, axis=1)
    return tf.tanh(tf.layers.dense(y, 1, name="reduce")) 
Example #26
Source File: utils.py    From lamb with Apache License 2.0
def compute_lengths(symbols_list, eos_symbol, name=None,
                    dtype=tf.int64):
  """Computes sequence lengths given end-of-sequence symbol.

  Args:
    symbols_list: list of [batch_size] tensors of symbols (e.g. integers).
    eos_symbol: end of sequence symbol (e.g. integer).
    name: name for the name scope of this op.
    dtype: type of symbols, default: tf.int64.

  Returns:
    Tensor [batch_size] of lengths of sequences.
  """
  with tf.name_scope(name, 'compute_lengths'):
    max_len = len(symbols_list)
    eos_symbol_ = tf.constant(eos_symbol, dtype=dtype)
    # Array with (max_len - i) at positions where we have EOS, 0 otherwise. The
    # maximum of this is the first EOS in that example.
    ends = [tf.constant(max_len - i, dtype=tf.int64)
            * tf.to_int64(tf.equal(s, eos_symbol_))
            for i, s in enumerate(symbols_list)]
    # Lengths of sequences, or max_len for sequences that didn't have EOS.
    # Note: examples that don't have EOS will have a maximum of 0 in `ends` and
    # therefore a value of max_len+1 in lens_.
    lens_ = max_len + 1 - tf.reduce_max(tf.stack(ends, 1), axis=1)
    # For examples that didn't have EOS decrease max_len+1 to max_len as the
    # length.
    lens = tf.subtract(lens_, tf.to_int64(tf.equal(lens_, max_len + 1)))
    return tf.stop_gradient(tf.reshape(lens, [-1])) 
Example #27
Source File: transformer_nat.py    From tensor2tensor with Apache License 2.0
def get_latent_pred_loss(latents_pred, latents_discrete_hot, hparams):
  """Latent prediction and loss."""
  latents_logits = tf.layers.dense(
      latents_pred, 2**hparams.bottleneck_bits, name="extra_logits")
  loss = tf.nn.softmax_cross_entropy_with_logits_v2(
      labels=tf.stop_gradient(latents_discrete_hot), logits=latents_logits)
  return loss 
Example #28
Source File: autoencoders.py    From tensor2tensor with Apache License 2.0
def reverse_gradient(x, lr=1.0):
  return -lr * x + tf.stop_gradient((1.0 + lr) * x) 
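Examples #25 and #28 are both gradient-reversal constructions: the forward value equals x, but the gradient is negated (and scaled by lr in reverse_gradient above). A minimal sketch (assumed values, not from the tensor2tensor sources) checking this:

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

x = tf.constant(4.0)
lr = 1.0
rev = -lr * x + tf.stop_gradient((1.0 + lr) * x)  # forward: x; backward: -lr

grad = tf.gradients(rev, x)[0]
with tf.Session() as sess:
  print(sess.run([rev, grad]))  # [4.0, -1.0]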
Example #29
Source File: autoencoders.py    From tensor2tensor with Apache License 2.0
def bottleneck(self, x):
    hparams = self.hparams
    x = tf.tanh(tf.layers.dense(x, hparams.bottleneck_bits, name="bottleneck"))
    d = x + tf.stop_gradient(2.0 * tf.to_float(tf.less(0.0, x)) - 1.0 - x)
    if hparams.mode == tf.estimator.ModeKeys.TRAIN:
      noise = tf.random_uniform(common_layers.shape_list(x))
      noise = 2.0 * tf.to_float(tf.less(hparams.bottleneck_noise, noise)) - 1.0
      d *= noise
    x = common_layers.mix(d, x, hparams.discretize_warmup_steps,
                          hparams.mode == tf.estimator.ModeKeys.TRAIN)
    return x, 0.0 
Example #30
Source File: transformer_vae_flow_prior_ops.py    From tensor2tensor with Apache License 2.0
def predict_target_lengths(
    encoder_output, inputs_mask, hparams, length_diff=None):
  """Predict target lengths."""
  bound = hparams.lendiff_bound
  inputs_length = tf.cast(tf.reduce_sum(inputs_mask, 1), tf.int32)
  targets_length = inputs_length
  loss = None
  if hparams.predict_target_length:
    encoder_output = gops.reduce_mean_over_l(encoder_output, inputs_mask)
    logits = tf.stop_gradient(encoder_output)
    logits = lenpred_mlp("lenpred", logits, hparams.hidden_size, bound)
    if length_diff is not None:
      labels = tf.maximum(tf.minimum(length_diff, bound), -bound)
      labels = tf.cast(labels + bound, tf.int32)
      labels = tf.stop_gradient(labels)
      loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
          labels=labels, logits=logits)
      loss = tf.reduce_mean(loss)
    diff_pred = tf.argmax(logits, 1)
    diff_pred = tf.cast(diff_pred - bound, tf.int32)
    targets_length = inputs_length + diff_pred
    targets_length = tf.maximum(targets_length, 1)
  divi = 4
  targets_length = tf.ceil(targets_length / divi) * divi
  targets_length = tf.cast(targets_length, tf.int32)
  return targets_length, loss