Python tensorflow.compat.v1.range() Examples

The following are 30 code examples of tensorflow.compat.v1.range(). You can go to the original project or source file by following the links above each example, or browse the other available functions and classes of the tensorflow.compat.v1 module.
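Before the project examples, here is a minimal stand-alone sketch of what tensorflow.compat.v1.range() itself returns. The tf.disable_v2_behavior() call is an assumption that you are running these v1-style snippets under TensorFlow 2.x; under a genuine TF 1.x install it can be dropped.

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()  # run in graph mode so Session-based examples work

# tf.range mirrors Python's range(): start (optional), limit, delta (optional).
ints = tf.range(5)                    # [0, 1, 2, 3, 4]
stepped = tf.range(2, 12, delta=3)    # [2, 5, 8, 11]
floats = tf.range(0.0, 1.0, 0.25)     # [0.0, 0.25, 0.5, 0.75]

with tf.Session() as sess:
  print(sess.run([ints, stepped, floats]))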
Example #1
Source File: utils.py    From lamb with Apache License 2.0    6 votes
def layer_norm(x, reduction_indices, epsilon=1e-9, gain=None, bias=None,
               per_element=True, scope=None):
  """DOC."""
  reduction_indices = ensure_list(reduction_indices)
  mean = tf.reduce_mean(x, reduction_indices, keep_dims=True)
  variance = tf.reduce_mean(tf.squared_difference(x, mean),
                            reduction_indices, keep_dims=True)
  normalized = (x - mean) / tf.sqrt(variance + epsilon)
  dtype = x.dtype
  shape = x.get_shape().as_list()
  for i in six.moves.range(len(shape)):
    if i not in reduction_indices or not per_element:
      shape[i] = 1
  with tf.variable_scope(scope or 'layer_norm'):
    if gain is None:
      gain = tf.get_variable('gain', shape=shape, dtype=dtype,
                             initializer=tf.ones_initializer())
    if bias is None:
      bias = tf.get_variable('bias', shape=shape, dtype=dtype,
                             initializer=tf.zeros_initializer())
  return gain*normalized+bias 
Example #2
Source File: common_layers.py    From tensor2tensor with Apache License 2.0    6 votes
def summarize_video(video, prefix, max_outputs=1):
  """Summarize the video using image summaries starting with prefix."""
  video_shape = shape_list(video)
  if len(video_shape) != 5:
    raise ValueError("Assuming videos given as tensors in the format "
                     "[batch, time, height, width, channels] but got one "
                     "of shape: %s" % str(video_shape))
  if tf.executing_eagerly():
    return
  if video.get_shape().as_list()[1] is None:
    tf.summary.image(
        "%s_last_frame" % prefix,
        tf.cast(video[:, -1, :, :, :], tf.uint8),
        max_outputs=max_outputs)
  else:
    for k in range(video_shape[1]):
      tf.summary.image(
          "%s_frame_%d" % (prefix, k),
          tf.cast(video[:, k, :, :, :], tf.uint8),
          max_outputs=max_outputs) 
Example #3
Source File: common_layers.py    From tensor2tensor with Apache License 2.0    6 votes
def patch_discriminator(x, filters=64, filter_size=5, n=4,
                        name="patch_discrim"):
  """Patch descriminator."""
  with tf.variable_scope(name):
    x_shape = shape_list(x)
    spatial_dims = [x_shape[1] // 4, x_shape[2] // 4]
    x = tf.random_crop(x, [x_shape[0]] + spatial_dims + [x_shape[3]])
    for i in range(n):
      x = general_conv(
          x=x,
          num_filters=filters * 2**i,
          filter_size=filter_size,
          stride=2 if i != n - 1 else 1,
          stddev=0.02,
          padding="SAME",
          name="c%d" % i,
          do_norm="instance" if i != 0 else False,
          do_relu=i != n - 1,
          relufactor=0.2)
    x = tf.reduce_mean(x, [1, 2])
    return x 
Example #4
Source File: transformer_vae.py    From tensor2tensor with Apache License 2.0    6 votes
def compress(x, c, is_2d, hparams, name):
  """Compress."""
  with tf.variable_scope(name):
    # Run compression by strided convs.
    cur = x
    k1 = (3, 3) if is_2d else (3, 1)
    k2 = (2, 2) if is_2d else (2, 1)
    cur = residual_conv(cur, hparams.num_compress_steps, k1, hparams, "rc")
    if c is not None and hparams.do_attend_compress:
      cur = attend(cur, c, hparams, "compress_attend")
    for i in range(hparams.num_compress_steps):
      if hparams.do_residual_compress:
        cur = residual_conv(cur, hparams.num_compress_steps, k1, hparams,
                            "rc_%d" % i)
      cur = common_layers.conv_block(
          cur, hparams.hidden_size, [((1, 1), k2)],
          strides=k2, name="compress_%d" % i)
    return cur 
Example #5
Source File: shuffle_network.py    From tensor2tensor with Apache License 2.0    6 votes
def shuffle_layer(inputs, shuffle_fn=rol):
  """Shuffles the elements according to bitwise left or right rotation.

  Args:
    inputs: Tensor input from previous layer
    shuffle_fn: Shift function rol or ror

  Returns:
    tf.Tensor: Inputs shifted according to shuffle_fn
  """

  length = tf.shape(inputs)[1]
  n_bits = tf.log(tf.cast(length - 1, tf.float32)) / tf.log(2.0)
  n_bits = tf.cast(n_bits, tf.int32) + 1

  indices = tf.range(0, length)
  rev_indices = shuffle_fn(indices, n_bits)
  return tf.gather(inputs, rev_indices, axis=1) 
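shuffle_layer itself only builds an index vector with tf.range and permutes it; the rol shift function it defaults to is defined elsewhere in shuffle_network.py. The rol_sketch below is a hypothetical stand-in (a plain bitwise left rotation over the low n_bits), not the project's exact definition, just to make the permutation visible. It assumes TF 2.x with v1 behavior disabled.

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

def rol_sketch(x, n_bits, shift=1):
  """Rotate the lowest n_bits of integer tensor x left by `shift` bits."""
  mask = tf.bitwise.left_shift(tf.ones_like(x), n_bits) - 1
  rotated = tf.bitwise.bitwise_or(
      tf.bitwise.left_shift(x, shift),
      tf.bitwise.right_shift(x, n_bits - shift))
  return tf.bitwise.bitwise_and(rotated, mask)

indices = tf.range(0, 8)             # a length-8 sequence needs n_bits = 3
shuffled = rol_sketch(indices, 3)

with tf.Session() as sess:
  print(sess.run(shuffled))          # [0 2 4 6 1 3 5 7] -- the perfect shuffle

tf.gather(inputs, shuffled, axis=1) would then apply this permutation along the time axis, exactly as in the last line of shuffle_layer.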
Example #6
Source File: common_layers.py    From tensor2tensor with Apache License 2.0    6 votes
def argmax_with_score(logits, axis=None):
  """Argmax along with the value."""
  axis = axis or len(logits.get_shape()) - 1
  predictions = tf.argmax(logits, axis=axis)

  logits_shape = shape_list(logits)
  prefix_shape, vocab_size = logits_shape[:-1], logits_shape[-1]
  prefix_size = 1
  for d in prefix_shape:
    prefix_size *= d

  # Flatten to extract scores
  flat_logits = tf.reshape(logits, [prefix_size, vocab_size])
  flat_predictions = tf.reshape(predictions, [prefix_size])
  flat_indices = tf.stack(
      [tf.range(tf.to_int64(prefix_size)),
       tf.to_int64(flat_predictions)],
      axis=1)
  flat_scores = tf.gather_nd(flat_logits, flat_indices)

  # Unflatten
  scores = tf.reshape(flat_scores, prefix_shape)

  return predictions, scores 
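The heart of argmax_with_score is pairing a tf.range over the flattened rows with the argmax ids so that tf.gather_nd can pull out the winning scores. A minimal stand-alone sketch of that indexing pattern (not calling the tensor2tensor helper; the disable_v2_behavior/Session usage assumes TF 2.x):

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

logits = tf.constant([[0.1, 0.7, 0.2],
                      [0.5, 0.3, 0.2]])            # [batch=2, vocab=3]
predictions = tf.argmax(logits, axis=-1)           # [1, 0], dtype int64
row_ids = tf.to_int64(tf.range(tf.shape(logits)[0]))
idx = tf.stack([row_ids, predictions], axis=1)     # [[0, 1], [1, 0]]
scores = tf.gather_nd(logits, idx)                 # score of each argmax

with tf.Session() as sess:
  print(sess.run([predictions, scores]))           # [1, 0], [0.7, 0.5]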
Example #7
Source File: transformer.py    From tensor2tensor with Apache License 2.0    6 votes
def __init__(self, *args, **kwargs):
    super(TransformerMemory, self).__init__(*args, **kwargs)

    hparams = self._hparams
    self.recurrent_memory_by_layer = {}
    for layer in range(hparams.num_decoder_layers or hparams.num_hidden_layers):
      layer_name = "layer_%d" % layer
      if hparams.memory_type == "neural_memory":
        memory = transformer_memory.TransformerMemory(
            batch_size=int(hparams.batch_size / hparams.max_length),
            key_depth=hparams.hidden_size,
            val_depth=hparams.hidden_size,
            memory_size=hparams.split_targets_chunk_length,
            sharpen_factor=1.,
            name=layer_name + "/recurrent_memory")
      elif hparams.memory_type == "transformer_xl":
        memory = transformer_memory.RecentTokensMemory(
            layer_name + "/recurrent_memory", hparams)
      else:
        raise ValueError("Unsupported memory type: %s" % hparams.memory_type)
      self.recurrent_memory_by_layer[layer_name] = memory 
Example #8
Source File: common_layers.py    From tensor2tensor with Apache License 2.0    6 votes
def smoothing_cross_entropy_factored(a, b, labels, confidence):
  """Memory-efficient computation of smoothing cross-entropy.

  Avoids realizing the entire logits matrix at once.

  Args:
    a: a Tensor with shape [batch, inner_dim]
    b: a Tensor with shape [vocab_size, inner_dim]
    labels: an integer Tensor with shape [batch]
    confidence: a float

  Returns:
    A Tensor with shape [batch]
  """
  num_splits = 16
  vocab_size = shape_list(b)[0]
  labels = approximate_split(labels, num_splits)
  a = approximate_split(a, num_splits)
  parts = []
  for part in range(num_splits):
    with tf.control_dependencies(parts[-1:]):
      logits = tf.matmul(a[part], b, transpose_b=True)
      parts.append(
          smoothing_cross_entropy(logits, labels[part], vocab_size, confidence))
  return tf.concat(parts, 0) 
Example #9
Source File: data_reader.py    From tensor2tensor with Apache License 2.0    6 votes
def pad_batch(features, batch_multiple):
  """Pad batch dim of features to nearest multiple of batch_multiple."""
  feature = list(features.items())[0][1]
  batch_size = tf.shape(feature)[0]
  mod = batch_size % batch_multiple
  has_mod = tf.cast(tf.cast(mod, tf.bool), tf.int32)
  batch_padding = batch_multiple * has_mod - mod

  padded_features = {}
  for k, feature in features.items():
    rank = len(feature.shape)
    paddings = [[0, 0] for _ in range(rank)]
    paddings[0][1] = batch_padding
    padded_feature = tf.pad(feature, paddings)
    padded_features[k] = padded_feature
  return padded_features


# TODO(lukaszkaiser): refactor the API to not be just a list of self params
#   but make sense for other uses too. 
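The batch padding computed above is plain modular arithmetic: pad up to the next multiple of batch_multiple, and pad by zero when the batch already divides evenly. A small stand-alone sketch of just that calculation (batch_padding_sketch is a hypothetical helper name, not part of the project):

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

def batch_padding_sketch(batch_size, batch_multiple):
  """How many rows pad_batch would append (0 if already a multiple)."""
  mod = batch_size % batch_multiple
  has_mod = tf.cast(tf.cast(mod, tf.bool), tf.int32)
  return batch_multiple * has_mod - mod

with tf.Session() as sess:
  print(sess.run(batch_padding_sketch(tf.constant(5), 4)))  # 3, pads 5 up to 8
  print(sess.run(batch_padding_sketch(tf.constant(8), 4)))  # 0, already a multiple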
Example #10
Source File: beam_search.py    From tensor2tensor with Apache License 2.0    6 votes
def compute_batch_indices(batch_size, beam_size):
  """Computes the i'th coordinate that contains the batch index for gathers.

  Batch pos is a tensor like [[0,0,0,0],[1,1,1,1],...]. It says which
  batch the beam item is in. This will create the i of the i,j coordinate
  needed for the gather.

  Args:
    batch_size: Batch size
    beam_size: Size of the beam.
  Returns:
    batch_pos: [batch_size, beam_size] tensor of ids
  """
  batch_pos = tf.range(batch_size * beam_size) // beam_size
  batch_pos = tf.reshape(batch_pos, [batch_size, beam_size])
  return batch_pos 
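Because compute_batch_indices only needs tf.range, integer division and a reshape, it is easy to sanity-check in isolation; a stand-alone run with batch_size=2 and beam_size=3 (assuming TF 2.x with v1 behavior disabled):

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

batch_size, beam_size = 2, 3
batch_pos = tf.range(batch_size * beam_size) // beam_size   # [0, 0, 0, 1, 1, 1]
batch_pos = tf.reshape(batch_pos, [batch_size, beam_size])

with tf.Session() as sess:
  print(sess.run(batch_pos))   # [[0 0 0]
                               #  [1 1 1]]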
Example #11
Source File: common_attention_test.py    From tensor2tensor with Apache License 2.0    6 votes
def testExtractblocks(self):

    batch_size = 1
    num_heads = 3
    height = 6
    width = 10
    depth = 15
    block_h = 3
    block_w = 2
    t = np.random.rand(batch_size * num_heads, height, width, depth)
    a = common_attention._extract_blocks(t, block_h, block_w)
    self.evaluate(tf.global_variables_initializer())
    res = self.evaluate(a)
    self.assertEqual(res.shape, (batch_size * num_heads, height//block_h,
                                 width//block_w, block_h, block_w, depth))
    # also check if the content is right
    out = np.zeros((batch_size*num_heads, height//block_h,
                    width//block_w, block_h, block_w, depth))
    for b in range(batch_size*num_heads):
      for x in range(height//block_h):
        for y in range(width//block_w):
          for v in range(block_h):
            for w in range(block_w):
              out[b, x, y, v, w] = t[b, block_h*x+v, block_w*y+w]
    self.assertAllClose(res, out) 
Example #12
Source File: common_layers.py    From tensor2tensor with Apache License 2.0    6 votes
def get_timing_signal(length,
                      min_timescale=1,
                      max_timescale=1e4,
                      num_timescales=16):
  """Create Tensor of sinusoids of different frequencies.

  Args:
    length: Length of the Tensor to create, i.e. Number of steps.
    min_timescale: a float
    max_timescale: a float
    num_timescales: an int

  Returns:
    Tensor of shape (length, 2*num_timescales)
  """
  positions = to_float(tf.range(length))
  log_timescale_increment = (
      math.log(max_timescale / min_timescale) / (num_timescales - 1))
  inv_timescales = min_timescale * tf.exp(
      to_float(tf.range(num_timescales)) * -log_timescale_increment)
  scaled_time = tf.expand_dims(positions, 1) * tf.expand_dims(inv_timescales, 0)
  return tf.concat([tf.sin(scaled_time), tf.cos(scaled_time)], axis=1) 
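The two tf.range calls above enumerate positions and timescales, and their outer product gives a (length, num_timescales) grid before the sin/cos concatenation. A stand-alone sketch of the same computation, with the project's to_float helper replaced by an explicit tf.cast:

import math
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

length, num_timescales = 5, 4
min_timescale, max_timescale = 1.0, 1e4

positions = tf.cast(tf.range(length), tf.float32)
log_increment = math.log(max_timescale / min_timescale) / (num_timescales - 1)
inv_timescales = min_timescale * tf.exp(
    tf.cast(tf.range(num_timescales), tf.float32) * -log_increment)
scaled_time = tf.expand_dims(positions, 1) * tf.expand_dims(inv_timescales, 0)
signal = tf.concat([tf.sin(scaled_time), tf.cos(scaled_time)], axis=1)

with tf.Session() as sess:
  print(sess.run(tf.shape(signal)))   # [5 8] == (length, 2 * num_timescales)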
Example #13
Source File: residual_shuffle_exchange.py    From tensor2tensor with Apache License 2.0    6 votes
def residual_shuffle_network(inputs, hparams):
  """Residual Shuffle-Exchange network with weight sharing.

  Args:
    inputs: inputs to the Shuffle-Exchange network. The length should be a
      power of 2.
    hparams: Model configuration

  Returns:
    tf.Tensor: Outputs of the Shuffle-Exchange last layer
  """
  input_shape = tf.shape(inputs)
  n_bits = tf.log(tf.cast(input_shape[1] - 1, tf.float32)) / tf.log(2.0)
  n_bits = tf.cast(n_bits, tf.int32) + 1

  block_out = inputs

  for k in range(hparams.num_hidden_layers):
    with tf.variable_scope("benes_block_" + str(k), reuse=tf.AUTO_REUSE):
      forward_output = forward_part(block_out, hparams, n_bits)
      block_out = reverse_part(forward_output, hparams, n_bits)

  return RSU("last_layer", hparams.dropout, hparams.mode)(block_out) 
Example #14
Source File: generator_utils.py    From tensor2tensor with Apache License 2.0    6 votes
def _compute_auxiliary_structure(self, contents_and_mask):
    """Compute segment and position metadata."""
    contents = contents_and_mask[:, :self._num_sequences]
    start_mask = tf.cast(contents_and_mask[:, self._num_sequences:],
                         dtype=INDEX_DTYPE)

    segment = tf.cumsum(start_mask, axis=0)
    uniform_count = tf.ones_like(segment[:, 0])
    position = []
    for i in range(self._num_sequences):
      segment_slice = segment[:, i]
      counts = tf.math.segment_sum(uniform_count, segment[:, i])
      position.append(tf.range(self._packed_length) -  tf.cumsum(
          tf.gather(counts, segment_slice - 1) * start_mask[:, i]))
    position = tf.concat([i[:, tf.newaxis] for i in position], axis=1)

    # Correct for padding tokens.
    pad_mask = tf.cast(tf.not_equal(contents, 0), dtype=INDEX_DTYPE)
    segment *= pad_mask
    position *= pad_mask

    return segment, position 
Example #15
Source File: simulated_batch_gym_env.py    From tensor2tensor with Apache License 2.0    6 votes
def __init__(self, *args, **kwargs):
    with tf.Graph().as_default():
      self._batch_env = SimulatedBatchEnv(*args, **kwargs)

      self._actions_t = tf.placeholder(shape=(self.batch_size,), dtype=tf.int32)
      self._rewards_t, self._dones_t = self._batch_env.simulate(self._actions_t)
      with tf.control_dependencies([self._rewards_t]):
        self._obs_t = self._batch_env.observ
      self._indices_t = tf.placeholder(shape=(self.batch_size,), dtype=tf.int32)
      self._reset_op = self._batch_env.reset(
          tf.range(self.batch_size, dtype=tf.int32)
      )

      self._sess = tf.Session()
      self._sess.run(tf.global_variables_initializer())
      self._batch_env.initialize(self._sess) 
Example #16
Source File: ppo.py    From tensor2tensor with Apache License 2.0    6 votes
def _distributional_to_value(value_d, size, subscale, threshold):
  """Get a scalar value out of a value distribution in distributional RL."""
  half = size // 2
  value_range = (tf.to_float(tf.range(-half, half)) + 0.5) * subscale
  probs = tf.nn.softmax(value_d)

  if threshold == 0.0:
    return tf.reduce_sum(probs * value_range, axis=-1)

  # accumulated_probs[..., i] is the sum of probabilities in buckets up to i
  # so it is the probability that value <= i'th bucket value
  accumulated_probs = tf.cumsum(probs, axis=-1)
  # New probs are 0 on all lower buckets, until the threshold
  probs = tf.where(accumulated_probs < threshold, tf.zeros_like(probs), probs)
  probs /= tf.reduce_sum(probs, axis=-1, keepdims=True)  # Re-normalize.
  return tf.reduce_sum(probs * value_range, axis=-1) 
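With threshold == 0.0 the function reduces to an expectation over evenly spaced bucket values centred on zero. A small stand-alone check of the value_range construction and that expectation, using made-up numbers rather than real model outputs:

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

size, subscale = 4, 1.0
half = size // 2
value_range = (tf.cast(tf.range(-half, half), tf.float32) + 0.5) * subscale
# value_range == [-1.5, -0.5, 0.5, 1.5]

value_d = tf.constant([0.0, 0.0, 0.0, 0.0])     # uniform logits
probs = tf.nn.softmax(value_d)                  # [0.25, 0.25, 0.25, 0.25]
expected = tf.reduce_sum(probs * value_range, axis=-1)

with tf.Session() as sess:
  print(sess.run(expected))   # 0.0 -- symmetric distribution over symmetric buckets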
Example #17
Source File: generator_utils.py    From tensor2tensor with Apache License 2.0    6 votes
def _scanning_pack(self, dataset):
    """Apply scan based pack to a dataset."""
    if self._chop_long_sequences:
      dataset = dataset.map(lambda x: (x[:self._packed_length],))
    else:
      dataset = dataset.filter(lambda *x: tf.reduce_max(  # pylint: disable=g-long-lambda
          tf.stack([tf.shape(i)[0] for i in x]), axis=0) <= self._packed_length)

    # In order to retrieve the sequences which are still in the queue when the
    # dataset is exhausted, we feed dummy sequences which are guaranteed to
    # displace the remaining elements.
    dataset = dataset.concatenate(
        tf.data.Dataset.range(self._queue_size).map(self._eviction_fn))

    initial_state = self._scan_initial_state()
    step_fn = functools.partial(
        tf.autograph.to_graph(_scan_step_fn), packed_length=self._packed_length,
        queue_size=self._queue_size, spacing=self._spacing,
        num_sequences=self._num_sequences, token_dtype=self._token_dtype)

    dataset = dataset.apply(tf.data.experimental.scan(initial_state, step_fn))

    is_real_sample = lambda valid_sample, _: valid_sample
    return dataset.filter(is_real_sample) 
Example #18
Source File: generator_utils.py    From tensor2tensor with Apache License 2.0    5 votes
def shard_filepath(fname, num_shards):
  return [
      sharded_name(fname, shard, num_shards) for shard in range(num_shards)
  ] 
Example #19
Source File: common_video.py    From tensor2tensor with Apache License 2.0    5 votes
def write(self, batch_frame, batch_encoded_frame=None):
    del batch_encoded_frame
    if self.writers is None:
      self.writers = [
          WholeVideoWriter(  # pylint: disable=g-complex-comprehension
              self.fps, self.path_template.format(i), self.file_format
          )
          for i in range(len(batch_frame))
      ]
    for i, frame in enumerate(batch_frame):
      self.writers[i].write(frame) 
Example #20
Source File: common_layers.py    From tensor2tensor with Apache License 2.0    5 votes
def convert_rgb_to_symmetric_real(x):
  """Conversion of pixel values to real numbers."""
  with tf.name_scope("rgb_to_real", values=[x]):
    x = to_float(x)
    # Convert each pixel intensity in [0, 1, 2, ..., 255] into a real number in
    # the range [-1, 1].
    x = (x / 127.5) - 1
    return x 
Example #21
Source File: residual_shuffle_exchange.py    From tensor2tensor with Apache License 2.0    5 votes
def reverse_part(inputs, hparams, n_bits):
  """Reverse part of Benes block.

  Repeatedly applies interleaved Residual Switch and Reverse Shuffle layers.
  One set of weights is used for all Switch layers.

  Args:
    inputs: inputs for reverse part. Should be outputs from forward part.
    hparams: params of the network.
    n_bits: count of repeated layer applications.

  Returns:
    tf.Tensor: output of reverse part.
  """
  reverse_rsu = RSU("reverse_switch", hparams.dropout, hparams.mode)

  def reverse_step(state, _):
    with tf.variable_scope("reverse"):
      new_state = reverse_rsu(state)
      return reverse_shuffle_layer(new_state)

  reverse_outputs = tf.scan(
      reverse_step,
      tf.range(n_bits, n_bits * 2),
      initializer=inputs,
      parallel_iterations=1,
      swap_memory=True)

  return reverse_outputs[-1, :, :, :] 
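In reverse_part, tf.range only acts as a step counter for tf.scan; the tensor that actually flows between iterations is the initializer. A minimal stand-alone sketch of that pattern with a toy step function (unrelated to RSU, just to show the mechanics):

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

def step(state, counter):
  # `counter` comes from the tf.range below; `state` carries between steps.
  return state + tf.cast(counter, tf.float32)

outputs = tf.scan(
    step,
    tf.range(3, 6),                  # counters 3, 4, 5
    initializer=tf.constant(10.0),
    parallel_iterations=1)

with tf.Session() as sess:
  print(sess.run(outputs))           # [13. 17. 22.]; outputs[-1] is the final state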
Example #22
Source File: common_layers.py    From tensor2tensor with Apache License 2.0    5 votes
def index_last_dim_with_indices(x, indices):
  """Use indices to index into the last axis of x.

  This can be useful for recovering the actual probabilities of a sample from a
  probability distribution.

  Args:
    x: Tensor, n-d.
    indices: Tensor, (n-1)-d, where the dimension sizes match the first (n-1)
      dimensions of x. The values of indices will be used to index into the last
      axis of x.

  Returns:
    Tensor, (n-1)-d.
  """
  assert len(x.shape) == len(indices.shape) + 1

  x_shape = shape_list(x)
  vocab_size = x_shape[-1]

  flat_x = tf.reshape(x, [list_product(x_shape[:-1]), vocab_size])
  flat_indices = tf.reshape(indices, [list_product(x_shape[:-1])])

  idx = tf.stack(
      [
          tf.range(tf.to_int64(shape_list(flat_indices)[0])),
          tf.to_int64(flat_indices)
      ],
      axis=1)
  flat_x_idx = tf.gather_nd(flat_x, idx)

  x_idx = tf.reshape(flat_x_idx, x_shape[:-1])

  return x_idx 
Example #23
Source File: common_layers.py    From tensor2tensor with Apache License 2.0    5 votes
def top_kth_iterative(x, k):
  """Compute the k-th top element of x on the last axis iteratively.

  This assumes the values in x are non-negative; rescale if needed.
  It is often faster than tf.nn.top_k for small k, especially if k < 30.
  Note: this does not support back-propagation, it stops gradients!

  Args:
    x: a Tensor of non-negative numbers of type float.
    k: a python integer.

  Returns:
    a float tensor with the same shape as x except that the last axis has
    size 1 and contains the k-th largest value of x along that axis.
  """
  # The iterative computation is as follows:
  #
  # cur_x = x
  # for _ in range(k):
  #   top_x = maximum of elements of cur_x on the last axis
  #   cur_x = cur_x where cur_x < top_x and 0 everywhere else (top elements)
  #
  # We encode this computation in a TF graph using tf.foldl, so the inner
  # part of the above loop is called "next_x" and tf.foldl does the loop.
  def next_x(cur_x, _):
    top_x = tf.reduce_max(cur_x, axis=-1, keep_dims=True)
    return cur_x * to_float(cur_x < top_x)
  # We only do k-1 steps of the loop and compute the final max separately.
  fin_x = tf.foldl(next_x, tf.range(k - 1), initializer=tf.stop_gradient(x),
                   parallel_iterations=2, back_prop=False)
  return tf.stop_gradient(tf.reduce_max(fin_x, axis=-1, keep_dims=True)) 
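A quick stand-alone check of the foldl loop above, with the project's to_float helper written as an explicit tf.cast (the only change from the original body) and a hypothetical sketch name:

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

def top_kth_iterative_sketch(x, k):
  def next_x(cur_x, _):
    top_x = tf.reduce_max(cur_x, axis=-1, keep_dims=True)
    return cur_x * tf.cast(cur_x < top_x, tf.float32)
  fin_x = tf.foldl(next_x, tf.range(k - 1), initializer=tf.stop_gradient(x),
                   parallel_iterations=2, back_prop=False)
  return tf.stop_gradient(tf.reduce_max(fin_x, axis=-1, keep_dims=True))

with tf.Session() as sess:
  x = tf.constant([[1.0, 3.0, 2.0, 5.0]])
  print(sess.run(top_kth_iterative_sketch(x, 2)))   # [[3.]] -- the 2nd largest value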
Example #24
Source File: common_layers.py    From tensor2tensor with Apache License 2.0    5 votes
def cumsum(x, axis=0, exclusive=False):
  """TPU hack for tf.cumsum.

  This is equivalent to tf.cumsum and is faster on TPU as of 04/2018 unless
  the axis dimension is very large.

  Args:
    x: a Tensor
    axis: an integer
    exclusive: a boolean

  Returns:
    Tensor of the same shape as x.
  """
  if not is_xla_compiled():
    return tf.cumsum(x, axis=axis, exclusive=exclusive)
  x_shape = shape_list(x)
  rank = len(x_shape)
  length = x_shape[axis]
  my_range = tf.range(length)
  comparator = tf.less if exclusive else tf.less_equal
  mask = tf.cast(
      comparator(tf.expand_dims(my_range, 1), tf.expand_dims(my_range, 0)),
      x.dtype)
  ret = tf.tensordot(x, mask, axes=[[axis], [0]])
  if axis != rank - 1:
    ret = tf.transpose(
        ret,
        list(range(axis)) + [rank - 1] + list(range(axis, rank - 1)))
  return ret 
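The mask built from the two tf.range expansions is a triangular 0/1 matrix, so the tensordot reproduces a cumulative sum along the chosen axis. A stand-alone check of the inclusive case (exclusive=False, i.e. tf.less_equal) against tf.cumsum:

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

x = tf.constant([1.0, 2.0, 3.0, 4.0])
my_range = tf.range(4)
mask = tf.cast(
    tf.less_equal(tf.expand_dims(my_range, 1), tf.expand_dims(my_range, 0)),
    x.dtype)                                      # mask[i, j] = 1.0 iff i <= j
matmul_cumsum = tf.tensordot(x, mask, axes=[[0], [0]])

with tf.Session() as sess:
  print(sess.run(matmul_cumsum))   # [ 1.  3.  6. 10.]
  print(sess.run(tf.cumsum(x)))    # [ 1.  3.  6. 10.] -- identical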
Example #25
Source File: common_layers.py    From tensor2tensor with Apache License 2.0    5 votes
def conv_stride2_multistep(x, nbr_steps, output_filters, name=None, reuse=None):
  """Use a strided convolution to downsample x by 2, `nbr_steps` times.

  We use stride and filter size 2 to avoid the checkerboard problem of deconvs.
  As detailed in http://distill.pub/2016/deconv-checkerboard/.

  Args:
    x: a `Tensor` with shape `[batch, spatial, depth]` or
     `[batch, spatial_1, spatial_2, depth]`
    nbr_steps: number of halving downsample rounds to apply
    output_filters: an int specifying the filter count for the convolutions
    name: a string
    reuse: a boolean

  Returns:
    a `Tensor` with shape `[batch, spatial / (2**nbr_steps), output_filters]` or
     `[batch, spatial_1 / (2**nbr_steps), spatial_2 / (2**nbr_steps),
       output_filters]`
  """
  with tf.variable_scope(
      name, default_name="conv_stride2_multistep", values=[x], reuse=reuse):
    if nbr_steps == 0:
      out = conv(x, output_filters, (1, 1))
      return out, [out]
    hidden_layers = [x]
    for i in range(nbr_steps):
      hidden_layers.append(
          conv(
              hidden_layers[-1],
              output_filters, (2, 2),
              strides=2,
              activation=tf.nn.relu,
              name="conv" + str(i)))
    return hidden_layers[-1], hidden_layers 
Example #26
Source File: common_layers.py    From tensor2tensor with Apache License 2.0    5 votes
def smoothing_cross_entropy_factored_grad(op, dy):
  """Gradient function for smoothing_cross_entropy_factored."""
  a = op.inputs[0]
  b = op.inputs[1]
  labels = op.inputs[2]
  confidence = op.inputs[3]
  num_splits = 16
  vocab_size = shape_list(b)[0]
  labels = approximate_split(labels, num_splits)
  a = approximate_split(a, num_splits)
  dy = approximate_split(dy, num_splits)
  b_grad = None
  a_grad_parts = []
  deps = []
  for part in range(num_splits):
    with tf.control_dependencies(deps):
      logits = tf.matmul(a[part], b, transpose_b=True)
      output_part = smoothing_cross_entropy(logits, labels[part], vocab_size,
                                            confidence)
      a_grad_part, b_grad_part = tf.gradients(
          ys=[output_part], xs=[a[part], b], grad_ys=[dy[part]])
      a_grad_parts.append(a_grad_part)
      if part > 0:
        b_grad += b_grad_part
      else:
        b_grad = b_grad_part
      deps = [b_grad, a_grad_part]
  a_grad = tf.concat(a_grad_parts, 0)
  return a_grad, b_grad, None, None 
Example #27
Source File: common_layers.py    From tensor2tensor with Apache License 2.0    5 votes
def tpu_conv1d(inputs, filters, kernel_size, padding="SAME", name="tpu_conv1d"):
  """Version of conv1d that works on TPU (as of 11/2017).

  Args:
    inputs: a Tensor with shape [batch, length, input_depth].
    filters: an integer.
    kernel_size: an integer.
    padding: a string - "SAME" or "LEFT".
    name: a string.

  Returns:
    a Tensor with shape [batch, length, filters].
  """
  if kernel_size == 1:
    return dense(inputs, filters, name=name, use_bias=True)
  if padding == "SAME":
    assert kernel_size % 2 == 1
    first_offset = -((kernel_size - 1) // 2)
  else:
    assert padding == "LEFT"
    first_offset = -(kernel_size - 1)
  last_offset = first_offset + kernel_size - 1
  results = []
  padded = tf.pad(inputs, [[0, 0], [-first_offset, last_offset], [0, 0]])
  for i in range(kernel_size):
    shifted = tf.slice(padded, [0, i, 0], tf.shape(inputs)) if i else inputs
    shifted.set_shape(inputs.get_shape())
    results.append(
        dense(shifted, filters, use_bias=(i == 0), name=name + "_%d" % i))
  ret = tf.add_n(results)
  ret *= kernel_size**-0.5
  return ret 
Example #28
Source File: common_layers.py    From tensor2tensor with Apache License 2.0    5 votes
def approximate_split(x, num_splits, axis=0):
  """Split approximately equally into num_splits parts.

  Args:
    x: a Tensor
    num_splits: an integer
    axis: an integer.

  Returns:
    a list of num_splits Tensors.
  """
  size = shape_list(x)[axis]
  size_splits = [tf.div(size + i, num_splits) for i in range(num_splits)]
  return tf.split(x, size_splits, axis=axis) 
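The size_splits list always sums back to size, because the +i offsets spread the remainder over the later parts. A small stand-alone check using // in place of the deprecated tf.div:

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

x = tf.range(10)                                  # 10 elements split into 3 parts
num_splits = 3
size = tf.shape(x)[0]
size_splits = [(size + i) // num_splits for i in range(num_splits)]
parts = tf.split(x, size_splits, axis=0)

with tf.Session() as sess:
  print([p.tolist() for p in sess.run(parts)])
  # [[0, 1, 2], [3, 4, 5], [6, 7, 8, 9]] -- sizes 3, 3, 4 sum to 10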
Example #29
Source File: common_layers.py    From tensor2tensor with Apache License 2.0    5 votes
def mask_pos_lt(source_length, target_length):
  """A mask with 1.0 wherever source_pos < target_pos and 0.0 elsewhere.

  Args:
    source_length: an integer
    target_length: an integer
  Returns:
    a Tensor with shape [1, target_length, source_length]
  """
  return tf.expand_dims(
      tf.cast(tf.less(tf.expand_dims(tf.range(target_length), axis=0),
                      tf.expand_dims(tf.range(source_length), axis=1)),
              dtype=tf.float32), axis=0) 
Example #30
Source File: common_layers.py    From tensor2tensor with Apache License 2.0    5 votes
def mask_pos_gt(source_length, target_length):
  """A mask with 1.0 wherever source_pos > target_pos and 0.0 elsewhere.

  Args:
    source_length: an integer
    target_length: an integer
  Returns:
    a Tensor with shape [1, target_length, source_length]
  """
  return tf.expand_dims(
      tf.cast(tf.greater(tf.expand_dims(tf.range(target_length), axis=0),
                         tf.expand_dims(tf.range(source_length), axis=1)),
              dtype=tf.float32), axis=0)