Python tensorflow.compat.v1.reshape() Examples

The following are 30 code examples of tensorflow.compat.v1.reshape(), drawn from open-source projects. Each example lists its source file and project so you can consult the original code in context. You may also want to check out all other available functions and classes of the tensorflow.compat.v1 module.
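As a quick orientation before the project examples, here is a minimal self-contained sketch (not from any of the projects below; values are illustrative) of the basic contract of tf.reshape: the total element count must be preserved, and at most one dimension may be given as -1 to be inferred.

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

x = tf.range(12)            # shape [12]
a = tf.reshape(x, [3, 4])   # shape [3, 4]
b = tf.reshape(x, [2, -1])  # the -1 dimension is inferred as 6
with tf.Session() as sess:
  print(sess.run(a))
  print(sess.run(b))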
Example #1
Source File: neural_assistant.py    From tensor2tensor with Apache License 2.0
def encode_knowledge_bottom(self, features):
    tf.logging.info("Encoding knowledge " + str(self.triple_num))
    # Make sure this is embeddings for triples
    # <tf.float32>[batch_size, triple_num*max_triple_length, 1, emb_dim]
    fact_embedding = features["encoded_triples"]
    # [batch_size, triple_num*max_triple_length, emb_dim]
    fact_embedding = tf.squeeze(fact_embedding, 2)

    kb_shape = common_layers.shape_list(fact_embedding)
    batch_size = kb_shape[0]
    embed_dim = kb_shape[2]
    # <tf.float32>[batch_size*triple_num, max_triple_length, emb_dim]
    re_fact_embedding = tf.reshape(
        fact_embedding, [batch_size * self.triple_num, -1, embed_dim],
        name="reshape_fact_embedding")

    # <tf.int64>[batch_size, triple_num]
    input_fact_lengths = features["triple_lens"]
    # Stack the fact lengths.
    # <tf.int64>[batch_size*triple_num, 1]
    re_fact_lengths = tf.reshape(
        input_fact_lengths, [batch_size * self.triple_num, 1],
        name="reshape_fact_lengths")

    return re_fact_embedding, re_fact_lengths 
Example #2
Source File: metrics.py    From tensor2tensor with Apache License 2.0
def padded_neg_log_perplexity_with_masking(
    predictions,
    labels,
    features,
    weights_fn=None):
  """Average log-perplexity with custom targets_mask."""
  del weights_fn
  if "targets_mask" not in features:
    raise ValueError(
        "padded_neg_log_perplexity_with_masking requires targets_mask feature")

  # Features are 4 dimensional, so we need to reshape the targets_mask to match
  # the shape of the labels. A lot of models rely on these features being 4D,
  # so it's best to update the shape of the mask.
  extended_targets_mask_shape = common_layers.shape_list(
      features["targets_mask"])
  extended_targets_mask_shape.extend([1, 1])
  features["targets_mask"] = tf.reshape(features["targets_mask"],
                                        shape=extended_targets_mask_shape)

  mask_fn = lambda labels: features["targets_mask"]
  return padded_neg_log_perplexity(predictions, labels, mask_fn) 
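The reshape above only appends two singleton axes so the 2-D mask lines up with the 4-D labels. A standalone sketch of the same pattern (not from the project; static shapes and values are illustrative, in place of common_layers.shape_list):

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

targets_mask = tf.ones([8, 50])                    # [batch, length]
new_shape = targets_mask.shape.as_list() + [1, 1]  # [8, 50, 1, 1]
mask_4d = tf.reshape(targets_mask, new_shape)      # broadcasts against 4-D labels
print(mask_4d.shape)                               # (8, 50, 1, 1)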
Example #3
Source File: vqa_attention.py    From tensor2tensor with Apache License 2.0
def attn(image_feat, query, hparams, name="attn"):
  """Attention on image feature with question as query."""
  with tf.variable_scope(name, "attn", values=[image_feat, query]):
    attn_dim = hparams.attn_dim
    num_glimps = hparams.num_glimps
    num_channels = common_layers.shape_list(image_feat)[-1]
    if len(common_layers.shape_list(image_feat)) == 4:
      image_feat = common_layers.flatten4d3d(image_feat)
    query = tf.expand_dims(query, 1)
    image_proj = common_attention.compute_attention_component(
        image_feat, attn_dim, name="image_proj")
    query_proj = common_attention.compute_attention_component(
        query, attn_dim, name="query_proj")
    h = tf.nn.relu(image_proj + query_proj)
    h_proj = common_attention.compute_attention_component(
        h, num_glimps, name="h_proj")
    p = tf.nn.softmax(h_proj, axis=1)
    image_ave = tf.matmul(image_feat, p, transpose_a=True)
    image_ave = tf.reshape(image_ave, [-1, num_channels*num_glimps])

    return image_ave 
Example #4
Source File: transformer_vae_flow_prior.py    From tensor2tensor with Apache License 2.0
def sample_q(
      self, targets, targets_mask, decoder_self_attention_bias, n_samples,
      temp, **kwargs):
    hparams = self._hparams
    batch_size, targets_max_length = common_layers.shape_list(targets_mask)[:2]
    q_params = ops.posterior("posterior", hparams, targets, targets_mask,
                             decoder_self_attention_bias, **kwargs)
    q_dist = gops.diagonal_normal(q_params, "posterior")
    loc, scale = q_dist.loc, q_dist.scale
    z_shape = [batch_size, targets_max_length, hparams.latent_size]
    iw_z_shape = [n_samples*batch_size, targets_max_length, hparams.latent_size]
    if n_samples == 1:
      noise = tf.random_normal(z_shape, stddev=temp)
      z_q = loc + scale * noise
      log_q_z = q_dist.log_prob(z_q)  # [B, L, C]
    else:
      noise = tf.random_normal([n_samples] + z_shape, stddev=temp)
      z_q = loc[tf.newaxis, ...] + scale[tf.newaxis, ...] * noise
      log_q_z = q_dist.log_prob(z_q)  # [K, B, L, C]
      z_q = tf.reshape(z_q, iw_z_shape)
      log_q_z = tf.reshape(log_q_z, iw_z_shape)
    return z_q, log_q_z, q_dist 
Example #5
Source File: transformer_vae_flow_prior_ops.py    From tensor2tensor with Apache License 2.0
def decoder(name, latents, hparams, decoder_self_attention_bias, **kwargs):
  """Compute final hidden states for p(y|z,x)."""
  with tf.variable_scope(name, reuse=tf.AUTO_REUSE):
    decoder_input = drop_2d(latents, hparams.mode, hparams.decoder_2d_dropout)
    if hparams.pos_attn:
      decoder_input = gops.positional_attention(
          "pos_attn", decoder_input, decoder_self_attention_bias, hparams)
    else:
      decoder_input = common_attention.add_timing_signal_1d(decoder_input)
    if common_layers.shape_list(latents)[-1] != hparams.hidden_size:
      decoder_input = gops.dense("lat2hid", latents, hparams.hidden_size)
    decoder_output = transformer_decoder_layers(
        "block",
        n_layers=hparams.n_decoder_layers,
        decoder_input=decoder_input,
        hparams=hparams,
        decoder_self_attention_bias=decoder_self_attention_bias,
        **kwargs)
    batch_size, targets_length = common_layers.shape_list(decoder_output)[:2]
    decoder_output = tf.reshape(
        decoder_output, [batch_size, targets_length, 1, hparams.hidden_size])
    # Expand since t2t expects 4d tensors.
    return decoder_output 
Example #6
Source File: expert_utils.py    From tensor2tensor with Apache License 2.0
def __init__(self, pad_mask):
    """Compute and store the location of the padding.

    Args:
      pad_mask (tf.Tensor): Reference padding tensor of shape
        [batch_size, length] or [dim_origin] (dim_origin=batch_size*length)
        containing non-zero positive values to indicate padding locations.
    """
    self.nonpad_ids = None
    self.dim_origin = None

    with tf.name_scope("pad_reduce/get_ids"):
      pad_mask = tf.reshape(pad_mask, [-1])  # Flatten the batch
      # nonpad_ids contains the coordinates of the zero rows (as pad_mask is
      # float32, checking zero equality is done with |x| < epsilon, with
      # epsilon=1e-9 as standard; here pad_mask only contains positive values,
      # so tf.abs would be redundant)
      self.nonpad_ids = tf.to_int32(tf.where(pad_mask < 1e-9))
      self.dim_origin = tf.shape(pad_mask)[:1] 
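A minimal sketch of the flatten-then-index pattern above (not from the project; the mask values are illustrative): the batch is flattened with reshape so tf.where can return flat indices of non-padded positions.

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

pad_mask = tf.constant([[0., 0., 1.],
                        [0., 1., 1.]])        # positive values mark padding
pad_mask = tf.reshape(pad_mask, [-1])         # flatten the batch -> [6]
nonpad_ids = tf.to_int32(tf.where(pad_mask < 1e-9))
with tf.Session() as sess:
  print(sess.run(nonpad_ids))                 # [[0] [1] [3]]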
Example #7
Source File: expert_utils.py    From tensor2tensor with Apache License 2.0
def __init__(self, num_experts, gates):
    """Create a SparseDispatcher.

    Args:
      num_experts: an integer.
      gates: a `Tensor` of shape `[batch_size, num_experts]`.

    Returns:
      a SparseDispatcher
    """
    self._gates = gates
    self._num_experts = num_experts

    where = tf.to_int32(tf.where(tf.transpose(gates) > 0))
    self._expert_index, self._batch_index = tf.unstack(where, num=2, axis=1)
    self._part_sizes_tensor = tf.reduce_sum(tf.to_int32(gates > 0), [0])
    self._nonzero_gates = tf.gather(
        tf.reshape(self._gates, [-1]),
        self._batch_index * num_experts + self._expert_index) 
Example #8
Source File: image_transformer_2d.py    From tensor2tensor with Apache License 2.0
def body(self, features):
    hparams = copy.copy(self._hparams)
    inputs = features["inputs"]
    targets = features["targets"]
    targets_shape = common_layers.shape_list(targets)
    if not (tf.get_variable_scope().reuse or
            hparams.mode == tf.estimator.ModeKeys.PREDICT):
      tf.summary.image("targets", targets, max_outputs=1)

    decoder_input, rows, cols = cia.prepare_decoder(
        targets, hparams)
    # Add class label to decoder input.
    if not hparams.unconditional:
      decoder_input += tf.reshape(inputs,
                                  [targets_shape[0], 1, 1, hparams.hidden_size])

    decoder_output = cia.transformer_decoder_layers(
        decoder_input, None,
        hparams.num_decoder_layers,
        hparams,
        attention_type=hparams.dec_attention_type,
        name="decoder")

    output = cia.create_output(decoder_output, rows, cols, targets, hparams)
    return output 
Example #9
Source File: beam_search.py    From tensor2tensor with Apache License 2.0
def compute_batch_indices(batch_size, beam_size):
  """Computes the i'th coordinate that contains the batch index for gathers.

  Batch pos is a tensor like [[0,0,0,0],[1,1,1,1],...]. It says which
  batch the beam item is in. This will create the i of the i,j coordinate
  needed for the gather.

  Args:
    batch_size: Batch size
    beam_size: Size of the beam.
  Returns:
    batch_pos: [batch_size, beam_size] tensor of ids
  """
  batch_pos = tf.range(batch_size * beam_size) // beam_size
  batch_pos = tf.reshape(batch_pos, [batch_size, beam_size])
  return batch_pos 
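A usage sketch of the function above with concrete sizes (illustrative values, not from the project): integer division followed by reshape yields the row index repeated across the beam dimension.

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

batch_size, beam_size = 2, 3
batch_pos = tf.range(batch_size * beam_size) // beam_size   # [0 0 0 1 1 1]
batch_pos = tf.reshape(batch_pos, [batch_size, beam_size])
with tf.Session() as sess:
  print(sess.run(batch_pos))                                # [[0 0 0] [1 1 1]]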
Example #10
Source File: lstm.py    From tensor2tensor with Apache License 2.0
def body(self, features):
    # TODO(lukaszkaiser): investigate this issue and repair.
    if self._hparams.initializer == "orthogonal":
      raise ValueError("LSTM models fail with orthogonal initializer.")
    train = self._hparams.mode == tf.estimator.ModeKeys.TRAIN
    # This is a temporary fix for varying-length sequences within a batch.
    # A more complete fix should pass a length tensor from outside so that
    # all the lstm variants can use it.
    input_shape = common_layers.shape_list(features["inputs_raw"])
    flat_input = tf.reshape(features["inputs_raw"],
                            [input_shape[0], input_shape[1]])
    inputs_length = tf.reduce_sum(tf.minimum(flat_input, 1), -1)
    target_shape = common_layers.shape_list(features["targets_raw"])
    flat_target = tf.reshape(features["targets_raw"],
                             [target_shape[0], target_shape[1]])
    targets_length = tf.reduce_sum(tf.minimum(flat_target, 1), -1)
    tf.logging.info(self._hparams)
    return lstm_seq2seq_internal_attention(
        features["inputs"], features["targets"], self._hparams, train,
        inputs_length, targets_length) 
Example #11
Source File: attention_lm_moe.py    From tensor2tensor with Apache License 2.0
def expand_batch_coordinates(bc, length_factor):
  """Duplicate elements of bc by length_factor.

  Args:
    bc (tf.Tensor): int32 tensor of shape [1, length, 1]
    length_factor (int):

  Returns:
    tf.Tensor: of shape [1, length*length_factor, 1] where every element has
      been duplicated length_factor times.
  """
  assert bc.get_shape().as_list() == [1, None, 1]
  # bc has shape [1, length, 1]
  bc *= tf.constant([[1] * length_factor])
  # bc has shape [1, length, length_factor]
  bc = tf.reshape(bc, [1, -1, 1])
  # bc has shape [1, length*length_factor, 1]
  return bc 
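A standalone sketch of the broadcast-then-reshape trick above (not from the project; values are illustrative): multiplying by a row of ones replicates each element along a new axis, and the reshape interleaves the copies.

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

bc = tf.constant([[[0], [1], [2]]])      # [1, length=3, 1]
bc = bc * tf.constant([[1, 1]])          # broadcasts to [1, 3, 2]
bc = tf.reshape(bc, [1, -1, 1])          # [1, 6, 1]
with tf.Session() as sess:
  print(sess.run(bc)[0, :, 0])           # [0 0 1 1 2 2]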
Example #12
Source File: transformer_nat.py    From tensor2tensor with Apache License 2.0
def decode_transformer(encoder_output, encoder_decoder_attention_bias, targets,
                       hparams, name):
  """Original Transformer decoder."""
  with tf.variable_scope(name):
    targets = common_layers.flatten4d3d(targets)

    decoder_input, decoder_self_bias = (
        transformer.transformer_prepare_decoder(targets, hparams))

    decoder_input = tf.nn.dropout(decoder_input,
                                  1.0 - hparams.layer_prepostprocess_dropout)

    decoder_output = transformer.transformer_decoder(
        decoder_input, encoder_output, decoder_self_bias,
        encoder_decoder_attention_bias, hparams)
    decoder_output = tf.expand_dims(decoder_output, axis=2)
    decoder_output_shape = common_layers.shape_list(decoder_output)
    decoder_output = tf.reshape(
        decoder_output, [decoder_output_shape[0], -1, 1, hparams.hidden_size])
    # Expand since t2t expects 4d tensors.
    return decoder_output 
Example #13
Source File: expert_utils.py    From tensor2tensor with Apache License 2.0
def combine(self, x):
    """Return the output from the experts.

    When one example goes to multiple experts, the outputs are summed.

    Args:
      x: a Tensor with shape [batch, num_experts, expert_capacity, depth]

    Returns:
      a `Tensor` with shape `[batch, length, depth]
    """
    depth = tf.shape(x)[-1]
    x *= tf.expand_dims(self._nonpadding, -1)
    ret = tf.unsorted_segment_sum(
        x, self._flat_indices, num_segments=self._batch * self._length)
    ret = tf.reshape(ret, [self._batch, self._length, depth])
    return ret 
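A minimal sketch of the scatter-sum-then-reshape step above (not from the project; indices and sizes are illustrative): expert outputs are summed into a flat [batch*length] grid, then reshaped back to [batch, length, depth]. Note that repeated indices are summed, which is how multiple experts combine for one example.

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

batch, length, depth = 2, 3, 4
expert_out = tf.ones([5, depth])              # 5 dispatched positions
flat_indices = tf.constant([0, 0, 2, 4, 5])   # targets in the flat [batch*length] grid
ret = tf.unsorted_segment_sum(expert_out, flat_indices,
                              num_segments=batch * length)   # [6, 4]
ret = tf.reshape(ret, [batch, length, depth])                # [2, 3, 4]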
Example #14
Source File: batch_allreduce.py    From benchmarks with Apache License 2.0
def undo_maybe_concat_tensors(self, concatenated_tensor):
    """Undo maybe_concat_tensors()."""
    if not self._num_splits:
      return concatenated_tensor

    if len(concatenated_tensor) != 1:
      raise RuntimeError(
          'undo_maybe_split_tensors() must be called before '
          'undo_maybe_concat_tensors when num_splits is greater than 1')
    concatenated_tensor = concatenated_tensor[0]

    tensors_with_sizes = tf.split(concatenated_tensor,
                                  self._orig_sizes)
    tensors_with_shapes = [
        tf.reshape(grad, shape) for grad, shape in zip(
            tensors_with_sizes, self._orig_shapes)
    ]
    return tensors_with_shapes 
Example #15
Source File: allreduce.py    From benchmarks with Apache License 2.0
def unpack_grad_tuple(gv, gpt):
  """Unpack a previously packed collection of gradient tensors.

  Args:
    gv: A (grad, var) pair to be unpacked.
    gpt: A GradPackTuple describing the packing operation that produced gv.

  Returns:
    A list of (grad, var) pairs corresponding to the values that were
     originally packed into gv, maybe following subsequent operations like
     reduction.
  """
  elt_widths = [x.num_elements() for x in gpt.shapes]
  with tf.device(gv[0][0].device):
    with tf.name_scope('unpack'):
      splits = tf.split(gv[0], elt_widths)
      unpacked_gv = []
      for idx, s in enumerate(splits):
        unpacked_gv.append((tf.reshape(s, gpt.shapes[idx]), gpt.vars[idx]))
  return unpacked_gv 
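A self-contained sketch of the pack/unpack round trip above (not from the project; shapes are illustrative): gradients are flattened and concatenated, then split by element count and reshaped back to their original shapes.

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

shapes = [tf.TensorShape([2, 3]), tf.TensorShape([4])]
g1, g2 = tf.ones([2, 3]), tf.zeros([4])
packed = tf.concat([tf.reshape(g1, [-1]), tf.reshape(g2, [-1])], axis=0)  # [10]

elt_widths = [s.num_elements() for s in shapes]    # [6, 4]
splits = tf.split(packed, elt_widths)
unpacked = [tf.reshape(s, shape) for s, shape in zip(splits, shapes)]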
Example #16
Source File: shuffle_network.py    From tensor2tensor with Apache License 2.0
def conv_linear_map(inputs, nin, nout, bias_start, prefix):
  """Convolutional liner map.

  Maps 3D tensor by last dimension.

  Args:
    inputs: Inputs that should be shuffled
    nin: Input feature map count
    nout: Output feature map count
    bias_start: Bias start value
    prefix: Name prefix

  Returns:
    tf.Tensor: Inputs with applied convolution
  """

  with tf.variable_scope(prefix):
    inp_shape = tf.shape(inputs)

    initializer = tf.variance_scaling_initializer(
        scale=1.0, mode="fan_avg", distribution="uniform")
    kernel = tf.get_variable("CvK", [nin, nout], initializer=initializer)
    bias_term = tf.get_variable(
        "CvB", [nout], initializer=tf.constant_initializer(0.0))

    mul_shape = [inp_shape[0] * inp_shape[1], nin]
    res = tf.matmul(tf.reshape(inputs, mul_shape), kernel)
    res = tf.reshape(res, [inp_shape[0], inp_shape[1], nout])
    return res + bias_start + bias_term


# pylint: disable=useless-object-inheritance 
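The reshape-matmul-reshape idiom above applies a dense map over the last axis of a 3-D tensor. A minimal sketch of the same pattern (not from the project; the variable name "demo_kernel" and all sizes are illustrative):

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

nin, nout = 4, 8
inputs = tf.zeros([2, 5, nin])                    # [batch, length, nin]
kernel = tf.get_variable("demo_kernel", [nin, nout])
shp = tf.shape(inputs)
res = tf.matmul(tf.reshape(inputs, [shp[0] * shp[1], nin]), kernel)
res = tf.reshape(res, [shp[0], shp[1], nout])     # [batch, length, nout]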
Example #17
Source File: autoencoders.py    From tensor2tensor with Apache License 2.0
def time_to_channels(embedded_video):
  """Put time dimension on channels in an embedded video."""
  video_shape = common_layers.shape_list(embedded_video)
  if len(video_shape) != 5:
    raise ValueError("Assuming videos given as tensors in the format "
                     "[batch, time, height, width, channels] but got one "
                     "of shape: %s" % str(video_shape))
  transposed = tf.transpose(embedded_video, [0, 2, 3, 1, 4])
  return tf.reshape(transposed, [
      video_shape[0], video_shape[2], video_shape[3],
      video_shape[1] * video_shape[4]
  ]) 
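A concrete sketch of the transpose-then-reshape above (not from the project; shapes are illustrative): the time axis is moved next to channels, then folded into it.

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

video = tf.zeros([2, 5, 8, 8, 3])                  # [batch, time, H, W, C]
transposed = tf.transpose(video, [0, 2, 3, 1, 4])  # [2, 8, 8, 5, 3]
folded = tf.reshape(transposed, [2, 8, 8, 5 * 3])  # time folded into channels
print(folded.shape)                                # (2, 8, 8, 15)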
Example #18
Source File: glow_ops.py    From tensor2tensor with Apache License 2.0
def squeeze(name, x, factor=2, reverse=True):
  """Block-wise spatial squeezing of x to increase the number of channels.

  Args:
    name: Used for variable scoping.
    x: 4-D Tensor of shape (batch_size X H X W X C)
    factor: Factor by which the spatial dimensions should be squeezed.
    reverse: Squeeze or unsqueeze operation.

  Returns:
    x: 4-D Tensor of shape (batch_size X (H//factor) X (W//factor) X
       (C*factor^2)). If reverse is True, the squeeze is undone instead,
       which is equivalent to applying the transform with factor = 1/factor.
  """
  with tf.variable_scope(name, reuse=tf.AUTO_REUSE):
    shape = common_layers.shape_list(x)
    if factor == 1:
      return x
    height = int(shape[1])
    width = int(shape[2])
    n_channels = int(shape[3])

    if not reverse:
      assert height % factor == 0 and width % factor == 0
      x = tf.reshape(x, [-1, height//factor, factor,
                         width//factor, factor, n_channels])
      x = tf.transpose(x, [0, 1, 3, 5, 2, 4])
      x = tf.reshape(x, [-1, height//factor, width //
                         factor, n_channels*factor*factor])
    else:
      x = tf.reshape(
          x, (-1, height, width, int(n_channels/factor**2), factor, factor))
      x = tf.transpose(x, [0, 1, 4, 2, 5, 3])
      x = tf.reshape(x, (-1, int(height*factor),
                         int(width*factor), int(n_channels/factor**2)))
    return x 
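A minimal worked sketch of the forward (reverse=False) reshape/transpose sequence above, with factor=2 on a 4x4 single-channel input (not from the project; values are illustrative):

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

x = tf.reshape(tf.range(16.), [1, 4, 4, 1])  # [B, H, W, C]
x = tf.reshape(x, [-1, 2, 2, 2, 2, 1])       # split H and W into 2x2 blocks
x = tf.transpose(x, [0, 1, 3, 5, 2, 4])      # [B, H//2, W//2, C, 2, 2]
x = tf.reshape(x, [-1, 2, 2, 4])             # [B, H//2, W//2, C*factor^2]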
Example #19
Source File: vanilla_gan.py    From tensor2tensor with Apache License 2.0
def discriminator(self, x, is_training, reuse=False):
    """Discriminator architecture based on InfoGAN.

    Args:
      x: input images, shape [bs, h, w, channels]
      is_training: boolean, are we in train or eval mode.
      reuse: boolean, should params be re-used.

    Returns:
      out_logit: the output logits (before sigmoid).
    """
    hparams = self.hparams
    with tf.variable_scope(
        "discriminator", reuse=reuse,
        initializer=tf.random_normal_initializer(stddev=0.02)):
      batch_size, height, width = common_layers.shape_list(x)[:3]
      # Mapping x from [bs, h, w, c] to [bs, 1]
      net = tf.layers.conv2d(x, 64, (4, 4), strides=(2, 2),
                             padding="SAME", name="d_conv1")
      # [bs, h/2, w/2, 64]
      net = lrelu(net)
      net = tf.layers.conv2d(net, 128, (4, 4), strides=(2, 2),
                             padding="SAME", name="d_conv2")
      # [bs, h/4, w/4, 128]
      if hparams.discriminator_batchnorm:
        net = tf.layers.batch_normalization(net, training=is_training,
                                            momentum=0.999, name="d_bn2")
      net = lrelu(net)
      size = height * width
      net = tf.reshape(net, [batch_size, size * 8])  # [bs, h * w * 8]
      net = tf.layers.dense(net, 1024, name="d_fc3")  # [bs, 1024]
      if hparams.discriminator_batchnorm:
        net = tf.layers.batch_normalization(net, training=is_training,
                                            momentum=0.999, name="d_bn3")
      net = lrelu(net)
      return net 
Example #20
Source File: vanilla_gan.py    From tensor2tensor with Apache License 2.0
def generator(self, z, is_training, out_shape):
    """Generator outputting image in [0, 1]."""
    hparams = self.hparams
    height, width, c_dim = out_shape
    batch_size = hparams.batch_size
    with tf.variable_scope(
        "generator",
        initializer=tf.random_normal_initializer(stddev=0.02)):
      net = tf.layers.dense(z, 1024, name="g_fc1")
      net = tf.layers.batch_normalization(net, training=is_training,
                                          momentum=0.999, name="g_bn1")
      net = lrelu(net)
      net = tf.layers.dense(net, 128 * (height // 4) * (width // 4),
                            name="g_fc2")
      net = tf.layers.batch_normalization(net, training=is_training,
                                          momentum=0.999, name="g_bn2")
      net = lrelu(net)
      net = tf.reshape(net, [batch_size, height // 4, width // 4, 128])
      net = deconv2d(net, [batch_size, height // 2, width // 2, 64],
                     4, 4, 2, 2, name="g_dc3")
      net = tf.layers.batch_normalization(net, training=is_training,
                                          momentum=0.999, name="g_bn3")
      net = lrelu(net)
      net = deconv2d(net, [batch_size, height, width, c_dim],
                     4, 4, 2, 2, name="g_dc4")
      out = tf.nn.sigmoid(net)
      return common_layers.convert_real_to_rgb(out) 
Example #21
Source File: vanilla_gan.py    From tensor2tensor with Apache License 2.0
def body(self, features):
    """Body of the model.

    Args:
      features: a dictionary with the tensors.

    Returns:
      A pair (predictions, losses) where predictions is the generated image
      and losses is a dictionary of losses (that get added for the final loss).
    """
    features["targets"] = features["inputs"]
    is_training = self.hparams.mode == tf.estimator.ModeKeys.TRAIN

    # Input images.
    inputs = tf.to_float(features["targets_raw"])

    # Noise vector.
    z = tf.random_uniform([self.hparams.batch_size,
                           self.hparams.bottleneck_bits],
                          minval=-1, maxval=1, name="z")

    # Generator output: fake images.
    out_shape = common_layers.shape_list(inputs)[1:4]
    g = self.generator(z, is_training, out_shape)

    losses = self.losses(inputs, g)  # pylint: disable=not-callable

    summary_g_image = tf.reshape(
        g[0, :], [1] + common_layers.shape_list(inputs)[1:])
    tf.summary.image("generated", summary_g_image, max_outputs=1)

    if is_training:  # Returns a dummy output and the losses dictionary.
      return tf.zeros_like(inputs), losses
    return tf.reshape(g, tf.shape(inputs)), losses 
Example #22
Source File: shuffle_network.py    From tensor2tensor with Apache License 2.0
def gated_linear_map(self, inputs, suffix, bias_start_reset, in_units,
                       out_units):
    """Linear mapping with two reset gates.

    Args:
      inputs: Input tensor
      suffix: Linear map name suffix
      bias_start_reset: Bias start value for reset gate
      in_units: Size of input tensor feature map count
      out_units: Size of output tensor feature map count
    Returns:
      tf.Tensor: Convolution applied to the input tensor
    """

    def reset_gate(name):
      prefix = self.prefix + name + suffix
      reset = conv_linear_map(inputs, in_units * 2, in_units * 2,
                              bias_start_reset, prefix)
      return tf.nn.sigmoid(reset)

    in_shape = [self.batch_size, self.length // 2, in_units * 2]
    inputs = tf.reshape(inputs, in_shape)

    reset1 = reset_gate("/reset1/")
    reset2 = reset_gate("/reset2/")
    res1 = conv_linear_map(inputs * reset1, in_units * 2, out_units, 0.0,
                           self.prefix + "/cand1/" + suffix)
    res2 = conv_linear_map(inputs * reset2, in_units * 2, out_units, 0.0,
                           self.prefix + "/cand2/" + suffix)

    res = tf.concat([res1, res2], axis=2)
    res = tf.reshape(res, [self.batch_size, self.length, out_units])
    return tf.nn.tanh(res) 
Example #23
Source File: autoencoders.py    From tensor2tensor with Apache License 2.0
def full_stack(self, b, x_size, bottleneck_bits, losses, is_training, i):
    stack1_b = self.stack(b, x_size, bottleneck_bits, "step%d" % i)
    if i > 1:
      stack1_b = self.full_stack(stack1_b, 2 * x_size, 2 * bottleneck_bits,
                                 losses, is_training, i - 1)
    b1, b_pred = self.unstack(stack1_b, x_size, bottleneck_bits, "step%d" % i)
    losses["stack%d_loss" % i] = self.stack_loss(b, b_pred, "step%d" % i)
    b_shape = common_layers.shape_list(b)
    if is_training:
      condition = tf.less(tf.random_uniform([]), 0.5)
      condition = tf.reshape(condition, [1] * len(b.shape))
      condition = tf.tile(condition, b.shape)
      b1 = tf.where(condition, b, b1)
    return tf.reshape(b1, b_shape) 
Example #24
Source File: rl.py    From tensor2tensor with Apache License 2.0
def body(self, features):
    observations = features["inputs"]
    x = tf.transpose(observations, [0, 2, 3, 1, 4])
    x_shape = common_layers.shape_list(x)
    x = tf.reshape(x, x_shape[:-2] + [-1])
    dropout = getattr(self.hparams, "dropout_ppo", 0.0)
    with tf.variable_scope("feed_forward_cnn_small"):
      x = tf.cast(x, tf.float32) / 255.0
      x = tf.nn.dropout(x, rate=dropout)
      x = tf.layers.conv2d(
          x, 32, (4, 4), strides=(2, 2), name="conv1",
          activation=common_layers.belu, padding="SAME")
      x = tf.nn.dropout(x, rate=dropout)
      x = tf.layers.conv2d(
          x, 64, (4, 4), strides=(2, 2), name="conv2",
          activation=common_layers.belu, padding="SAME")
      x = tf.nn.dropout(x, rate=dropout)
      x = tf.layers.conv2d(
          x, 128, (4, 4), strides=(2, 2), name="conv3",
          activation=common_layers.belu, padding="SAME")

      flat_x = tf.layers.flatten(x)
      flat_x = tf.nn.dropout(flat_x, rate=dropout)
      x = tf.layers.dense(flat_x, 128, activation=tf.nn.relu, name="dense1")

      logits = tf.layers.dense(
          x, self.hparams.problem.num_actions, name="dense2"
      )
      logits = tf.expand_dims(logits, axis=1)
      logits = clip_logits(logits, self.hparams)

      value = tf.layers.dense(x, 1, name="value")
    return {"target_policy": logits, "target_value": value} 
Example #25
Source File: rl.py    From tensor2tensor with Apache License 2.0
def body(self, features):
    observations = features["inputs_raw"]
    # Axis 0    - Batch.
    # Axis 1    - Input Frames, 4 frames.
    # Axis 2, 3 - Height & Width.
    # Axis 4    - Channels RGB, 3 colours.
    x = tf.transpose(observations, [0, 2, 3, 1, 4])
    x_shape = common_layers.shape_list(x)
    x = tf.reshape(x, x_shape[:-2] + [-1])
    dropout = getattr(self.hparams, "dropout_ppo", 0.0)
    with tf.variable_scope("feed_forward_cnn_small"):
      x = tf.cast(x, tf.float32) / 255.0
      x = tf.layers.conv2d(x, 32, (5, 5), strides=(2, 2),
                           activation=tf.nn.relu, padding="same")
      x = tf.layers.conv2d(x, 32, (5, 5), strides=(2, 2),
                           activation=tf.nn.relu, padding="same")

      flat_x = tf.layers.flatten(x)
      if self.use_epochs:
        epoch = features["epoch"] + tf.zeros([x_shape[0]], dtype=tf.int32)
        # Randomly set epoch to 0 in some cases as that's the inference value.
        rand = tf.random.uniform([x_shape[0]])
        epoch = tf.where(rand < 0.1, tf.zeros_like(epoch), epoch)
        # Embed the epoch number.
        emb_epoch = common_layers.embedding(epoch, 32, 32)  # [batch, 32]
        flat_x = tf.concat([flat_x, emb_epoch], axis=1)
      flat_x = tf.layers.dropout(flat_x, rate=dropout)
      x = tf.layers.dense(flat_x, 128, activation=tf.nn.relu)

      logits = tf.layers.dense(
          x, self.hparams.problem.num_actions, name="dense2"
      )
      logits = clip_logits(logits, self.hparams)
      logits = tf.expand_dims(logits, axis=1)
      value = tf.layers.dense(x, self.distributional_value_size)
    return {"target_policy": logits, "target_value": value} 
Example #26
Source File: rl.py    From tensor2tensor with Apache License 2.0
def feed_forward_gaussian_fun(action_space, config, observations):
  """Feed-forward Gaussian."""
  if not isinstance(action_space, gym.spaces.box.Box):
    raise ValueError("Expecting continuous action space.")

  mean_weights_initializer = tf.initializers.variance_scaling(
      scale=config.init_mean_factor)
  logstd_initializer = tf.random_normal_initializer(config.init_logstd, 1e-10)

  flat_observations = tf.reshape(observations, [
      tf.shape(observations)[0], tf.shape(observations)[1],
      functools.reduce(operator.mul, observations.shape.as_list()[2:], 1)])

  with tf.variable_scope("network_parameters"):
    with tf.variable_scope("policy"):
      x = flat_observations
      for size in config.policy_layers:
        x = tf.layers.dense(x, size, activation=tf.nn.relu)
      mean = tf.layers.dense(
          x, action_space.shape[0], activation=tf.tanh,
          kernel_initializer=mean_weights_initializer)
      logstd = tf.get_variable(
          "logstd", mean.shape[2:], tf.float32, logstd_initializer)
      logstd = tf.tile(
          logstd[None, None],
          [tf.shape(mean)[0], tf.shape(mean)[1]] + [1] * (mean.shape.ndims - 2))
    with tf.variable_scope("value"):
      x = flat_observations
      for size in config.value_layers:
        x = tf.layers.dense(x, size, activation=tf.nn.relu)
      value = tf.layers.dense(x, 1)[..., 0]
  mean = tf.check_numerics(mean, "mean")
  logstd = tf.check_numerics(logstd, "logstd")
  value = tf.check_numerics(value, "value")

  policy = tfp.distributions.MultivariateNormalDiag(mean, tf.exp(logstd))

  return NetworkOutput(policy, value, lambda a: tf.clip_by_value(a, -2., 2)) 
Example #27
Source File: transformer_nat.py    From tensor2tensor with Apache License 2.0
def vq_discrete_unbottleneck(x, hparams):
  """Simple undiscretization from vector quantized representation."""
  x_shape = common_layers.shape_list(x)
  bottleneck_size = 2**hparams.bottleneck_bits
  means = hparams.means
  x_flat = tf.reshape(x, [-1, bottleneck_size])
  result = tf.matmul(x_flat, means)
  result = tf.reshape(result, x_shape[:-1] + [hparams.hidden_size])
  return result 
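A self-contained sketch of the same flatten-matmul-restore pattern (not from the project; codebook values and sizes are illustrative): one-hot codes are flattened to 2-D, a matmul selects codebook rows, and the result is reshaped back to the original leading dimensions.

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

bottleneck_size, hidden_size = 8, 16
codes = tf.one_hot([[1, 3], [5, 7]], bottleneck_size)     # [2, 2, 8] one-hot codes
means = tf.random_normal([bottleneck_size, hidden_size])  # stand-in codebook
x_flat = tf.reshape(codes, [-1, bottleneck_size])         # [4, 8]
result = tf.matmul(x_flat, means)                         # [4, 16] codebook lookup
result = tf.reshape(result, [2, 2, hidden_size])          # [2, 2, 16]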
Example #28
Source File: transformer_nat.py    From tensor2tensor with Apache License 2.0
def vq_discrete_bottleneck(x, hparams):
  """Simple vector quantized discrete bottleneck."""
  tf.logging.info("Using EMA with beta = {}".format(hparams.beta))
  bottleneck_size = 2**hparams.bottleneck_bits
  x_shape = common_layers.shape_list(x)
  x = tf.reshape(x, [-1, hparams.hidden_size])
  x_means_hot, e_loss = vq_nearest_neighbor(
      x, hparams)
  means, ema_means, ema_count = (hparams.means, hparams.ema_means,
                                 hparams.ema_count)

  # Update the ema variables
  updated_ema_count = moving_averages.assign_moving_average(
      ema_count,
      tf.reduce_sum(x_means_hot, axis=0),
      hparams.decay,
      zero_debias=False)

  dw = tf.matmul(x_means_hot, x, transpose_a=True)
  updated_ema_means = moving_averages.assign_moving_average(
      ema_means, dw, hparams.decay, zero_debias=False)
  n = tf.reduce_sum(updated_ema_count, axis=-1, keepdims=True)
  updated_ema_count = (
      (updated_ema_count + hparams.epsilon) /
      (n + bottleneck_size * hparams.epsilon) * n)
  # pylint: disable=g-no-augmented-assignment
  updated_ema_means = updated_ema_means / tf.expand_dims(
      updated_ema_count, axis=-1)
  # pylint: enable=g-no-augmented-assignment
  with tf.control_dependencies([e_loss]):
    update_means = tf.assign(means, updated_ema_means)
    with tf.control_dependencies([update_means]):
      loss = hparams.beta * e_loss

  discrete = tf.reshape(x_means_hot, x_shape[:-1] + [bottleneck_size])
  return discrete, loss 
Example #29
Source File: transformer_vae.py    From tensor2tensor with Apache License 2.0
def multinomial_sample(x, vocab_size, temperature):
  """Multinomial sampling from a n-dimensional tensor."""
  if temperature > 0:
    samples = tf.multinomial(tf.reshape(x, [-1, vocab_size]) / temperature, 1)
  else:
    samples = tf.argmax(x, axis=-1)
  reshaped_samples = tf.reshape(samples, common_layers.shape_list(x)[:-1])
  return tf.to_int32(reshaped_samples) 
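A usage sketch of the temperature>0 branch (not from the project; shapes and temperature are illustrative): tf.multinomial expects 2-D logits, so the tensor is flattened for sampling and the samples reshaped back to the leading dimensions.

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

vocab_size, temperature = 5, 0.7
x = tf.random_normal([2, 3, vocab_size])           # e.g. [batch, length, vocab]
samples = tf.multinomial(tf.reshape(x, [-1, vocab_size]) / temperature, 1)  # [6, 1]
samples = tf.reshape(samples, [2, 3])              # back to [batch, length]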
Example #30
Source File: transformer_vae.py    From tensor2tensor with Apache License 2.0
def top_k_experts(x, k, hparams):
  x_shape = common_layers.shape_list(x)
  x_flat = tf.reshape(x, [-1, common_layers.shape_list(x)[-1]])
  is_training = hparams.mode == tf.estimator.ModeKeys.TRAIN
  gates, load = expert_utils.noisy_top_k_gating(
      x_flat, 2 ** hparams.z_size, is_training, k)
  gates_shape = [x_shape[0], x_shape[1], x_shape[2], 2 ** hparams.z_size]
  gates = tf.reshape(gates, gates_shape)
  load_loss = expert_utils.cv_squared(load)
  return gates, load_loss