Python tensorflow.compat.v1.get_variable() Examples

The following are 30 code examples of tensorflow.compat.v1.get_variable(), drawn from open-source projects. The project, source file, and license are noted above each example. You may also want to check out all available functions/classes of the module tensorflow.compat.v1.
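Before the examples, here is a minimal, hedged sketch of the typical usage pattern: tf.compat.v1.get_variable() creates a variable the first time a given name is requested inside a tf.compat.v1.variable_scope, and returns the existing variable when the scope allows reuse. The snippet below is illustrative only (graph mode; the scope and variable names "demo" and "w" are made up for this page) and is not taken from any of the projects below.

import tensorflow.compat.v1 as tf

tf.disable_eager_execution()  # get_variable relies on graph-mode variable scopes

# First call creates the variable; with reuse=tf.AUTO_REUSE, later calls with
# the same scope and name return the same underlying variable.
with tf.variable_scope("demo", reuse=tf.AUTO_REUSE):
  w = tf.get_variable(
      "w", shape=[3, 4], dtype=tf.float32,
      initializer=tf.zeros_initializer(), trainable=True)

with tf.variable_scope("demo", reuse=tf.AUTO_REUSE):
  w_again = tf.get_variable("w", shape=[3, 4])  # same variable as w

with tf.Session() as sess:
  sess.run(tf.global_variables_initializer())
  print(sess.run(w).shape)  # (3, 4)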
Example #1
Source File: variable_mgr.py    From benchmarks with Apache License 2.0
def get_gradients_to_apply(self, device_num, gradient_state):
    device_grads = gradient_state  # From 2nd result of preprocess_device_grads.

    avg_grads, self.grad_has_inf_nan = (
        variable_mgr_util.aggregate_gradients_using_copy_with_device_selection(
            self.benchmark_cnn,
            device_grads,
            use_mean=True,
            check_inf_nan=self.benchmark_cnn.enable_auto_loss_scale))

    # Make shadow variable on a parameter server for each original trainable
    # variable.
    for i, (g, v) in enumerate(avg_grads):
      my_name = variable_mgr_util.PS_SHADOW_VAR_PREFIX + '/' + v.name
      if my_name.endswith(':0'):
        my_name = my_name[:-2]
      new_v = tf.get_variable(
          my_name,
          dtype=v.dtype.base_dtype,
          initializer=v.initial_value,
          trainable=True)
      avg_grads[i] = (g, new_v)
    return avg_grads 
Example #2
Source File: universal_transformer_util.py    From tensor2tensor with Apache License 2.0
def add_depth_embedding(x):
  """Add n-dimensional embedding as the depth embedding (timing signal).

  Adds embeddings to represent the position of the step in the recurrent
  tower.

  Args:
    x: a tensor with shape [max_step, batch, length, depth]

  Returns:
    a Tensor the same shape as x.
  """
  x_shape = common_layers.shape_list(x)
  depth = x_shape[-1]
  num_steps = x_shape[0]
  shape = [num_steps, 1, 1, depth]
  depth_embedding = tf.get_variable(
      "depth_embedding",
      shape,
      initializer=tf.random_normal_initializer(0, depth**-0.5)) * (depth**0.5)

  x += depth_embedding
  return x 
Example #3
Source File: common_layers_test.py    From tensor2tensor with Apache License 2.0
def testSpectralNorm(self):
    # Test that after 20 calls to apply_spectral_norm, the spectral
    # norm of the normalized matrix is close to 1.0
    with tf.Graph().as_default():
      weights = tf.get_variable("w", dtype=tf.float32, shape=[2, 3, 50, 100])
      weights = tf.multiply(weights, 10.0)
      normed_weight, assign_op = common_layers.apply_spectral_norm(weights)

      with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        for _ in range(20):
          sess.run(assign_op)
          normed_weight, assign_op = common_layers.apply_spectral_norm(
              weights)
        normed_weight = sess.run(normed_weight).reshape(-1, 100)
        _, s, _ = np.linalg.svd(normed_weight)
        self.assertTrue(np.allclose(s[0], 1.0, rtol=0.1)) 
Example #4
Source File: transformer_nat.py    From tensor2tensor with Apache License 2.0
def init_vq_bottleneck(bottleneck_size, hidden_size):
  """Get lookup table for VQ bottleneck."""
  means = tf.get_variable(
      name="means",
      shape=[bottleneck_size, hidden_size],
      initializer=tf.uniform_unit_scaling_initializer())
  ema_count = tf.get_variable(
      name="ema_count",
      shape=[bottleneck_size],
      initializer=tf.constant_initializer(0),
      trainable=False)
  with tf.colocate_with(means):
    ema_means = tf.get_variable(
        name="ema_means",
        initializer=means.initialized_value(),
        trainable=False)

  return means, ema_means, ema_count 
Example #5
Source File: glow_ops.py    From tensor2tensor with Apache License 2.0
def scale_gaussian_prior(name, z, logscale_factor=3.0, trainable=True):
  """Returns N(s^i * z^i, std^i) where s^i and std^i are pre-component.

  s^i is a learnable parameter with identity initialization.
  std^i is optionally learnable with identity initialization.

  Args:
    name: variable scope.
    z: input_tensor
    logscale_factor: equivalent to scaling up the learning_rate by a factor
                     of logscale_factor.
    trainable: Whether or not std^i is learnt.
  """
  with tf.variable_scope(name, reuse=tf.AUTO_REUSE):
    z_shape = common_layers.shape_list(z)
    latent_multiplier = tf.get_variable(
        "latent_multiplier", shape=z_shape, dtype=tf.float32,
        initializer=tf.ones_initializer())
    log_scale = tf.get_variable(
        "log_scale_latent", shape=z_shape, dtype=tf.float32,
        initializer=tf.zeros_initializer(), trainable=trainable)
    log_scale = log_scale * logscale_factor
    return tfp.distributions.Normal(
        loc=latent_multiplier * z, scale=tf.exp(log_scale)) 
Example #6
Source File: common_layers.py    From tensor2tensor with Apache License 2.0
def zero_add(previous_value, x, name=None, reuse=None):
  """Resnet connection with zero initialization.

  Another type of resnet connection which returns previous_value + gamma * x.
  gamma is a trainable scalar and initialized with zero. It is useful when a
  module is plugged into a trained model and we want to make sure it matches the
  original model's performance.

  Args:
    previous_value:  A tensor.
    x: A tensor.
    name: name of variable scope; defaults to zero_add.
    reuse: reuse scope.

  Returns:
    previous_value + gamma * x.
  """
  with tf.variable_scope(name, default_name="zero_add", reuse=reuse):
    gamma = tf.get_variable("gamma", (), initializer=tf.zeros_initializer())
    return previous_value + gamma * x 
Example #7
Source File: transformer_glow_layers_ops.py    From tensor2tensor with Apache License 2.0
def dense_weightnorm(
    name, x, n_out, x_mask, init_scale, init, dtype=tf.float32):
  """Dense layer with weight normalization."""
  n_in = common_layers.shape_list(x)[2]
  eps = tf.keras.backend.epsilon()
  with tf.variable_scope(name, reuse=tf.AUTO_REUSE):
    v = tf.get_variable(
        "v", [n_in, n_out], dtype,
        initializer=tf.random_normal_initializer(0, 0.05), trainable=True)
    v = v / tf.norm(v, axis=0, keepdims=True)
    t = tf.matmul(x, v)  # [B, L, n_out]
    mean, var = moments_over_bl(t, x_mask)
    g_init = init_scale / (tf.sqrt(var) + eps)
    g = get_variable_ddi(
        "g", [n_out], g_init, init,
        initializer=tf.zeros_initializer, dtype=dtype, trainable=True)
    b = get_variable_ddi(
        "b", [n_out], -mean*g_init, init,
        initializer=tf.zeros_initializer, dtype=dtype, trainable=True)
    w = g * v
    y = tf.matmul(x, w) + b
    tf.summary.histogram("_g", g)
    return y 
Example #8
Source File: common_layers.py    From tensor2tensor with Apache License 2.0
def group_norm(x, filters=None, num_groups=8, epsilon=1e-5):
  """Group normalization as in https://arxiv.org/abs/1803.08494."""
  x_shape = shape_list(x)
  if filters is None:
    filters = x_shape[-1]
  assert len(x_shape) == 4
  assert filters % num_groups == 0
  # Prepare variables.
  scale = tf.get_variable(
      "group_norm_scale", [filters], initializer=tf.ones_initializer())
  bias = tf.get_variable(
      "group_norm_bias", [filters], initializer=tf.zeros_initializer())
  epsilon, scale, bias = [cast_like(t, x) for t in [epsilon, scale, bias]]
  # Reshape and compute group norm.
  x = tf.reshape(x, x_shape[:-1] + [num_groups, filters // num_groups])
  # Calculate mean and variance over height, width, channels (not groups).
  mean, variance = tf.nn.moments(x, [1, 2, 4], keep_dims=True)
  norm_x = (x - mean) * tf.rsqrt(variance + epsilon)
  return tf.reshape(norm_x, x_shape) * scale + bias 
Example #9
Source File: nas_layers.py    From tensor2tensor with Apache License 2.0
def _conv_function(self, input_tensor, output_depth, padding):
    input_depth = input_tensor.shape.as_list()[-1]
    if not ((output_depth >= input_depth) and
            (output_depth % input_depth == 0)):
      raise ValueError(
          "Depthwise layer output_depth (%s) must be greater or equal to and "
          "a multiple of the depth of the "
          "input tensor (%s)." % (output_depth, input_depth))
    channel_multiplier = calculate_depthwise_channel_multiplier(
        input_depth, output_depth)
    kernel = tf.get_variable(
        "kernel", [self._conv_width, 1, input_depth, channel_multiplier])
    return tf.nn.depthwise_conv2d(
        input_tensor,
        kernel, [1, 1, 1, 1],
        padding=padding,
        name="depthwise_conv_%sx1" % str(self._conv_width)) 
Example #10
Source File: discretization.py    From tensor2tensor with Apache License 2.0
def get_vq_codebook(codebook_size, hidden_size):
  """Get lookup table for VQ bottleneck."""
  with tf.variable_scope("vq", reuse=tf.AUTO_REUSE):
    means = tf.get_variable(
        name="means",
        shape=[codebook_size, hidden_size],
        initializer=tf.uniform_unit_scaling_initializer())

    ema_count = tf.get_variable(
        name="ema_count",
        shape=[codebook_size],
        initializer=tf.constant_initializer(0),
        trainable=False)

    with tf.colocate_with(means):
      ema_means = tf.get_variable(
          name="ema_means",
          initializer=means.initialized_value(),
          trainable=False)

  return means, ema_means, ema_count 
Example #11
Source File: tiled_linear.py    From lamb with Apache License 2.0
def _build_tiled_linear(self, inputs, input_name_and_sizes,
                          output_name_and_sizes, add_bias):
    results = []
    for output_name, output_size in output_name_and_sizes:
      r = 0.0
      for input_, (input_name, input_size) in zip(inputs, input_name_and_sizes):
        name = 'W_{}_{}'.format(input_name, output_name)
        weight = self._get_variable(
            name, shape=[output_size, input_size])
        r += tf.sparse_tensor_dense_matmul(weight, input_, adjoint_b=True)
      r = tf.transpose(r)
      if add_bias:
        # Biases are dense, hence we call _get_variable of the base
        # class.
        r += super(SparseTiledLinear, self)._get_variable(
            'B_{}'.format(output_name), shape=[output_size],
            default_initializer=tf.zeros_initializer())
      results.append(r)
    return results


# TODO(melisgl): Since computation is the same as in TiledLinear,
# perhaps this should be implemented as a custom getter (see
# tf.get_variable) instead of being tied to tiling. 
Example #12
Source File: common_image_attention.py    From tensor2tensor with Apache License 2.0
def get_channel_embeddings(io_depth, targets, hidden_size, name="channel"):
  """Get separate embedding for each of the channels."""
  targets_split = tf.split(targets, io_depth, axis=3)
  rgb_embedding_var = tf.get_variable("rgb_target_emb_%s" % name,
                                      [256 * io_depth, hidden_size])
  rgb_embedding_var = tf.identity(rgb_embedding_var)
  rgb_embedding_var *= float(hidden_size)**0.5
  channel_target_embs = []
  for i in range(io_depth):
    # Add the channel offsets to get the right embedding, since the
    # embedding tensor has shape [256 * io_depth, hidden_size].
    target_ids = tf.squeeze(targets_split[i], axis=3) + i * 256
    target_embs = common_layers.gather(rgb_embedding_var, target_ids)
    channel_target_embs.append(target_embs)

  return tf.concat(channel_target_embs, axis=-1) 
Example #13
Source File: tpu_util.py    From morph-net with Apache License 2.0
def write_to_variable(tensor, fail_if_exists=True):
  """Saves a tensor for later retrieval on CPU."""
  # Only relevant for debugging.
  debug_name = 'tpu_util__' + tensor.name.split(':')[0]

  reuse = False if fail_if_exists else tf.compat.v1.AUTO_REUSE
  with tf.variable_scope(top_level_scope, reuse=reuse):
    variable = tf.get_variable(
        name=debug_name,
        shape=tensor.shape,
        dtype=tensor.dtype,
        trainable=False,
        use_resource=True)

  var_store[tensor] = variable
  with tf.control_dependencies([variable.assign(tensor)]):
    tensor_copy = tf.identity(tensor)
  var_store[tensor_copy] = variable
  return tensor_copy 
Example #14
Source File: batch_norm_source_op_handler_test.py    From morph-net with Apache License 2.0
def testCreateRegularizer_Sliced(self):
    # Call handler to create regularizer.
    handler = batch_norm_source_op_handler.BatchNormSourceOpHandler(
        _GAMMA_THRESHOLD)
    batch_norm_op_slice = orm.OpSlice(self.batch_norm_op, orm.Slice(0, 3))
    regularizer = handler.create_regularizer(batch_norm_op_slice)

    # Verify regularizer is the gamma tensor.
    with self.cached_session():
      # Initialize the gamma tensor to check value equality.
      with tf.variable_scope('', reuse=tf.AUTO_REUSE):
        gamma_tensor = tf.get_variable('conv1/BatchNorm/gamma')
      init = tf.variables_initializer([gamma_tensor])
      init.run()

      # Verify regularizer is the sliced gamma tensor.
      self.assertAllEqual(gamma_tensor.eval()[0:3],
                          regularizer._gamma.eval()) 
Example #15
Source File: utils.py    From lamb with Apache License 2.0
def layer_norm(x, reduction_indices, epsilon=1e-9, gain=None, bias=None,
               per_element=True, scope=None):
  """DOC."""
  reduction_indices = ensure_list(reduction_indices)
  mean = tf.reduce_mean(x, reduction_indices, keep_dims=True)
  variance = tf.reduce_mean(tf.squared_difference(x, mean),
                            reduction_indices, keep_dims=True)
  normalized = (x - mean) / tf.sqrt(variance + epsilon)
  dtype = x.dtype
  shape = x.get_shape().as_list()
  for i in six.moves.range(len(shape)):
    if i not in reduction_indices or not per_element:
      shape[i] = 1
  with tf.variable_scope(scope or 'layer_norm'):
    if gain is None:
      gain = tf.get_variable('gain', shape=shape, dtype=dtype,
                             initializer=tf.ones_initializer())
    if bias is None:
      bias = tf.get_variable('bias', shape=shape, dtype=dtype,
                             initializer=tf.zeros_initializer())
  return gain*normalized+bias 
Example #16
Source File: evaluator.py    From graphics with Apache License 2.0
def _init_graph(self):
    """Initialize computation graph for tensorflow.
    """
    with self.graph.as_default():
      self.refiner = im.ImNet(dim=self.dim,
                              in_features=self.codelen,
                              out_features=self.out_features,
                              num_filters=self.num_filters)
      self.global_step = tf.get_variable('global_step', shape=[],
                                         dtype=tf.int64)

      self.pts_ph = tf.placeholder(tf.float32, shape=[self.point_batch, 3])
      self.lat_ph = tf.placeholder(tf.float32, shape=[self.codelen])

      lat = tf.broadcast_to(self.lat_ph[tf.newaxis],
                            [self.point_batch, self.codelen])
      code = tf.concat((self.pts_ph, lat), axis=-1)  # [pb, 3+c]

      vals = self.refiner(code, training=False)  # [pb, 1]
      self.vals = tf.squeeze(vals, axis=1)  # [pb]
      self.saver = tf.train.Saver()
      self.sess = tf.Session()
      self.saver.restore(self.sess, self.ckpt) 
Example #17
Source File: evaluator.py    From graphics with Apache License 2.0
def _init_graph(self):
    """Initialize computation graph for tensorflow."""
    with self.graph.as_default():
      self.encoder = g2v.GridEncoder(
          in_grid_res=self.in_grid_res,
          num_filters=self.num_filters,
          codelen=self.codelen,
          name='g2v')
      self.global_step = tf.get_variable(
          'global_step', shape=[], dtype=tf.int64)
      self.grid_ph = tf.placeholder(
          tf.float32, shape=[self.gres, self.gres, self.gres])
      self.start_ph = tf.placeholder(tf.int32, shape=[self.grid_batch, 3])
      self.ingrid = self._batch_slice(self.grid_ph, self.start_ph,
                                      self.in_grid_res, self.grid_batch)
      self.ingrid = self.ingrid[..., tf.newaxis]
      self.lats = self.encoder(self.ingrid, training=False)  # [gb, codelen]
      self.saver = tf.train.Saver()
      self.sess = tf.Session()
      self.saver.restore(self.sess, self.ckpt) 
Example #18
Source File: averaged.py    From lamb with Apache License 2.0
def __init__(self, tensors):
    tensors = list(tensors)
    with tf.variable_scope('averaged'):
      self._num_samples = tf.Variable(0, name='num_samples', trainable=False)
      with tf.variable_scope('avg'):
        self._averages = [
            tf.get_variable(
                tensor.name.replace('/', '-').replace(':', '-'),
                tensor.get_shape(), initializer=tf.zeros_initializer(),
                trainable=False)
            for tensor in tensors]
      with tf.variable_scope('save'):
        self._saves = [
            tf.get_variable(
                tensor.name.replace('/', '-').replace(':', '-'),
                tensor.get_shape(), initializer=tf.zeros_initializer(),
                trainable=False)
            for tensor in tensors]
    self._tensors = tensors
    self._take_sample = self._make_take_sample()
    self._switch = self._make_swith_to_average()
    self._restore = self._make_restore()
    self._reset = self._make_reset() 
Example #19
Source File: optimization_test.py    From albert with Apache License 2.0
def test_adam(self):
    with self.test_session() as sess:
      w = tf.get_variable(
          "w",
          shape=[3],
          initializer=tf.constant_initializer([0.1, -0.2, -0.1]))
      x = tf.constant([0.4, 0.2, -0.5])
      loss = tf.reduce_mean(tf.square(x - w))
      tvars = tf.trainable_variables()
      grads = tf.gradients(loss, tvars)
      global_step = tf.train.get_or_create_global_step()
      optimizer = optimization.AdamWeightDecayOptimizer(learning_rate=0.2)
      train_op = optimizer.apply_gradients(list(zip(grads, tvars)), global_step)
      init_op = tf.group(tf.global_variables_initializer(),
                         tf.local_variables_initializer())
      sess.run(init_op)
      for _ in range(100):
        sess.run(train_op)
      w_np = sess.run(w)
      self.assertAllClose(w_np.flat, [0.4, 0.2, -0.5], rtol=1e-2, atol=1e-2) 
Example #20
Source File: export_checkpoints.py    From albert with Apache License 2.0
def get_mlm_logits(input_tensor, albert_config, mlm_positions, output_weights):
  """From run_pretraining.py."""
  input_tensor = gather_indexes(input_tensor, mlm_positions)
  with tf.variable_scope("cls/predictions"):
    # We apply one more non-linear transformation before the output layer.
    # This matrix is not used after pre-training.
    with tf.variable_scope("transform"):
      input_tensor = tf.layers.dense(
          input_tensor,
          units=albert_config.embedding_size,
          activation=modeling.get_activation(albert_config.hidden_act),
          kernel_initializer=modeling.create_initializer(
              albert_config.initializer_range))
      input_tensor = modeling.layer_norm(input_tensor)

    # The output weights are the same as the input embeddings, but there is
    # an output-only bias for each token.
    output_bias = tf.get_variable(
        "output_bias",
        shape=[albert_config.vocab_size],
        initializer=tf.zeros_initializer())
    logits = tf.matmul(
        input_tensor, output_weights, transpose_b=True)
    logits = tf.nn.bias_add(logits, output_bias)
  return logits 
Example #21
Source File: convnet_builder.py    From benchmarks with Apache License 2.0
def _conv2d_impl(self, input_layer, num_channels_in, filters, kernel_size,
                   strides, padding, kernel_initializer):
    if self.use_tf_layers:
      return conv_layers.conv2d(input_layer, filters, kernel_size, strides,
                                padding, self.channel_pos,
                                kernel_initializer=kernel_initializer,
                                use_bias=False)
    else:
      weights_shape = [kernel_size[0], kernel_size[1], num_channels_in, filters]
      # We use the name 'conv2d/kernel' so the variable has the same name as its
      # tf.layers equivalent. This way, if a checkpoint is written when
      # self.use_tf_layers == True, it can be loaded when
      # self.use_tf_layers == False, and vice versa.
      weights = self.get_variable('conv2d/kernel', weights_shape,
                                  self.variable_dtype, self.dtype,
                                  initializer=kernel_initializer)
      if self.data_format == 'NHWC':
        strides = [1] + strides + [1]
      else:
        strides = [1, 1] + strides
      return tf.nn.conv2d(input_layer, weights, strides, padding,
                          data_format=self.data_format) 
Example #22
Source File: sv2p.py    From tensor2tensor with Apache License 2.0
def init_internal_states(self):
    # Hardcoded LSTM-CONV shapes.
    # These sizes are calculated based on original atari frames.
    # TODO(mbz): find a cleaner way of doing this maybe?!
    batch_size = self.hparams.batch_size
    shapes = [(batch_size, 53, 40, 8),
              (batch_size, 53, 40, 8),
              (batch_size, 27, 20, 16),
              (batch_size, 27, 20, 16),
              (batch_size, 53, 40, 8)]

    with tf.variable_scope("clean_scope"):
      # Initialize conv-lstm states with zeros
      init = tf.zeros_initializer()
      states = []
      for i, shape in enumerate(shapes):
        # every lstm-conv state has two variables named c and h.
        c = tf.get_variable("c%d" % i, shape, trainable=False, initializer=init)
        h = tf.get_variable("h%d" % i, shape, trainable=False, initializer=init)
        states.append((c, h))
      return states 
Example #23
Source File: utils.py    From gpt2-ml with Apache License 2.0
def layer_norm(input_tensor, name=None, epsilon=1e-5):
    """Run layer normalization on the last dimension of the tensor."""
    name2use = f'LayerNorm_{name}' if name is not None else name
    with tf.variable_scope(name2use, default_name='LayerNorm'):
        dim = input_tensor.shape[-1].value
        gamma = tf.get_variable('gamma', [dim], initializer=tf.constant_initializer(1))
        beta = tf.get_variable('beta', [dim], initializer=tf.constant_initializer(0))
        mean = tf.reduce_mean(input_tensor, axis=-1, keepdims=True)
        std = tf.reduce_mean(tf.square(input_tensor - mean), axis=-1, keepdims=True)
        input_tensor = (input_tensor - mean) * tf.rsqrt(std + epsilon)
        input_tensor = input_tensor * gamma + beta
    return input_tensor 
Example #24
Source File: common_layers.py    From tensor2tensor with Apache License 2.0
def embedding(x,
              vocab_size,
              dense_size,
              name=None,
              reuse=None,
              multiplier=1.0,
              symbol_dropout_rate=0.0,
              embedding_var=None,
              dtype=tf.float32):
  """Embed x of type int64 into dense vectors, reducing to max 4 dimensions."""
  with tf.variable_scope(
      name, default_name="embedding", values=[x], reuse=reuse, dtype=dtype):
    if embedding_var is None:
      embedding_var = tf.get_variable("kernel", [vocab_size, dense_size])
    # On the backwards pass, we want to convert the gradient from
    # an indexed-slices to a regular tensor before sending it back to the
    # parameter server. This avoids excess computation on the parameter server.
    if not tf.executing_eagerly():
      embedding_var = convert_gradient_to_tensor(embedding_var)
    x = dropout_no_scaling(x, 1.0 - symbol_dropout_rate)
    emb_x = gather(embedding_var, x, dtype)
    if multiplier != 1.0:
      emb_x *= multiplier
    static_shape = emb_x.shape.as_list()
    if len(static_shape) < 5:
      return emb_x
    assert len(static_shape) == 5
    # If we had an extra channel dimension, assume it's 1, i.e. shape[3] == 1.
    return tf.squeeze(emb_x, 3) 
Example #25
Source File: common_layers.py    From tensor2tensor with Apache License 2.0
def nalu(x, depth, epsilon=1e-30, name=None, reuse=None):
  """NALU as in https://arxiv.org/abs/1808.00508."""
  with tf.variable_scope(name, default_name="nalu", values=[x], reuse=reuse):
    x_shape = shape_list(x)
    x_flat = tf.reshape(x, [-1, x_shape[-1]])
    gw = tf.get_variable("w", [x_shape[-1], depth])
    g = tf.nn.sigmoid(tf.matmul(x_flat, gw))
    g = tf.reshape(g, x_shape[:-1] + [depth])
    a = nac(x, depth, name="nac_lin")
    log_x = tf.log(tf.abs(x) + epsilon)
    m = nac(log_x, depth, name="nac_log")
    return g * a + (1 - g) * tf.exp(m) 
Example #26
Source File: vq_discrete.py    From tensor2tensor with Apache License 2.0
def __init__(self, hparams):
    self.hparams = hparams
    print ("self.hparams.z_size", self.hparams.z_size)
    # Set the discretization bottleneck specific things here
    self.hparams.z_size_per_residual = self.hparams.z_size // \
                                       self.hparams.num_residuals
    print ("self.hparams.num_residuals", self.hparams.num_residuals)
    self.hparams.block_dim = int(
        self.hparams.hidden_size // self.hparams.num_blocks)
    self.hparams.block_v_size = 2**(
        self.hparams.z_size_per_residual / self.hparams.num_blocks)
    self.hparams.block_v_size = int(self.hparams.block_v_size)
    self.means = tf.get_variable(
        name="means",
        shape=[
            self.hparams.num_blocks, self.hparams.block_v_size,
            self.hparams.block_dim
        ],
        initializer=tf.initializers.variance_scaling(distribution="uniform"))

    # Create the shadow variables if we are using EMA
    if self.hparams.ema:
      self.ema_count = tf.get_variable(
          "ema_count", [self.hparams.num_blocks, self.hparams.block_v_size],
          initializer=tf.constant_initializer(0),
          trainable=False)
      with tf.colocate_with(self.means):
        self.ema_means = tf.get_variable(
            "ema_means",
            initializer=self.means.initialized_value(),
            trainable=False) 
Example #27
Source File: common_layers.py    From tensor2tensor with Apache License 2.0
def nac(x, depth, name=None, reuse=None):
  """NAC as in https://arxiv.org/abs/1808.00508."""
  with tf.variable_scope(name, default_name="nac", values=[x], reuse=reuse):
    x_shape = shape_list(x)
    w = tf.get_variable("w", [x_shape[-1], depth])
    m = tf.get_variable("m", [x_shape[-1], depth])
    w = tf.tanh(w) * tf.nn.sigmoid(m)
    x_flat = tf.reshape(x, [-1, x_shape[-1]])
    res_flat = tf.matmul(x_flat, w)
    return tf.reshape(res_flat, x_shape[:-1] + [depth]) 
Example #28
Source File: modalities.py    From tensor2tensor with Apache License 2.0
def get_weights(model_hparams, vocab_size, hidden_dim=None):
  """Create or get concatenated embedding or softmax variable.

  Args:
    model_hparams: HParams, model hyperparameters.
    vocab_size: int, vocabulary size.
    hidden_dim: dim of the variable. Defaults to model_hparams' hidden_size.

  Returns:
     a Tensor of shape [vocab_size, hidden_dim] (the num_shards shards concatenated).
  """
  if hidden_dim is None:
    hidden_dim = model_hparams.hidden_size
  num_shards = model_hparams.symbol_modality_num_shards
  shards = []
  for i in range(num_shards):
    shard_size = (vocab_size // num_shards) + (
        1 if i < vocab_size % num_shards else 0)
    var_name = "weights_%d" % i
    shards.append(
        tf.get_variable(
            var_name, [shard_size, hidden_dim],
            initializer=tf.random_normal_initializer(0.0, hidden_dim**-0.5)))
  if num_shards == 1:
    ret = shards[0]
  else:
    ret = tf.concat(shards, 0)
  # Convert ret to tensor.
  if not tf.executing_eagerly():
    ret = common_layers.convert_gradient_to_tensor(ret)
  return ret 
Example #29
Source File: common_layers.py    From tensor2tensor with Apache License 2.0
def l2_norm(x, filters=None, epsilon=1e-6, name=None, reuse=None):
  """Layer normalization with l2 norm."""
  if filters is None:
    filters = shape_list(x)[-1]
  with tf.variable_scope(name, default_name="l2_norm", values=[x], reuse=reuse):
    scale = tf.get_variable(
        "l2_norm_scale", [filters], initializer=tf.ones_initializer())
    bias = tf.get_variable(
        "l2_norm_bias", [filters], initializer=tf.zeros_initializer())
    epsilon, scale, bias = [cast_like(t, x) for t in [epsilon, scale, bias]]
    mean = tf.reduce_mean(x, axis=[-1], keepdims=True)
    l2norm = tf.reduce_sum(
        tf.squared_difference(x, mean), axis=[-1], keepdims=True)
    norm_x = (x - mean) * tf.rsqrt(l2norm + epsilon)
    return norm_x * scale + bias 
Example #30
Source File: common_layers.py    From tensor2tensor with Apache License 2.0
def layer_norm_vars(filters):
  """Create Variables for layer norm."""
  scale = tf.get_variable(
      "layer_norm_scale", [filters], initializer=tf.ones_initializer())
  bias = tf.get_variable(
      "layer_norm_bias", [filters], initializer=tf.zeros_initializer())
  return scale, bias