Python tensorflow.compat.v1.ones_initializer() Examples

The following are 18 code examples of tensorflow.compat.v1.ones_initializer(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module tensorflow.compat.v1, or try the search function.

Example #1

Source File: utils.py From lamb with Apache License 2.0

6 votes

def layer_norm(x, reduction_indices, epsilon=1e-9, gain=None, bias=None,
               per_element=True, scope=None):
  """Normalizes `x` over `reduction_indices`, then applies a gain and a bias.

  The mean and variance of `x` are computed over `reduction_indices` (the
  reduced dimensions are kept), `x` is standardized with them, and the result
  is returned as `gain * normalized + bias`.

  Args:
    x: The tensor to normalize. Its static shape is read with
      `x.get_shape().as_list()`.
    reduction_indices: The axes to compute the statistics over. Passed
      through `ensure_list` before use.
    epsilon: Added to the variance before taking the square root.
    gain: Optional multiplicative term. If None, a variable named 'gain' is
      created with a ones initializer.
    bias: Optional additive term. If None, a variable named 'bias' is created
      with a zeros initializer.
    per_element: If True, the created variables keep the size of `x` along
      the reduction axes and have size 1 elsewhere. If False, every dimension
      of the created variables is 1.
    scope: Optional variable scope; 'layer_norm' is used when it is falsy.

  Returns:
    `gain * normalized + bias`.
  """
  axes = ensure_list(reduction_indices)
  mean = tf.reduce_mean(x, axes, keep_dims=True)
  squared_deviation = tf.squared_difference(x, mean)
  variance = tf.reduce_mean(squared_deviation, axes, keep_dims=True)
  normalized = (x - mean) / tf.sqrt(variance + epsilon)
  param_dtype = x.dtype
  # NOTE(review): positions enumerated here are non-negative, so a negative
  # entry in `axes` is never matched and that dimension is collapsed to 1.
  # Confirm callers only pass non-negative axes.
  param_shape = [
      size if (position in axes and per_element) else 1
      for position, size in enumerate(x.get_shape().as_list())
  ]
  with tf.variable_scope(scope or 'layer_norm'):
    if gain is None:
      gain = tf.get_variable(
          'gain', shape=param_shape, dtype=param_dtype,
          initializer=tf.ones_initializer())
    if bias is None:
      bias = tf.get_variable(
          'bias', shape=param_shape, dtype=param_dtype,
          initializer=tf.zeros_initializer())
  return gain * normalized + bias

Example #2

Source File: common_layers.py From language with Apache License 2.0

6 votes

def apply_norm(x, epsilon=1e-6):
  """Layer-normalizes `x` over its last dimension.

  See "Layer Normalization": https://arxiv.org/abs/1607.06450

  The mean and variance are taken over the last axis; the standardized values
  are multiplied by a learned scale (initialized to ones) and shifted by a
  learned bias (initialized to zeros). Both variables live in the
  "layer_norm" variable scope.

  Args:
    x: <float>[..., input_size]
    epsilon: Added to the variance so the reciprocal square root is finite.

  Returns:
    <float>[..., input_size]
  """
  depth = x.get_shape()[-1]
  with tf.variable_scope("layer_norm", values=[x]):
    gamma = tf.get_variable(
        "layer_norm_scale", [depth], initializer=tf.ones_initializer())
    beta = tf.get_variable(
        "layer_norm_bias", [depth], initializer=tf.zeros_initializer())
    last_axis_mean = tf.reduce_mean(x, axis=[-1], keepdims=True)
    last_axis_variance = tf.reduce_mean(
        tf.square(x - last_axis_mean), axis=[-1], keepdims=True)
    standardized = (x - last_axis_mean) * tf.rsqrt(
        last_axis_variance + epsilon)
    return standardized * gamma + beta

Example #3

Source File: glow_ops.py From tensor2tensor with Apache License 2.0

6 votes

def scale_gaussian_prior(name, z, logscale_factor=3.0, trainable=True):
  """Returns a Normal whose mean is an element-wise rescaling of `z`.

  Two variables with the shape of `z` are created (or reused) under `name`:
    * "latent_multiplier", initialized to ones, which multiplies `z` to give
      the mean of the distribution.
    * "log_scale_latent", initialized to zeros, which is multiplied by
      `logscale_factor` and exponentiated to give the standard deviation.
  At initialization the mean is therefore `z` and the standard deviation 1.

  Args:
    name: variable scope.
    z: input_tensor
    logscale_factor: multiplier applied to the log-scale variable before
                     `exp`; equivalent to scaling up the learning_rate by a
                     factor of logscale_factor.
    trainable: Whether or not the log-scale variable is learnt.

  Returns:
    A `tfp.distributions.Normal` with `loc=latent_multiplier * z` and
    `scale=exp(log_scale * logscale_factor)`.
  """
  with tf.variable_scope(name, reuse=tf.AUTO_REUSE):
    latent_shape = common_layers.shape_list(z)
    multiplier = tf.get_variable(
        "latent_multiplier",
        shape=latent_shape,
        dtype=tf.float32,
        initializer=tf.ones_initializer())
    raw_log_scale = tf.get_variable(
        "log_scale_latent",
        shape=latent_shape,
        dtype=tf.float32,
        initializer=tf.zeros_initializer(),
        trainable=trainable)
    scaled_log_scale = raw_log_scale * logscale_factor
    prior_mean = multiplier * z
    prior_stddev = tf.exp(scaled_log_scale)
    return tfp.distributions.Normal(loc=prior_mean, scale=prior_stddev)

Example #4

Source File: transformer.py From mesh with Apache License 2.0

6 votes

def sublayer_rms_norm_subsampled(x, layer_stack, context, percentage=100.,
                                 epsilon=1e-6):
  """RMS normalization with the mean square taken over a slice of `x`.

  Only `ceil(model_dim.size * percentage / 100)` entries along the model
  dimension, taken from offset 0, are used to compute the mean square. All of
  `x` is then multiplied by the reciprocal root of that value and by a
  learned "scale" variable initialized to ones.

  Args:
    x: an input mtf.Tensor
    layer_stack: unused; deleted immediately.
    context: provides `mesh`, `variable_dtype`, `model.model_dim` and
      `model.ensemble_dims`.
    percentage: percentage of the model dimension used for the statistics.
    epsilon: a float added to the mean square before `rsqrt`.

  Returns:
    `x * rsqrt(mean_square + epsilon) * scale`.
  """
  del layer_stack
  full_dim = context.model.model_dim
  with tf.variable_scope("layer_norm_subsampled"):
    scale_shape = mtf.Shape(context.model.ensemble_dims + [full_dim])
    scale = mtf.get_variable(
        context.mesh, "scale", scale_shape,
        initializer=tf.ones_initializer(),
        dtype=context.variable_dtype)
    subsample_size = int(math.ceil(full_dim.size * percentage/100))
    sub_dim = mtf.Dimension(full_dim.name, subsample_size)
    leading_activations = mtf.slice(x, 0, sub_dim.size, sub_dim.name)
    mean_square = mtf.reduce_mean(
        mtf.square(leading_activations), reduced_dim=sub_dim)
  inverse_rms = mtf.rsqrt(mean_square + epsilon)
  return x * inverse_rms * scale

Example #5

Source File: transformer.py From mesh with Apache License 2.0

6 votes

def sublayer_rms_norm(x, layer_stack, context, epsilon=1e-6, name="rms_norm"):
  """Divides `x` by its root mean square over the model dimension.

  A "scale" variable (initialized to ones) is created in the variable scope
  `name` and multiplied into the result.

  Args:
    x: an input mtf.Tensor
    layer_stack: a LayerStack (unused; deleted immediately)
    context: a Context
    epsilon: a float added to the mean square before `rsqrt`
    name: a string, used as the variable scope
  Returns:
    a mtf.Tensor equal to `x * rsqrt(mean_square + epsilon) * scale`
  """
  del layer_stack
  reduce_dim = context.model.model_dim
  with tf.variable_scope(name):
    scale_shape = mtf.Shape(context.model.ensemble_dims + [reduce_dim])
    scale = mtf.get_variable(
        context.mesh, "scale", scale_shape,
        initializer=tf.ones_initializer(),
        dtype=context.variable_dtype)
    mean_square = mtf.reduce_mean(mtf.square(x), reduced_dim=reduce_dim)
  inverse_rms = mtf.rsqrt(mean_square + epsilon)
  return x * inverse_rms * scale

Example #6

Source File: common_layers.py From tensor2tensor with Apache License 2.0

6 votes

def group_norm(x, filters=None, num_groups=8, epsilon=1e-5):
  """Group normalization as in https://arxiv.org/abs/1803.08494.

  Args:
    x: a rank-4 tensor (asserted below).
    filters: size of the last dimension of `x`; read from the shape of `x`
      when None. Must be divisible by `num_groups` (asserted below).
    num_groups: number of groups the last dimension is split into.
    epsilon: added to the variance before `rsqrt`.

  Returns:
    A tensor with the shape of `x`: the group-normalized input multiplied by
    the "group_norm_scale" variable plus the "group_norm_bias" variable.
  """
  input_shape = shape_list(x)
  if filters is None:
    filters = input_shape[-1]
  assert len(input_shape) == 4
  assert filters % num_groups == 0
  # Learned per-filter scale and bias, cast to match x.
  scale = tf.get_variable(
      "group_norm_scale", [filters], initializer=tf.ones_initializer())
  bias = tf.get_variable(
      "group_norm_bias", [filters], initializer=tf.zeros_initializer())
  epsilon = cast_like(epsilon, x)
  scale = cast_like(scale, x)
  bias = cast_like(bias, x)
  # Split the last dimension into [num_groups, filters // num_groups].
  grouped_shape = input_shape[:-1] + [num_groups, filters // num_groups]
  grouped = tf.reshape(x, grouped_shape)
  # Statistics over axes 1, 2 and 4 (everything but batch and group).
  group_mean, group_variance = tf.nn.moments(
      grouped, [1, 2, 4], keep_dims=True)
  standardized = (grouped - group_mean) * tf.rsqrt(group_variance + epsilon)
  return tf.reshape(standardized, input_shape) * scale + bias

Example #7

Source File: resnet_model.py From rigl with Apache License 2.0

5 votes

def batch_norm_relu(inputs, is_training, relu=True, init_zero=False,
                    data_format='channels_first'):
  """Applies batch normalization, optionally followed by a ReLU.

  Args:
    inputs: `Tensor` of shape `[batch, channels, ...]`.
    is_training: `bool` for whether the model is training.
    relu: `bool` if False, omits the ReLU operation.
    init_zero: `bool` if True, initializes scale parameter of batch
        normalization with 0 instead of 1 (default).
    data_format: `str` either "channels_first" for `[batch, channels, height,
        width]` or "channels_last" for `[batch, height, width, channels]`.
        Any value other than 'channels_first' normalizes over axis 3.

  Returns:
    A normalized `Tensor` with the same `data_format`.
  """
  gamma_initializer = (
      tf.zeros_initializer() if init_zero else tf.ones_initializer())
  channel_axis = 1 if data_format == 'channels_first' else 3

  normalized = tf.layers.batch_normalization(
      inputs=inputs,
      axis=channel_axis,
      momentum=BATCH_NORM_DECAY,
      epsilon=BATCH_NORM_EPSILON,
      center=True,
      scale=True,
      training=is_training,
      fused=True,
      gamma_initializer=gamma_initializer)

  if not relu:
    return normalized
  return tf.nn.relu(normalized)

Example #8

Source File: convnet_builder.py From benchmarks with Apache License 2.0

5 votes

def _batch_norm_without_layers(self, input_layer, decay, use_scale, epsilon):
    """Batch normalization on `input_layer` without tf.layers.

    Kept as close as possible to tf.contrib.layers.batch_norm so that using
    layers and not using layers differ as little as possible.

    Args:
      input_layer: the tensor to normalize; the channel count is read from
        index 3 of its shape when `self.data_format` is 'NHWC', otherwise
        from index 1.
      decay: decay passed to the moving-average updates.
      use_scale: if truthy a 'gamma' variable is created; otherwise gamma is
        a constant of ones.
      epsilon: passed to `tf.nn.fused_batch_norm`.

    Returns:
      The normalized tensor returned by `tf.nn.fused_batch_norm`.
    """
    input_shape = input_layer.shape
    if self.data_format == 'NHWC':
      num_channels = input_shape[3]
    else:
      num_channels = input_shape[1]

    beta = self.get_variable(
        'beta', [num_channels], tf.float32, tf.float32,
        initializer=tf.zeros_initializer())
    if use_scale:
      gamma = self.get_variable(
          'gamma', [num_channels], tf.float32, tf.float32,
          initializer=tf.ones_initializer())
    else:
      gamma = tf.constant(1.0, tf.float32, [num_channels])

    # The moving statistics are created with tf.get_variable rather than
    # self.get_variable: the latter returns the result of a tf.cast, which
    # cannot be assigned to.
    moving_mean = tf.get_variable(
        'moving_mean', [num_channels], tf.float32,
        initializer=tf.zeros_initializer(), trainable=False)
    moving_variance = tf.get_variable(
        'moving_variance', [num_channels], tf.float32,
        initializer=tf.ones_initializer(), trainable=False)

    if not self.phase_train:
      # Inference: normalize with the stored moving statistics.
      normalized, _, _ = tf.nn.fused_batch_norm(
          input_layer, gamma, beta, mean=moving_mean,
          variance=moving_variance, epsilon=epsilon,
          data_format=self.data_format, is_training=False)
      return normalized

    # Training: normalize with batch statistics and register the
    # moving-average updates in the UPDATE_OPS collection.
    normalized, batch_mean, batch_variance = tf.nn.fused_batch_norm(
        input_layer, gamma, beta, epsilon=epsilon,
        data_format=self.data_format, is_training=True)
    mean_update = moving_averages.assign_moving_average(
        moving_mean, batch_mean, decay=decay, zero_debias=False)
    variance_update = moving_averages.assign_moving_average(
        moving_variance, batch_variance, decay=decay, zero_debias=False)
    for update_op in (mean_update, variance_update):
      tf.add_to_collection(tf.GraphKeys.UPDATE_OPS, update_op)
    return normalized

Example #9

Source File: layers.py From mesh with Apache License 2.0

5 votes

def layer_norm(x, dim, epsilon=1e-6, name="layer_prepostprocess"):
  """Layer normalization over dimension dim.

  The mean and variance of `x` are reduced over `dim`; the standardized
  tensor is multiplied by "layer_norm_scale" (initialized to ones) and
  shifted by "layer_norm_bias" (initialized to zeros). Both variables are
  created in the scope `name + "/layer_norm"`.

  Args:
    x: a mtf.Tensor whose shape contains dim.
    dim: a mtf.Dimension
    epsilon: a floating point number added to the variance before `rsqrt`.
    name: a string used for tf.variable_scope.

  Returns:
    a mtf.Tensor with same shape as x.
  """
  with tf.variable_scope(name + "/layer_norm"):
    gamma = mtf.get_variable(
        x.mesh, "layer_norm_scale", mtf.Shape([dim]),
        initializer=tf.ones_initializer(),
        activation_dtype=x.dtype)
    beta = mtf.get_variable(
        x.mesh, "layer_norm_bias", mtf.Shape([dim]),
        initializer=tf.zeros_initializer(),
        activation_dtype=x.dtype)
    # Shape of x with `dim` removed; both statistics are reduced to it.
    stats_shape = x.shape - dim
    mean = mtf.reduce_mean(x, output_shape=stats_shape)
    variance = mtf.reduce_mean(
        mtf.square(x - mean), output_shape=stats_shape)
    standardized = (x - mean) * mtf.rsqrt(variance + epsilon)
    return standardized * gamma + beta

Example #10

Source File: common_layers.py From tensor2tensor with Apache License 2.0

5 votes

def build(self, input_shape=None):
    """Build `Layer`.

    If the wrapped `self.layer` has not been built yet, this builds it, keeps
    its `kernel` as `self.layer.v`, adds a trainable variable `g` of shape
    `(layer_depth,)` initialized with `tf.ones_initializer`, and calls
    `self._compute_weights()`.

    Args:
      input_shape: shape of the input; converted with
        `tf.TensorShape(input_shape).as_list()`.

    Raises:
      ValueError: if the wrapped layer has no `kernel` attribute after its
        own `build` has run.
    """
    input_shape = tf.TensorShape(input_shape).as_list()
    self.input_spec = layers().InputSpec(shape=input_shape)

    if not self.layer.built:
      self.layer.build(input_shape)
      # Reset the flag while `g` is being added; it is set back to True at
      # the end of this branch.
      self.layer.built = False

      if not hasattr(self.layer, "kernel"):
        raise ValueError("`WeightNorm` must wrap a layer that"
                         " contains a `kernel` for weights")

      # The kernel's filter or unit dimension is -1
      self.layer_depth = int(self.layer.kernel.shape[-1])
      # Every kernel axis except the last one.
      self.norm_axes = list(range(self.layer.kernel.shape.ndims - 1))

      # Keep the original kernel as `v`; `g` has one entry per output unit.
      self.layer.v = self.layer.kernel
      self.layer.g = self.layer.add_variable(
          name="g",
          shape=(self.layer_depth,),
          initializer=tf.ones_initializer,
          dtype=self.layer.kernel.dtype,
          trainable=True)

      # Disabled alternative that assigned `g` from `self._init_norm` before
      # computing the weights:
      # with ops.control_dependencies([self.layer.g.assign(
      #     self._init_norm(self.layer.v))]):
      #   self._compute_weights()
      self._compute_weights()

      self.layer.built = True

    super(WeightNorm, self).build()
    self.built = True

Example #11

Source File: common_layers.py From tensor2tensor with Apache License 2.0

5 votes

def layer_norm_vars(filters):
  """Creates the scale and bias variables used by layer norm.

  Args:
    filters: length of each of the two rank-1 variables.

  Returns:
    A `(scale, bias)` tuple: "layer_norm_scale" initialized to ones and
    "layer_norm_bias" initialized to zeros.
  """
  gamma = tf.get_variable(
      "layer_norm_scale",
      shape=[filters],
      initializer=tf.ones_initializer())
  beta = tf.get_variable(
      "layer_norm_bias",
      shape=[filters],
      initializer=tf.zeros_initializer())
  return gamma, beta

Example #12

Source File: resnet.py From tensor2tensor with Apache License 2.0

5 votes

def batch_norm_relu(inputs,
                    is_training,
                    relu=True,
                    init_zero=False,
                    data_format="channels_first"):
  """Applies a `BatchNormalization` layer, optionally followed by a ReLU.

  Args:
    inputs: `Tensor` of shape `[batch, channels, ...]`.
    is_training: `bool` for whether the model is training.
    relu: `bool` if False, omits the ReLU operation.
    init_zero: `bool` if True, initializes scale parameter of batch
        normalization with 0 instead of 1 (default).
    data_format: `str` either "channels_first" for `[batch, channels, height,
        width]` or "channels_last" for `[batch, height, width, channels]`.
        Any value other than "channels_first" normalizes over axis 3.

  Returns:
    A normalized `Tensor` with the same `data_format`.
  """
  gamma_initializer = (
      tf.zeros_initializer() if init_zero else tf.ones_initializer())
  channel_axis = 1 if data_format == "channels_first" else 3

  batch_norm = layers().BatchNormalization(
      axis=channel_axis,
      momentum=BATCH_NORM_DECAY,
      epsilon=BATCH_NORM_EPSILON,
      center=True,
      scale=True,
      fused=True,
      gamma_initializer=gamma_initializer)
  outputs = batch_norm(inputs, training=is_training)

  if not relu:
    return outputs
  return tf.nn.relu(outputs)

Example #13

Source File: batch_dqn_agent_test.py From tensor2tensor with Apache License 2.0

5 votes

def _create_test_agent(self, sess):
    """Builds a `BatchDQNAgent` in eval mode backed by a fixed mock network.

    Args:
      sess: the session handed to the agent and used to run the global
        variables initializer.

    Returns:
      The agent, with `eval_mode` set to True.
    """
    stack_size = self.stack_size

    class MockDQNNetwork(tf.keras.Model):
      """The Keras network used in tests."""

      def __init__(self, num_actions, **kwargs):
        super(MockDQNNetwork, self).__init__(**kwargs)
        # Each kernel row is [num_actions, ..., 1]: action 0 gets the highest
        # weight so that it is the one picked by the argmax.
        descending_weights = np.arange(num_actions, 0, -1)
        kernel_values = np.tile(descending_weights, (stack_size, 1))
        self.layer = tf.keras.layers.Dense(
            num_actions,
            kernel_initializer=tf.constant_initializer(kernel_values),
            bias_initializer=tf.ones_initializer())

      def call(self, state):
        # Only the leading dimension of `state` is used; the dense layer is
        # fed zeros.
        zero_inputs = np.zeros((state.shape[0], stack_size))
        inputs = tf.constant(zero_inputs, dtype=tf.float32)
        return atari_lib.DQNNetworkType(self.layer(inputs))

    agent = dopamine_connector.BatchDQNAgent(
        network=MockDQNNetwork,
        replay_capacity=100,
        buffer_batch_size=8,
        generates_trainable_dones=True,
        sess=sess,
        env_batch_size=self.env_batch_size,
        num_actions=self.num_actions,
        min_replay_history=self.min_replay_history,
        # No exploration.
        epsilon_fn=lambda w, x, y, z: 0.0,
        update_period=self.update_period,
        target_update_period=self.target_update_period,
        # No exploration during evaluation.
        epsilon_eval=0.0)
    # Eval mode gives non-random action choices (epsilon_eval is 0.0) and
    # skips the train_step.
    agent.eval_mode = True
    sess.run(tf.global_variables_initializer())
    return agent

Example #14

Source File: tiled_linear.py From lamb with Apache License 2.0

5 votes

def _get_variable(self, name, shape,
                    default_initializer=None, default_partitioner=None,
                    default_regularizer=None):
    """Returns a variable, sharing rank-2 variables through a matrix cache.

    Shapes that are not rank 2 are delegated unchanged to the parent class.
    For rank-2 shapes a sharing key is looked up ('overlay_sharing_key',
    default ':name:'); ':name:' stands for `name` and ':shape:' for `shape`.
    If the key is already cached and the current variable scope is not in
    reuse mode, the cached matrix is multiplied by a new scaler variable
    's_' + name of shape `[shape[1]]` initialized to ones. Otherwise the
    matrix is fetched from the parent class under the sharing key and stored
    in the cache. If 'overlay_rank' is non-zero, a low-rank matrix is added.

    Args:
      name: variable name.
      shape: variable shape.
      default_initializer: forwarded to the parent `_get_variable`.
      default_partitioner: forwarded to the parent `_get_variable`.
      default_regularizer: forwarded to the parent `_get_variable`.

    Returns:
      The (possibly scaled and/or overlaid) variable.
    """
    parent = super(OverlaidTiledLinear, self)
    if len(shape) != 2:
      return parent._get_variable(
          name, shape, default_initializer=default_initializer,
          default_partitioner=default_partitioner,
          default_regularizer=default_regularizer)

    overlay_rank = self._find_var_init_param(name, 'overlay_rank', 0)
    sharing_key = self._find_var_init_param(
        name, 'overlay_sharing_key', ':name:')
    if sharing_key == ':name:':
      sharing_key = name
    # NOTE(review): with ':shape:' the key becomes `shape` itself. If `shape`
    # is a list and `_matrix_cache` is a dict, the membership test below would
    # raise TypeError (unhashable) - confirm what callers pass.
    if sharing_key == ':shape:':
      sharing_key = shape

    is_cached = sharing_key in self._matrix_cache
    if is_cached and not tf.get_variable_scope().reuse:
      column_scaler = parent._get_variable(
          's_'+name, [shape[1]], default_initializer=tf.ones_initializer())
      matrix = column_scaler*self._matrix_cache[sharing_key]
    else:
      matrix = parent._get_variable(
          sharing_key, shape, default_initializer=default_initializer,
          default_partitioner=default_partitioner,
          default_regularizer=default_regularizer)
      self._matrix_cache[sharing_key] = matrix

    if overlay_rank == 0:
      return matrix
    low_rank_overlay = self._low_rank_matrix(
        name, rank=overlay_rank, shape=shape)
    return matrix+low_rank_overlay

Example #15

Source File: utils.py From Object_Detection_Tracking with Apache License 2.0

4 votes

def batch_norm_act(inputs,
                   is_training_bn: bool,
                   act_type: Union[Text, None],
                   init_zero: bool = False,
                   data_format: Text = 'channels_last',
                   momentum: float = 0.99,
                   epsilon: float = 1e-3,
                   use_tpu: bool = False,
                   name: Text = None):
  """Applies batch normalization, optionally followed by an activation.

  Args:
    inputs: `Tensor` of shape `[batch, channels, ...]`.
    is_training_bn: `bool` for whether the model is training.
    act_type: non-linear activation function type. If None (or otherwise
      falsy), no activation is applied.
    init_zero: `bool` if True, initializes scale parameter of batch
      normalization with 0 instead of 1 (default).
    data_format: `str` either "channels_first" for `[batch, channels, height,
      width]` or "channels_last" for `[batch, height, width, channels]`.
      Any value other than 'channels_first' normalizes over axis 3.
    momentum: `float`, momentum of batch norm.
    epsilon: `float`, small value for numerical stability.
    use_tpu: `bool`, whether to use tpu version of batch norm.
    name: the name of the batch normalization layer

  Returns:
    A normalized `Tensor` with the same `data_format`.
  """
  gamma_initializer = (
      tf.zeros_initializer() if init_zero else tf.ones_initializer())
  channel_axis = 1 if data_format == 'channels_first' else 3

  normalized = tpu_batch_normalization(
      inputs=inputs,
      axis=channel_axis,
      momentum=momentum,
      epsilon=epsilon,
      center=True,
      scale=True,
      training=is_training_bn,
      use_tpu=use_tpu,
      gamma_initializer=gamma_initializer,
      name=name)

  if not act_type:
    return normalized
  return activation_fn(normalized, act_type)

Example #16

Source File: bert.py From mesh with Apache License 2.0

4 votes

def layer_norm(x, dim, epsilon=1e-6,
               subtract_mean=True,
               use_scale=True,
               use_bias=True,
               name=None):
  """Layer normalization over dimension dim.

  TODO(noam): This is cleaner than the version in mtf.layers
  Move this version into mtf.layers to replace the one there.

  Steps, in order: optionally subtract the mean over `dim`; multiply by the
  reciprocal root of the mean square over `dim` (plus `epsilon`); optionally
  multiply by a "scale" variable initialized to ones; optionally add a "bias"
  variable initialized to zeros.

  Args:
    x: a mtf.Tensor whose shape contains dim.
    dim: a mtf.Dimension
    epsilon: a floating point number
    subtract_mean: a boolean; whether the mean over `dim` is removed first.
    use_scale: a boolean; whether the learned scale is applied.
    use_bias: a boolean; whether the learned bias is added.
    name: a string used for tf.variable_scope.

  Returns:
    a mtf.Tensor with same shape as x.
  """
  with tf.variable_scope(name, default_name="layer_norm"):
    if subtract_mean:
      mean = mtf.reduce_mean(x, reduced_dim=dim)
      x -= mean
    mean_square = mtf.reduce_mean(mtf.square(x), reduced_dim=dim)
    x *= mtf.rsqrt(mean_square + epsilon)
    if use_scale:
      scale = mtf.get_variable(
          x.mesh, "scale", mtf.Shape([dim]),
          initializer=tf.ones_initializer(),
          activation_dtype=x.dtype)
      x *= scale
    if use_bias:
      bias = mtf.get_variable(
          x.mesh, "bias", mtf.Shape([dim]),
          initializer=tf.zeros_initializer(),
          activation_dtype=x.dtype)
      x += bias
    return x

Example #17

Source File: model_fns.py From language with Apache License 2.0

4 votes

def one_hop(qry_input_ids,
            qry_input_mask,
            qry_entity_ids,
            entity_ids,
            entity_mask,
            ent2ment_ind,
            ent2ment_val,
            ment2ent_map,
            is_training,
            use_one_hot_embeddings,
            bert_config,
            qa_config,
            mips_config,
            answer_mentions=None):
  """One hop of propagation from input to output entities.

  Encodes the query, creates a MIPS searcher, builds a sparse
  `[batch_size, qa_config.num_entities]` tensor with a 1.0 at each
  (row, query entity id) position, and passes everything to `follow`.

  Returns:
    The four values returned by `follow`: `(ret_entities, ret_mentions,
    dense_mention_vec, sp_mention_vec)`.
  """
  # Word weights for the question BOW embedding, initialized to ones.
  with tf.variable_scope("qry/bow"):
    word_weights = tf.get_variable(
        "word_weights",
        [bert_config.vocab_size, 1],
        dtype=tf.float32,
        initializer=tf.ones_initializer())

  qry_seq_emb, word_emb_table = shared_qry_encoder(
      qry_input_ids, qry_input_mask, is_training, use_one_hot_embeddings,
      bert_config, qa_config)

  qry_start_emb, qry_end_emb = layer_qry_encoder(
      qry_seq_emb, qry_input_ids, qry_input_mask, is_training, bert_config,
      qa_config)

  # The MIPS searcher is created on the CPU.
  with tf.device("/cpu:0"):
    tf_db, mips_search_fn = search_utils.create_mips_searcher(
        mips_config.ckpt_var_name, mips_config.ckpt_path,
        mips_config.num_neighbors)

  # Sparse indicator of the query entities: row i, column qry_entity_ids[i].
  # (assumes qry_entity_ids is rank 1 - TODO confirm)
  batch_size = tf.shape(qry_entity_ids)[0]
  row_ids = tf.cast(tf.expand_dims(tf.range(batch_size), 1), tf.int64)
  col_ids = tf.cast(tf.expand_dims(qry_entity_ids, 1), tf.int64)
  batch_entities = tf.SparseTensor(
      indices=tf.concat([row_ids, col_ids], axis=1),
      values=tf.ones_like(qry_entity_ids, tf.float32),
      dense_shape=[batch_size, qa_config.num_entities])

  hop_outputs = follow(
      batch_entities, qry_start_emb, qry_end_emb, entity_ids, entity_mask,
      ent2ment_ind, ent2ment_val, ment2ent_map, word_emb_table, word_weights,
      mips_search_fn, tf_db, bert_config.hidden_size, mips_config, qa_config,
      is_training, answer_mentions)
  ret_entities, ret_mentions, dense_mention_vec, sp_mention_vec = hop_outputs

  return ret_entities, ret_mentions, dense_mention_vec, sp_mention_vec

Example #18

Source File: attention.py From language with Apache License 2.0

4 votes

def _luong_score(query, keys, scale):
  """Implements Luong-style (multiplicative) scoring function.

  This attention has two forms.  The first is standard Luong attention,
  as described in:

  Minh-Thang Luong, Hieu Pham, Christopher D. Manning.
  "Effective Approaches to Attention-based Neural Machine Translation."
  EMNLP 2015.  https://arxiv.org/abs/1508.04025

  The second is the scaled form inspired partly by the normalized form of
  Bahdanau attention.

  To enable the second form, call this function with `scale=True`.

  Args:
    query: Tensor, shape `[batch_size, num_units]` to compare to keys.
    keys: Processed memory, shape `[batch_size, max_time, num_units]`.
    scale: Whether to apply a scale to the score function.

  Returns:
    A `[batch_size, max_time]` tensor of unnormalized score values.

  Raises:
    ValueError: If `key` and `query` depths do not match.
  """
  depth = query.get_shape()[-1]
  key_units = keys.get_shape()[-1]
  if depth != key_units:
    raise ValueError(
        "Incompatible or unknown inner dimensions between query and keys.  "
        "Query (%s) has units: %s.  Keys (%s) have units: %s.  "
        "Perhaps you need to set num_units to the keys' dimension (%s)?"
        % (query, depth, keys, key_units, key_units))
  dtype = query.dtype

  # Reshape from [batch_size, depth] to [batch_size, 1, depth]
  # for matmul.
  query = tf.expand_dims(query, 1)

  # Inner product along the query units dimension.
  # matmul shapes: query is [batch_size, 1, depth] and
  #                keys is [batch_size, max_time, depth].
  # the inner product is asked to **transpose keys' inner shape** to get a
  # batched matmul on:
  #   [batch_size, 1, depth] . [batch_size, depth, max_time]
  # resulting in an output shape of:
  #   [batch_size, 1, max_time].
  # we then squeeze out the center singleton dimension.
  score = tf.matmul(query, keys, transpose_b=True)
  score = tf.squeeze(score, [1])

  if scale:
    # Scalar used in weight scaling
    g = tf.get_variable(
        "attention_g", dtype=dtype, initializer=tf.ones_initializer, shape=())
    score = g * score
  return score