Python tensorflow.compat.v1.ones_initializer() Examples
The following are 18 code examples of tensorflow.compat.v1.ones_initializer().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module tensorflow.compat.v1, or try the search function.
Example #1
Source File: utils.py From lamb with Apache License 2.0 | 6 votes |
def layer_norm(x, reduction_indices, epsilon=1e-9, gain=None, bias=None,
               per_element=True, scope=None):
    """Normalizes `x` over `reduction_indices`, then applies a gain and bias.

    The mean and variance of `x` are computed over `reduction_indices` (the
    reduced axes are kept with size 1), `x` is standardized with them, and
    the result is returned as `gain * normalized + bias`.

    Args:
      x: Tensor to normalize. Its static rank must be known because the
        parameter shape is derived from `x.get_shape().as_list()`.
      reduction_indices: Axis or axes to normalize over. Passed through
        `ensure_list` first (presumably that wraps a scalar in a list --
        confirm against the helper).
      epsilon: Added to the variance before taking the square root.
      gain: Optional multiplicative term. When None, a variable named 'gain'
        initialized to ones is created.
      bias: Optional additive term. When None, a variable named 'bias'
        initialized to zeros is created.
      per_element: If True, created parameters keep the static size of `x`
        along the reduced axes and have size 1 elsewhere. If False, every
        axis of the created parameters has size 1.
      scope: Variable scope name; falls back to 'layer_norm' when falsy.

    Returns:
      The normalized tensor scaled by `gain` and shifted by `bias`.
    """
    reduction_indices = ensure_list(reduction_indices)
    # `keepdims` is the supported keyword; `keep_dims` is only a deprecated
    # alias that triggers a deprecation warning.
    mean = tf.reduce_mean(x, reduction_indices, keepdims=True)
    variance = tf.reduce_mean(tf.squared_difference(x, mean),
                              reduction_indices, keepdims=True)
    normalized = (x - mean) / tf.sqrt(variance + epsilon)
    dtype = x.dtype
    shape = x.get_shape().as_list()
    # Collapse every axis that should not carry its own parameter.
    # NOTE: the membership test uses the raw index values, so a negative
    # entry in `reduction_indices` never matches `i` here.
    for i in six.moves.range(len(shape)):
        if i not in reduction_indices or not per_element:
            shape[i] = 1
    with tf.variable_scope(scope or 'layer_norm'):
        if gain is None:
            gain = tf.get_variable('gain', shape=shape, dtype=dtype,
                                   initializer=tf.ones_initializer())
        if bias is None:
            bias = tf.get_variable('bias', shape=shape, dtype=dtype,
                                   initializer=tf.zeros_initializer())
    return gain*normalized+bias
Example #2
Source File: common_layers.py From language with Apache License 2.0 | 6 votes |
def apply_norm(x, epsilon=1e-6):
    """Layer-normalizes `x` along its last axis.

    Follows "Layer Normalization" (https://arxiv.org/abs/1607.06450): `x` is
    centred by its mean and rescaled by its variance over the last axis, then
    multiplied by a learned scale and shifted by a learned bias.

    Args:
      x: <float>[..., input_size]
      epsilon: Added to the variance to avoid division by 0.

    Returns:
      <float>[..., input_size]
    """
    depth = x.get_shape()[-1]
    with tf.variable_scope("layer_norm", values=[x]):
        gamma = tf.get_variable(
            "layer_norm_scale", [depth], initializer=tf.ones_initializer())
        beta = tf.get_variable(
            "layer_norm_bias", [depth], initializer=tf.zeros_initializer())
        # Statistics over the last axis, kept broadcastable against `x`.
        mu = tf.reduce_mean(x, axis=[-1], keepdims=True)
        sigma_sq = tf.reduce_mean(
            tf.square(x - mu), axis=[-1], keepdims=True)
        centred = x - mu
        inv_std = tf.rsqrt(sigma_sq + epsilon)
        return centred * inv_std * gamma + beta
Example #3
Source File: glow_ops.py From tensor2tensor with Apache License 2.0 | 6 votes |
def scale_gaussian_prior(name, z, logscale_factor=3.0, trainable=True):
    """Returns N(s^i * z^i, std^i) where s^i and std^i are per-component.

    s^i is a learnable multiplier initialized to ones. std^i is the
    exponential of a log-scale variable initialized to zeros (so it starts at
    one) and is learnt only when `trainable` is set.

    Args:
      name: variable scope.
      z: input_tensor
      logscale_factor: multiplies the log-scale variable before the
        exponential; equivalent to scaling up the learning_rate by a factor
        of logscale_factor.
      trainable: Whether or not std^i is learnt.

    Returns:
      A `tfp.distributions.Normal` with the loc and scale described above.
    """
    with tf.variable_scope(name, reuse=tf.AUTO_REUSE):
        latent_shape = common_layers.shape_list(z)
        multiplier = tf.get_variable(
            "latent_multiplier",
            shape=latent_shape,
            dtype=tf.float32,
            initializer=tf.ones_initializer())
        raw_log_scale = tf.get_variable(
            "log_scale_latent",
            shape=latent_shape,
            dtype=tf.float32,
            initializer=tf.zeros_initializer(),
            trainable=trainable)
        scaled_log_scale = raw_log_scale * logscale_factor
        prior_loc = multiplier * z
        prior_scale = tf.exp(scaled_log_scale)
        return tfp.distributions.Normal(loc=prior_loc, scale=prior_scale)
Example #4
Source File: transformer.py From mesh with Apache License 2.0 | 6 votes |
def sublayer_rms_norm_subsampled(x, layer_stack, context, percentage=100.,
                                 epsilon=1e-6):
    """RMS normalization whose statistic uses only part of the model dim.

    The mean square is taken over a slice of `x` that starts at offset 0
    along the model dimension and spans
    `ceil(model_dim.size * percentage / 100)` entries. All of `x` is then
    rescaled by the reciprocal root of that statistic and by a learned scale.

    Args:
      x: an input mtf.Tensor
      layer_stack: unused; deleted immediately.
      context: object providing `mesh`, `variable_dtype`, `model.model_dim`
        and `model.ensemble_dims`.
      percentage: share (0-100) of the model dimension used for the statistic.
      epsilon: a float added to the mean square before the reciprocal root.

    Returns:
      a mtf.Tensor
    """
    del layer_stack
    full_dim = context.model.model_dim
    with tf.variable_scope("layer_norm_subsampled"):
        scale_shape = mtf.Shape(context.model.ensemble_dims + [full_dim])
        scale = mtf.get_variable(
            context.mesh,
            "scale",
            scale_shape,
            initializer=tf.ones_initializer(),
            dtype=context.variable_dtype)
        subsample_size = int(math.ceil(full_dim.size * percentage/100))
        sub_dim = mtf.Dimension(full_dim.name, subsample_size)
        leading = mtf.slice(x, 0, sub_dim.size, sub_dim.name)
        mean_square = mtf.reduce_mean(mtf.square(leading), reduced_dim=sub_dim)
    return x * mtf.rsqrt(mean_square + epsilon) * scale
Example #5
Source File: transformer.py From mesh with Apache License 2.0 | 6 votes |
def sublayer_rms_norm(x, layer_stack, context, epsilon=1e-6, name="rms_norm"):
    """RMS normalization.

    Divides `x` by the root of its mean square over the model dimension and
    multiplies by a learned scale that is initialized to ones.

    Args:
      x: an input mtf.Tensor
      layer_stack: a LayerStack (unused; deleted immediately)
      context: a Context
      epsilon: a float added to the mean square before the reciprocal root
      name: a string used as the variable scope

    Returns:
      a mtf.Tensor
    """
    del layer_stack
    norm_dim = context.model.model_dim
    with tf.variable_scope(name):
        scale_shape = mtf.Shape(context.model.ensemble_dims + [norm_dim])
        scale = mtf.get_variable(
            context.mesh,
            "scale",
            scale_shape,
            initializer=tf.ones_initializer(),
            dtype=context.variable_dtype)
        mean_square = mtf.reduce_mean(mtf.square(x), reduced_dim=norm_dim)
    return x * mtf.rsqrt(mean_square + epsilon) * scale
Example #6
Source File: common_layers.py From tensor2tensor with Apache License 2.0 | 6 votes |
def group_norm(x, filters=None, num_groups=8, epsilon=1e-5):
    """Group normalization as in https://arxiv.org/abs/1803.08494.

    Args:
      x: a rank-4 Tensor (asserted). The moments are taken over axes 1, 2 and
        the within-group channel axis, so a channels-last layout is assumed
        -- confirm against callers.
      filters: size of the last axis of `x`; read from the shape of `x` when
        None. Must be divisible by `num_groups` (asserted).
      num_groups: number of groups the last axis is split into.
      epsilon: added to the variance before the reciprocal square root.

    Returns:
      A Tensor with the shape of `x`: the group-normalized input multiplied
      by a learned scale (initialized to ones) plus a learned bias
      (initialized to zeros).
    """
    x_shape = shape_list(x)
    if filters is None:
        filters = x_shape[-1]
    assert len(x_shape) == 4
    assert filters % num_groups == 0
    # Prepare variables.
    scale = tf.get_variable(
        "group_norm_scale", [filters], initializer=tf.ones_initializer())
    bias = tf.get_variable(
        "group_norm_bias", [filters], initializer=tf.zeros_initializer())
    epsilon, scale, bias = [cast_like(t, x) for t in [epsilon, scale, bias]]
    # Reshape and compute group norm: the last axis is split into
    # [num_groups, filters // num_groups].
    x = tf.reshape(x, x_shape[:-1] + [num_groups, filters // num_groups])
    # Calculate mean and variance on heights, width, channels (not groups).
    # `keepdims` is the supported keyword; `keep_dims` is a deprecated alias.
    mean, variance = tf.nn.moments(x, [1, 2, 4], keepdims=True)
    norm_x = (x - mean) * tf.rsqrt(variance + epsilon)
    return tf.reshape(norm_x, x_shape) * scale + bias
Example #7
Source File: resnet_model.py From rigl with Apache License 2.0 | 5 votes |
def batch_norm_relu(inputs, is_training, relu=True, init_zero=False,
                    data_format='channels_first'):
    """Applies batch normalization, optionally followed by a ReLU.

    Args:
      inputs: `Tensor` of shape `[batch, channels, ...]`.
      is_training: `bool` for whether the model is training.
      relu: `bool` if False, omits the ReLU operation.
      init_zero: `bool` if True, initializes scale parameter of batch
        normalization with 0 instead of 1 (default).
      data_format: `str` either "channels_first" for
        `[batch, channels, height, width]` or "channels_last" for
        `[batch, height, width, channels]`.

    Returns:
      A normalized `Tensor` with the same `data_format`.
    """
    gamma_init = (
        tf.zeros_initializer() if init_zero else tf.ones_initializer())
    # Channel axis: 1 for channels-first, 3 for anything else.
    channel_axis = 1 if data_format == 'channels_first' else 3

    outputs = tf.layers.batch_normalization(
        inputs=inputs,
        axis=channel_axis,
        momentum=BATCH_NORM_DECAY,
        epsilon=BATCH_NORM_EPSILON,
        center=True,
        scale=True,
        training=is_training,
        fused=True,
        gamma_initializer=gamma_init)

    if not relu:
        return outputs
    return tf.nn.relu(outputs)
Example #8
Source File: convnet_builder.py From benchmarks with Apache License 2.0 | 5 votes |
def _batch_norm_without_layers(self, input_layer, decay, use_scale, epsilon):
    """Batch normalization on `input_layer` without tf.layers.

    Kept as close as possible to tf.contrib.layers.batch_norm so that using
    layers and not using layers differ as little as possible.

    Args:
      input_layer: tensor to normalize; the channel axis is 3 when
        `self.data_format` is 'NHWC' and 1 otherwise.
      decay: decay passed to the moving-average updates.
      use_scale: if true a 'gamma' variable is created, otherwise a constant
        1.0 is used as the scale.
      epsilon: epsilon passed to `tf.nn.fused_batch_norm`.

    Returns:
      The batch-normalized tensor.
    """
    channel_axis = 3 if self.data_format == 'NHWC' else 1
    num_channels = input_layer.shape[channel_axis]

    beta = self.get_variable('beta', [num_channels], tf.float32, tf.float32,
                             initializer=tf.zeros_initializer())
    if use_scale:
        gamma = self.get_variable('gamma', [num_channels], tf.float32,
                                  tf.float32,
                                  initializer=tf.ones_initializer())
    else:
        gamma = tf.constant(1.0, tf.float32, [num_channels])

    # The moving statistics come from tf.get_variable rather than
    # self.get_variable: the latter returns the result of tf.cast, which
    # cannot be assigned to.
    running_mean = tf.get_variable('moving_mean', [num_channels], tf.float32,
                                   initializer=tf.zeros_initializer(),
                                   trainable=False)
    running_variance = tf.get_variable('moving_variance', [num_channels],
                                       tf.float32,
                                       initializer=tf.ones_initializer(),
                                       trainable=False)

    if not self.phase_train:
        # Inference: normalize with the stored moving statistics.
        normalized, _, _ = tf.nn.fused_batch_norm(
            input_layer, gamma, beta, mean=running_mean,
            variance=running_variance, epsilon=epsilon,
            data_format=self.data_format, is_training=False)
        return normalized

    # Training: normalize with batch statistics and register the
    # moving-average updates in UPDATE_OPS.
    normalized, batch_mean, batch_variance = tf.nn.fused_batch_norm(
        input_layer, gamma, beta, epsilon=epsilon,
        data_format=self.data_format, is_training=True)
    stat_pairs = ((running_mean, batch_mean),
                  (running_variance, batch_variance))
    updates = [
        moving_averages.assign_moving_average(
            moving, batch_stat, decay=decay, zero_debias=False)
        for moving, batch_stat in stat_pairs
    ]
    for update in updates:
        tf.add_to_collection(tf.GraphKeys.UPDATE_OPS, update)
    return normalized
Example #9
Source File: layers.py From mesh with Apache License 2.0 | 5 votes |
def layer_norm(x, dim, epsilon=1e-6, name="layer_prepostprocess"):
    """Layer normalization over dimension dim.

    `x` is centred by its mean and rescaled by its variance over `dim`, then
    multiplied by a learned scale (initialized to ones) and shifted by a
    learned bias (initialized to zeros).

    Args:
      x: a mtf.Tensor whose shape contains dim.
      dim: a mtf.Dimension
      epsilon: a floating point number
      name: a string used for tf.variable_scope.

    Returns:
      a mtf.Tensor with same shape as x.
    """
    with tf.variable_scope(name + "/layer_norm"):
        param_shape = mtf.Shape([dim])
        gamma = mtf.get_variable(
            x.mesh,
            "layer_norm_scale",
            param_shape,
            initializer=tf.ones_initializer(),
            activation_dtype=x.dtype)
        beta = mtf.get_variable(
            x.mesh,
            "layer_norm_bias",
            mtf.Shape([dim]),
            initializer=tf.zeros_initializer(),
            activation_dtype=x.dtype)
        # Shape of x with `dim` removed: the output shape of both reductions.
        stats_shape = x.shape - dim
        mu = mtf.reduce_mean(x, output_shape=stats_shape)
        sigma_sq = mtf.reduce_mean(
            mtf.square(x - mu), output_shape=stats_shape)
        centred = x - mu
        inv_std = mtf.rsqrt(sigma_sq + epsilon)
        return centred * inv_std * gamma + beta
Example #10
Source File: common_layers.py From tensor2tensor with Apache License 2.0 | 5 votes |
def build(self, input_shape=None):
    """Build `Layer`.

    Builds the wrapped layer if it is not built yet and attaches the
    weight-norm variables to it: `v` (an alias of the layer's `kernel`) and
    `g` (a trainable vector with one entry per unit of the kernel's last
    dimension, using `tf.ones_initializer`).

    NOTE(review): the nesting under `if not self.layer.built` was
    reconstructed from flattened text -- confirm against the original file.

    Args:
      input_shape: shape of the input; converted with
        `tf.TensorShape(...).as_list()`.

    Raises:
      ValueError: if the wrapped layer has no `kernel` attribute after it
        has been built.
    """
    input_shape = tf.TensorShape(input_shape).as_list()
    self.input_spec = layers().InputSpec(shape=input_shape)

    if not self.layer.built:
        self.layer.build(input_shape)
        # Flag is cleared again until the weight-norm variables exist
        # (presumably so the layer is not treated as built in between).
        self.layer.built = False

        if not hasattr(self.layer, "kernel"):
            raise ValueError("`WeightNorm` must wrap a layer that"
                             " contains a `kernel` for weights")

        # The kernel's filter or unit dimension is -1
        self.layer_depth = int(self.layer.kernel.shape[-1])
        # Every kernel axis except the last one.
        self.norm_axes = list(range(self.layer.kernel.shape.ndims - 1))

        self.layer.v = self.layer.kernel
        self.layer.g = self.layer.add_variable(
            name="g",
            shape=(self.layer_depth,),
            initializer=tf.ones_initializer,
            dtype=self.layer.kernel.dtype,
            trainable=True)

        # Disabled alternative that first assigns `g` from `_init_norm(v)`:
        # with ops.control_dependencies([self.layer.g.assign(
        #     self._init_norm(self.layer.v))]):
        #   self._compute_weights()
        self._compute_weights()

        self.layer.built = True

    super(WeightNorm, self).build()
    self.built = True
Example #11
Source File: common_layers.py From tensor2tensor with Apache License 2.0 | 5 votes |
def layer_norm_vars(filters):
    """Returns the `(scale, bias)` variable pair used by layer norm.

    Both variables have shape `[filters]`; the scale is initialized to ones
    and the bias to zeros.
    """
    gamma = tf.get_variable(
        "layer_norm_scale",
        [filters],
        initializer=tf.ones_initializer())
    beta = tf.get_variable(
        "layer_norm_bias",
        [filters],
        initializer=tf.zeros_initializer())
    return gamma, beta
Example #12
Source File: resnet.py From tensor2tensor with Apache License 2.0 | 5 votes |
def batch_norm_relu(inputs, is_training, relu=True, init_zero=False,
                    data_format="channels_first"):
    """Applies batch normalization, optionally followed by a ReLU.

    Args:
      inputs: `Tensor` of shape `[batch, channels, ...]`.
      is_training: `bool` for whether the model is training.
      relu: `bool` if False, omits the ReLU operation.
      init_zero: `bool` if True, initializes scale parameter of batch
        normalization with 0 instead of 1 (default).
      data_format: `str` either "channels_first" for
        `[batch, channels, height, width]` or "channels_last" for
        `[batch, height, width, channels]`.

    Returns:
      A normalized `Tensor` with the same `data_format`.
    """
    gamma_init = (
        tf.zeros_initializer() if init_zero else tf.ones_initializer())
    # Channel axis: 1 for channels-first, 3 for anything else.
    channel_axis = 1 if data_format == "channels_first" else 3

    batch_norm = layers().BatchNormalization(
        axis=channel_axis,
        momentum=BATCH_NORM_DECAY,
        epsilon=BATCH_NORM_EPSILON,
        center=True,
        scale=True,
        fused=True,
        gamma_initializer=gamma_init)
    outputs = batch_norm(inputs, training=is_training)

    if not relu:
        return outputs
    return tf.nn.relu(outputs)
Example #13
Source File: batch_dqn_agent_test.py From tensor2tensor with Apache License 2.0 | 5 votes |
def _create_test_agent(self, sess):
    """Builds a `BatchDQNAgent` in eval mode backed by a fixed mock network.

    Args:
      sess: session handed to the agent and used to run the global variable
        initializer.

    Returns:
      The constructed agent, with `eval_mode` set to True.
    """
    stack_size = self.stack_size

    class MockDQNNetwork(tf.keras.Model):
        """The Keras network used in tests."""

        def __init__(self, num_actions, **kwargs):
            super(MockDQNNetwork, self).__init__(**kwargs)
            # Each kernel row is [num_actions, ..., 1]: action 0 gets the
            # highest weight, ensuring that it gets picked by the argmax.
            descending = np.arange(num_actions, 0, -1)
            kernel_values = np.tile(descending, (stack_size, 1))
            self.layer = tf.keras.layers.Dense(
                num_actions,
                kernel_initializer=tf.constant_initializer(kernel_values),
                bias_initializer=tf.ones_initializer())

        def call(self, state):
            # Only the batch size of `state` is used; the layer is always
            # fed zeros.
            zero_batch = np.zeros((state.shape[0], stack_size))
            net_input = tf.constant(zero_batch, dtype=tf.float32)
            return atari_lib.DQNNetworkType(self.layer(net_input))

    agent = dopamine_connector.BatchDQNAgent(
        network=MockDQNNetwork,
        replay_capacity=100,
        buffer_batch_size=8,
        generates_trainable_dones=True,
        sess=sess,
        env_batch_size=self.env_batch_size,
        num_actions=self.num_actions,
        min_replay_history=self.min_replay_history,
        # No exploration.
        epsilon_fn=lambda w, x, y, z: 0.0,
        update_period=self.update_period,
        target_update_period=self.target_update_period,
        # No exploration during evaluation.
        epsilon_eval=0.0)
    # This ensures non-random action choices (since epsilon_eval = 0.0) and
    # skips the train_step.
    agent.eval_mode = True
    sess.run(tf.global_variables_initializer())
    return agent
Example #14
Source File: tiled_linear.py From lamb with Apache License 2.0 | 5 votes |
def _get_variable(self, name, shape, default_initializer=None,
                  default_partitioner=None, default_regularizer=None):
    """Returns the variable for `name`, sharing a base matrix where configured.

    Anything that is not rank 2 is delegated unchanged to the parent class.
    For a matrix, a base is looked up in `self._matrix_cache` under a sharing
    key. On a cache hit (outside a reusing variable scope) the cached base is
    multiplied by a new scaler variable named 's_' + name, initialized to
    ones. Otherwise the base is created through the parent class and stored
    in the cache. If the 'overlay_rank' init param is non-zero, a low-rank
    matrix from `self._low_rank_matrix` is added to the base.

    Args:
      name: variable name; also the sharing key when the key is ':name:'.
      shape: variable shape; also the sharing key when the key is ':shape:'.
      default_initializer: forwarded to the parent `_get_variable`.
      default_partitioner: forwarded to the parent `_get_variable`.
      default_regularizer: forwarded to the parent `_get_variable`.

    Returns:
      The variable, or the tensor built from the shared base.
    """
    parent_get = super(OverlaidTiledLinear, self)._get_variable
    defaults = dict(
        default_initializer=default_initializer,
        default_partitioner=default_partitioner,
        default_regularizer=default_regularizer)

    if len(shape) != 2:
        return parent_get(name, shape, **defaults)

    rank = self._find_var_init_param(name, 'overlay_rank', 0)
    sharing_key = self._find_var_init_param(name, 'overlay_sharing_key',
                                            ':name:')
    # Two independent checks, on purpose not an elif chain.
    if sharing_key == ':name:':
        sharing_key = name
    if sharing_key == ':shape:':
        # NOTE(review): the key becomes `shape` itself; assuming
        # `_matrix_cache` is a dict, `shape` must then be hashable
        # (a tuple, not a list) -- confirm against callers.
        sharing_key = shape

    cache_hit = (sharing_key in self._matrix_cache and
                 not tf.get_variable_scope().reuse)
    if cache_hit:
        scaler = parent_get('s_'+name, [shape[1]],
                            default_initializer=tf.ones_initializer())
        base = scaler*self._matrix_cache[sharing_key]
    else:
        base = parent_get(sharing_key, shape, **defaults)
        self._matrix_cache[sharing_key] = base

    if rank == 0:
        return base
    overlay = self._low_rank_matrix(name, rank=rank, shape=shape)
    return base+overlay
Example #15
Source File: utils.py From Object_Detection_Tracking with Apache License 2.0 | 4 votes |
def batch_norm_act(inputs,
                   is_training_bn: bool,
                   act_type: Union[Text, None],
                   init_zero: bool = False,
                   data_format: Text = 'channels_last',
                   momentum: float = 0.99,
                   epsilon: float = 1e-3,
                   use_tpu: bool = False,
                   name: Union[Text, None] = None):
    """Performs a batch normalization followed by a non-linear activation.

    Args:
      inputs: `Tensor` of shape `[batch, channels, ...]`.
      is_training_bn: `bool` for whether the model is training.
      act_type: non-linear activation function type. If None (or otherwise
        falsy), the activation is omitted.
      init_zero: `bool` if True, initializes scale parameter of batch
        normalization with 0 instead of 1 (default).
      data_format: `str` either "channels_first" for
        `[batch, channels, height, width]` or "channels_last" for
        `[batch, height, width, channels]`.
      momentum: `float`, momentum of batch norm.
      epsilon: `float`, small value for numerical stability.
      use_tpu: `bool`, whether to use tpu version of batch norm.
      name: the name of the batch normalization layer, or None.

    Returns:
      A normalized `Tensor` with the same `data_format`.
    """
    gamma_initializer = (
        tf.zeros_initializer() if init_zero else tf.ones_initializer())
    # Channel axis: 1 for channels-first, 3 for anything else.
    axis = 1 if data_format == 'channels_first' else 3

    inputs = tpu_batch_normalization(
        inputs=inputs,
        axis=axis,
        momentum=momentum,
        epsilon=epsilon,
        center=True,
        scale=True,
        training=is_training_bn,
        use_tpu=use_tpu,
        gamma_initializer=gamma_initializer,
        name=name)

    if act_type:
        inputs = activation_fn(inputs, act_type)
    return inputs
Example #16
Source File: bert.py From mesh with Apache License 2.0 | 4 votes |
def layer_norm(x, dim, epsilon=1e-6, subtract_mean=True, use_scale=True,
               use_bias=True, name=None):
    """Layer normalization over dimension dim.

    TODO(noam): This is cleaner than the version in mtf.layers
    Move this version into mtf.layers to replace the one there.

    The input is optionally centred over `dim`, divided by the root of its
    mean square over `dim`, and then optionally multiplied by a learned
    scale (initialized to ones) and shifted by a learned bias (initialized
    to zeros).

    Args:
      x: a mtf.Tensor whose shape contains dim.
      dim: a mtf.Dimension
      epsilon: a floating point number
      subtract_mean: a boolean
      use_scale: a boolean
      use_bias: a boolean
      name: a string used for tf.variable_scope.

    Returns:
      a mtf.Tensor with same shape as x.
    """
    out = x
    with tf.variable_scope(name, default_name="layer_norm"):
        if subtract_mean:
            out -= mtf.reduce_mean(out, reduced_dim=dim)
        mean_square = mtf.reduce_mean(mtf.square(out), reduced_dim=dim)
        out *= mtf.rsqrt(mean_square + epsilon)
        if use_scale:
            out *= mtf.get_variable(
                out.mesh,
                "scale",
                mtf.Shape([dim]),
                initializer=tf.ones_initializer(),
                activation_dtype=out.dtype)
        if use_bias:
            out += mtf.get_variable(
                out.mesh,
                "bias",
                mtf.Shape([dim]),
                initializer=tf.zeros_initializer(),
                activation_dtype=out.dtype)
        return out
Example #17
Source File: model_fns.py From language with Apache License 2.0 | 4 votes |
def one_hop(qry_input_ids, qry_input_mask, qry_entity_ids, entity_ids, entity_mask, ent2ment_ind, ent2ment_val, ment2ent_map, is_training, use_one_hot_embeddings, bert_config, qa_config, mips_config, answer_mentions=None): """One hop of propagation from input to output entities.""" # for question BOW embedding with tf.variable_scope("qry/bow"): word_weights = tf.get_variable( "word_weights", [bert_config.vocab_size, 1], dtype=tf.float32, initializer=tf.ones_initializer()) qry_seq_emb, word_emb_table = shared_qry_encoder(qry_input_ids, qry_input_mask, is_training, use_one_hot_embeddings, bert_config, qa_config) qry_start_emb, qry_end_emb = layer_qry_encoder(qry_seq_emb, qry_input_ids, qry_input_mask, is_training, bert_config, qa_config) with tf.device("/cpu:0"): # mips search. tf_db, mips_search_fn = search_utils.create_mips_searcher( mips_config.ckpt_var_name, mips_config.ckpt_path, mips_config.num_neighbors) batch_size = tf.shape(qry_entity_ids)[0] batch_entities = tf.SparseTensor( indices=tf.concat([ tf.cast(tf.expand_dims(tf.range(batch_size), 1), tf.int64), tf.cast(tf.expand_dims(qry_entity_ids, 1), tf.int64) ], axis=1), values=tf.ones_like(qry_entity_ids, tf.float32), dense_shape=[batch_size, qa_config.num_entities]) ret_entities, ret_mentions, dense_mention_vec, sp_mention_vec = follow( batch_entities, qry_start_emb, qry_end_emb, entity_ids, entity_mask, ent2ment_ind, ent2ment_val, ment2ent_map, word_emb_table, word_weights, mips_search_fn, tf_db, bert_config.hidden_size, mips_config, qa_config, is_training, answer_mentions) return ret_entities, ret_mentions, dense_mention_vec, sp_mention_vec
Example #18
Source File: attention.py From language with Apache License 2.0 | 4 votes |
def _luong_score(query, keys, scale):
    """Computes Luong-style (multiplicative) attention scores.

    Two forms are supported. The plain form is standard Luong attention, as
    described in:

    Minh-Thang Luong, Hieu Pham, Christopher D. Manning.
    "Effective Approaches to Attention-based Neural Machine Translation."
    EMNLP 2015.  https://arxiv.org/abs/1508.04025

    The scaled form, inspired partly by the normalized form of Bahdanau
    attention, multiplies the scores by a learned scalar; enable it with
    `scale=True`.

    Args:
      query: Tensor, shape `[batch_size, num_units]` to compare to keys.
      keys: Processed memory, shape `[batch_size, max_time, num_units]`.
      scale: Whether to apply a scale to the score function.

    Returns:
      A `[batch_size, max_time]` tensor of unnormalized score values.

    Raises:
      ValueError: If `key` and `query` depths do not match.
    """
    query_units = query.get_shape()[-1]
    key_units = keys.get_shape()[-1]
    if query_units != key_units:
        raise ValueError(
            "Incompatible or unknown inner dimensions between query and keys. "
            "Query (%s) has units: %s. Keys (%s) have units: %s. "
            "Perhaps you need to set num_units to the keys' dimension (%s)?" %
            (query, query_units, keys, key_units, key_units))
    score_dtype = query.dtype

    # Insert a singleton time axis: [batch_size, depth] becomes
    # [batch_size, 1, depth], ready for a batched matmul.
    expanded_query = tf.expand_dims(query, 1)

    # Inner product over the units axis. With transpose_b=True the keys'
    # inner shape is transposed, so the batched product is
    #   [batch_size, 1, depth] . [batch_size, depth, max_time]
    # which yields [batch_size, 1, max_time]; the singleton middle axis is
    # squeezed out afterwards.
    raw_score = tf.matmul(expanded_query, keys, transpose_b=True)
    score = tf.squeeze(raw_score, [1])

    if not scale:
        return score

    # Scalar used in weight scaling
    g = tf.get_variable(
        "attention_g", dtype=score_dtype, initializer=tf.ones_initializer,
        shape=())
    return g * score