Python tensorflow.random_normal_initializer() Examples
The following are 30 code examples of tensorflow.random_normal_initializer(), collected from open-source projects.
Follow the link above each example to see the original project and source file.
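
Before the project examples, here is a minimal stand-alone sketch of how the initializer is typically used with the TF 1.x variable API. The variable name, shape, and seed are illustrative only, not taken from any project below.

import tensorflow as tf  # TF 1.x-style API, matching the examples below

# Initializer drawing values from a normal distribution N(mean=0.0, stddev=0.02).
init = tf.random_normal_initializer(mean=0.0, stddev=0.02, seed=42)

with tf.variable_scope('demo'):
    # Hypothetical weight matrix; the initializer is passed via `initializer=`.
    w = tf.get_variable('w', shape=[128, 64], dtype=tf.float32, initializer=init)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(w).std())  # roughly 0.02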

Example #1
Source File: utility.py From soccer-matlab with BSD 2-Clause "Simplified" License | 6 votes |
def define_network(constructor, config, action_size):
  """Constructor for the recurrent cell for the algorithm.

  Args:
    constructor: Callable returning the network as RNNCell.
    config: Object providing configurations via attributes.
    action_size: Integer indicating the amount of action dimensions.

  Returns:
    Created recurrent cell object.
  """
  mean_weights_initializer = (
      tf.contrib.layers.variance_scaling_initializer(
          factor=config.init_mean_factor))
  logstd_initializer = tf.random_normal_initializer(config.init_logstd, 1e-10)
  network = constructor(
      config.policy_layers, config.value_layers, action_size,
      mean_weights_initializer=mean_weights_initializer,
      logstd_initializer=logstd_initializer)
  return network
Example #2
Source File: ops.py From SSGAN-Tensorflow with MIT License | 6 votes |
def instance_norm(input):
    """ Instance normalization """
    with tf.variable_scope('instance_norm'):
        num_out = input.get_shape()[-1]
        scale = tf.get_variable(
            'scale', [num_out],
            initializer=tf.random_normal_initializer(mean=1.0, stddev=0.02))
        offset = tf.get_variable(
            'offset', [num_out],
            initializer=tf.random_normal_initializer(mean=0.0, stddev=0.02))
        mean, var = tf.nn.moments(input, axes=[1, 2], keep_dims=True)
        epsilon = 1e-6
        inv = tf.rsqrt(var + epsilon)
        return scale * (input - mean) * inv + offset
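
As a usage sketch (the placeholder shape is illustrative, not part of the original source), the function above can be applied directly to a batch of NHWC feature maps:

# Hypothetical input: a batch of 64x64 RGB images in NHWC layout.
images = tf.placeholder(tf.float32, [None, 64, 64, 3])
with tf.variable_scope('example'):
    normalized = instance_norm(images)  # same shape as `images`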
Example #3
Source File: universal_transformer_util.py From fine-lm with MIT License | 6 votes |
def add_depth_embedding(x):
  """Add n-dimensional embedding as the depth embedding (timing signal).

  Adds embeddings to represent the position of the step in the recurrent tower.

  Args:
    x: a tensor with shape [max_step, batch, length, depth]

  Returns:
    a Tensor the same shape as x.
  """
  x_shape = common_layers.shape_list(x)
  depth = x_shape[-1]
  num_steps = x_shape[0]
  shape = [num_steps, 1, 1, depth]
  depth_embedding = (
      tf.get_variable(
          "depth_embedding", shape,
          initializer=tf.random_normal_initializer(0, depth**-0.5)) *
      (depth**0.5))
  x += depth_embedding
  return x
Example #4
Source File: common_attention.py From fine-lm with MIT License | 6 votes |
def get_layer_timing_signal_learned_1d(channels, layer, num_layers):
  """get n-dimensional embedding as the layer (vertical) timing signal.

  Adds embeddings to represent the position of the layer in the tower.

  Args:
    channels: dimension of the timing signal
    layer: layer num
    num_layers: total number of layers

  Returns:
    a Tensor of timing signals [1, 1, channels].
  """
  shape = [num_layers, 1, 1, channels]
  layer_embedding = (
      tf.get_variable(
          "layer_embedding", shape,
          initializer=tf.random_normal_initializer(0, channels**-0.5)) *
      (channels**0.5))
  return layer_embedding[layer, :, :, :]
Example #5
Source File: net.py From progressive_growing_of_GANs with MIT License | 6 votes |
def conv2d(self, input_, n_filters, k_size, padding='same'):
    if not self.cfg.weight_scale:
        return tf.layers.conv2d(input_, n_filters, k_size, padding=padding)

    n_feats_in = input_.get_shape().as_list()[-1]
    fan_in = k_size * k_size * n_feats_in
    c = tf.constant(np.sqrt(2. / fan_in), dtype=tf.float32)
    kernel_init = tf.random_normal_initializer(stddev=1.)
    w_shape = [k_size, k_size, n_feats_in, n_filters]
    w = tf.get_variable('kernel', shape=w_shape, initializer=kernel_init)
    w = c * w
    strides = [1, 1, 1, 1]
    net = tf.nn.conv2d(input_, w, strides, padding=padding.upper())
    b = tf.get_variable('bias', [n_filters], initializer=tf.constant_initializer(0.))
    net = tf.nn.bias_add(net, b)
    return net
Example #6
Source File: pix2pix.py From DeepLab_v3 with MIT License | 6 votes |
def pix2pix_arg_scope():
  """Returns a default argument scope for isola_net.

  Returns:
    An arg scope.
  """
  # These parameters come from the online port, which don't necessarily match
  # those in the paper.
  # TODO(nsilberman): confirm these values with Philip.
  instance_norm_params = {
      'center': True,
      'scale': True,
      'epsilon': 0.00001,
  }

  with tf.contrib.framework.arg_scope(
      [layers.conv2d, layers.conv2d_transpose],
      normalizer_fn=layers.instance_norm,
      normalizer_params=instance_norm_params,
      weights_initializer=tf.random_normal_initializer(0, 0.02)) as sc:
    return sc
Example #7
Source File: resnet_model.py From benchmarks with The Unlicense | 6 votes |
def resnet_backbone(image, num_blocks, group_func, block_func):
    """
    Sec 5.1: We adopt the initialization of [15] for all convolutional layers.
    TensorFlow does not have the true "MSRA init". We use variance_scaling as an approximation.
    """
    with argscope(Conv2D, use_bias=False,
                  kernel_initializer=tf.variance_scaling_initializer(scale=2.0, mode='fan_out')):
        l = Conv2D('conv0', image, 64, 7, strides=2, activation=BNReLU)
        l = MaxPooling('pool0', l, pool_size=3, strides=2, padding='SAME')
        l = group_func('group0', l, block_func, 64, num_blocks[0], 1)
        l = group_func('group1', l, block_func, 128, num_blocks[1], 2)
        l = group_func('group2', l, block_func, 256, num_blocks[2], 2)
        l = group_func('group3', l, block_func, 512, num_blocks[3], 2)
        l = GlobalAvgPooling('gap', l)
        logits = FullyConnected('linear', l, 1000,
                                kernel_initializer=tf.random_normal_initializer(stddev=0.01))
    """
    Sec 5.1: The 1000-way fully-connected layer is initialized by
    drawing weights from a zero-mean Gaussian with standard deviation of 0.01.
    """
    return logits
Example #8
Source File: madry_cifar10_model.py From neural-fingerprinting with BSD 3-Clause "New" or "Revised" License | 5 votes |
def _conv(name, x, filter_size, in_filters, out_filters, strides):
  """Convolution."""
  with tf.variable_scope(name, reuse=tf.AUTO_REUSE):
    n = filter_size * filter_size * out_filters
    kernel = tf.get_variable(
        'DW', [filter_size, filter_size, in_filters, out_filters],
        tf.float32,
        initializer=tf.random_normal_initializer(stddev=np.sqrt(2.0 / n)))
    return tf.nn.conv2d(x, kernel, strides, padding='SAME')
Example #9
Source File: 13_variable_sharing.py From deep-learning-note with MIT License | 5 votes |
def two_hidden_layers_2(x):
    assert x.shape.as_list() == [200, 100]
    w1 = tf.get_variable('h1_weights', [100, 50], initializer=tf.random_normal_initializer())
    b1 = tf.get_variable('h1_biases', [50], initializer=tf.constant_initializer(0.0))
    h1 = tf.matmul(x, w1) + b1
    assert h1.shape.as_list() == [200, 50]
    w2 = tf.get_variable('h2_weights', [50, 10], initializer=tf.random_normal_initializer())
    b2 = tf.get_variable('h2_biases', [10], initializer=tf.constant_initializer(0.0))
    logits = tf.matmul(h1, w2) + b2
    return logits
Example #10
Source File: 17_conv_mnist.py From deep-learning-note with MIT License | 5 votes |
def conv_relu(inputs, filters, k_size, stride, padding, scope_name):
    '''
    A method that does convolution + relu on inputs
    '''
    with tf.compat.v1.variable_scope(scope_name, reuse=tf.compat.v1.AUTO_REUSE) as scope:
        in_channels = inputs.shape[-1]
        kernel = tf.compat.v1.get_variable('kernel',
                                           [k_size, k_size, in_channels, filters],
                                           initializer=tf.truncated_normal_initializer())
        biases = tf.compat.v1.get_variable('biases', [filters],
                                           initializer=tf.random_normal_initializer())
        conv = tf.nn.conv2d(inputs, kernel, strides=[1, stride, stride, 1], padding=padding)
    return tf.nn.relu(conv + biases, name=scope.name)
Example #11
Source File: 14_fully_connected.py From deep-learning-note with MIT License | 5 votes |
def fully_connected(x, output_dim, scope):
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE) as scope:
        w = tf.get_variable('weights', [x.shape[1], output_dim],
                            initializer=tf.random_normal_initializer())
        b = tf.get_variable('biases', [output_dim], initializer=tf.constant_initializer(0.0))
        return tf.matmul(x, w) + b
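
Because the scope is opened with reuse=tf.AUTO_REUSE, calling the helper twice with the same scope name shares one set of weights. A hypothetical sketch (placeholders and scope name are illustrative, not from the original file):

x1 = tf.placeholder(tf.float32, [32, 100])
x2 = tf.placeholder(tf.float32, [32, 100])
y1 = fully_connected(x1, 10, 'fc')  # creates 'fc/weights' and 'fc/biases'
y2 = fully_connected(x2, 10, 'fc')  # reuses the same variables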
Example #12
Source File: 4_simulate_sin.py From deep-learning-note with MIT License | 5 votes |
def inference(input_data):
    with tf.variable_scope('hidden1'):
        # first hidden layer, 16 units
        weights = tf.get_variable("weight", [1, 16], tf.float32,
                                  initializer=tf.random_normal_initializer(0.0, 1))
        biases = tf.get_variable("bias", [1, 16], tf.float32,
                                 initializer=tf.random_normal_initializer(0.0, 1))
        hidden1 = tf.sigmoid(tf.multiply(input_data, weights) + biases)
    with tf.variable_scope('hidden2'):
        # second hidden layer, 16 units
        weights = tf.get_variable("weight", [16, 16], tf.float32,
                                  initializer=tf.random_normal_initializer(0.0, 1))
        biases = tf.get_variable("bias", [16], tf.float32,
                                 initializer=tf.random_normal_initializer(0.0, 1))
        hidden2 = tf.sigmoid(tf.matmul(hidden1, weights) + biases)
    with tf.variable_scope('hidden3'):
        # third hidden layer, 16 units
        weights = tf.get_variable("weight", [16, 16], tf.float32,
                                  initializer=tf.random_normal_initializer(0.0, 1))
        biases = tf.get_variable("bias", [16], tf.float32,
                                 initializer=tf.random_normal_initializer(0.0, 1))
        hidden3 = tf.sigmoid(tf.matmul(hidden2, weights) + biases)
    with tf.variable_scope('output_layer'):
        # output layer
        weights = tf.get_variable("weight", [16, 1], tf.float32,
                                  initializer=tf.random_normal_initializer(0.0, 1))
        biases = tf.get_variable("bias", [1], tf.float32,
                                 initializer=tf.random_normal_initializer(0.0, 1))
        output = tf.matmul(hidden3, weights) + biases
    return output

# training
Example #13
Source File: layers.py From ARU-Net with GNU General Public License v2.0 | 5 votes |
def deconv2d_bn_lrn_drop(scope_or_name, inputs, kernel_shape, out_shape, subS=2, activation=tf.nn.relu,
                         use_bn=False, use_mvn=False, is_training=True, use_lrn=False, keep_prob=1.0,
                         dropout_maps=False, initOpt=0):
    with tf.variable_scope(scope_or_name):
        if initOpt == 0:
            stddev = np.sqrt(2.0 / (kernel_shape[0] * kernel_shape[1] * kernel_shape[2] + kernel_shape[3]))
        if initOpt == 1:
            stddev = 5e-2
        if initOpt == 2:
            stddev = min(np.sqrt(2.0 / (kernel_shape[0] * kernel_shape[1] * kernel_shape[2])), 5e-2)
        kernel = tf.get_variable("weights", kernel_shape,
                                 initializer=tf.random_normal_initializer(stddev=stddev))
        bias = tf.get_variable("bias", kernel_shape[2],
                               initializer=tf.constant_initializer(value=0.1))
        conv = tf.nn.conv2d_transpose(inputs, kernel, out_shape, strides=[1, subS, subS, 1],
                                      padding='SAME', name='conv')
        outputs = tf.nn.bias_add(conv, bias, name='preActivation')
        if use_bn:
            # outputs = tf.layers.batch_normalization(outputs, axis=3, training=is_training, name="batchNorm")
            outputs = batch_norm(outputs, is_training=is_training, scale=True, fused=True, scope="batchNorm")
        if use_mvn:
            outputs = feat_norm(outputs, kernel_shape[3])
        if activation:
            outputs = activation(outputs, name='activation')
        if use_lrn:
            outputs = tf.nn.local_response_normalization(outputs, name='localResponseNorm')
        if dropout_maps:
            conv_shape = tf.shape(outputs)
            n_shape = tf.stack([conv_shape[0], 1, 1, conv_shape[3]])
            outputs = tf.nn.dropout(outputs, keep_prob, noise_shape=n_shape)
        else:
            outputs = tf.nn.dropout(outputs, keep_prob)
        return outputs
Example #14
Source File: tf_utils.py From DOTA_models with Apache License 2.0 | 5 votes |
def fc_network(x, neurons, wt_decay, name, num_pred=None, offset=0,
               batch_norm_param=None, dropout_ratio=0.0, is_training=None):
  if dropout_ratio > 0:
    assert (is_training is not None), \
      'is_training needs to be defined when trainnig with dropout.'

  repr = []
  for i, neuron in enumerate(neurons):
    init_var = np.sqrt(2.0/neuron)
    if batch_norm_param is not None:
      x = slim.fully_connected(x, neuron, activation_fn=None,
                               weights_initializer=tf.random_normal_initializer(stddev=init_var),
                               weights_regularizer=slim.l2_regularizer(wt_decay),
                               normalizer_fn=slim.batch_norm,
                               normalizer_params=batch_norm_param,
                               biases_initializer=tf.zeros_initializer(),
                               scope='{:s}_{:d}'.format(name, offset+i))
    else:
      x = slim.fully_connected(x, neuron, activation_fn=tf.nn.relu,
                               weights_initializer=tf.random_normal_initializer(stddev=init_var),
                               weights_regularizer=slim.l2_regularizer(wt_decay),
                               biases_initializer=tf.zeros_initializer(),
                               scope='{:s}_{:d}'.format(name, offset+i))
    if dropout_ratio > 0:
      x = slim.dropout(x, keep_prob=1-dropout_ratio, is_training=is_training,
                       scope='{:s}_{:d}'.format('dropout_'+name, offset+i))
    repr.append(x)

  if num_pred is not None:
    init_var = np.sqrt(2.0/num_pred)
    x = slim.fully_connected(x, num_pred,
                             weights_regularizer=slim.l2_regularizer(wt_decay),
                             weights_initializer=tf.random_normal_initializer(stddev=init_var),
                             biases_initializer=tf.zeros_initializer(),
                             activation_fn=None,
                             scope='{:s}_pred'.format(name))
  return x, repr
Example #15
Source File: resnet_model.py From DOTA_models with Apache License 2.0 | 5 votes |
def _conv(self, name, x, filter_size, in_filters, out_filters, strides):
    """Convolution."""
    with tf.variable_scope(name):
      n = filter_size * filter_size * out_filters
      kernel = tf.get_variable(
          'DW', [filter_size, filter_size, in_filters, out_filters],
          tf.float32,
          initializer=tf.random_normal_initializer(stddev=np.sqrt(2.0/n)))
      return tf.nn.conv2d(x, kernel, strides, padding='SAME')
Example #16
Source File: graph_builder.py From DOTA_models with Apache License 2.0 | 5 votes |
def _ReluWeightInitializer(self):
    with tf.name_scope(self._param_scope):
      return tf.random_normal_initializer(stddev=self._relu_init,
                                          seed=self._seed)
Example #17
Source File: graph_builder.py From DOTA_models with Apache License 2.0 | 5 votes |
def _EmbeddingMatrixInitializer(self, index, embedding_size):
    if index in self._pretrained_embeddings:
      return self._pretrained_embeddings[index]
    else:
      return tf.random_normal_initializer(
          stddev=self._embedding_init / embedding_size**.5,
          seed=self._seed)
Example #18
Source File: utils.py From DOTA_models with Apache License 2.0 | 5 votes |
def linear(x, out_size, do_bias=True, alpha=1.0, identity_if_possible=False,
           normalized=False, name=None, collections=None):
  """Linear (affine) transformation, y = x W + b, for a variety of
  configurations.

  Args:
    x: The input tensor to the transformation.
    out_size: The integer size of non-batch output dimension.
    do_bias (optional): Add a learnable bias vector to the operation.
    alpha (optional): A multiplicative scaling for the weight initialization
      of the matrix, in the form \alpha * 1/\sqrt{x.shape[1]}.
    identity_if_possible (optional): just return identity,
      if x.shape[1] == out_size.
    normalized (optional): Option to divide out by the norms of the rows of W.
    name (optional): The name prefix to add to variables.
    collections (optional): List of additional collections. (Placed in
      tf.GraphKeys.GLOBAL_VARIABLES already, so no need for that.)

  Returns:
    In the equation, y = x W + b, returns the tensorflow op that yields y.
  """
  in_size = int(x.get_shape()[1])  # from Dimension(10) -> 10
  stddev = alpha / np.sqrt(float(in_size))
  mat_init = tf.random_normal_initializer(0.0, stddev)
  wname = (name + "/W") if name else "/W"

  if identity_if_possible and in_size == out_size:
    # Sometimes linear layers are nothing more than size adapters.
    return tf.identity(x, name=(wname + '_ident'))

  W, b = init_linear(in_size, out_size, do_bias=do_bias, alpha=alpha,
                     normalized=normalized, name=name, collections=collections)

  if do_bias:
    return tf.matmul(x, W) + b
  else:
    return tf.matmul(x, W)
Example #19
Source File: tf_utils.py From DOTA_models with Apache License 2.0 | 5 votes |
def linear(x, n_units, scope=None, stddev=0.02,
           activation=lambda x: x):
    """Fully-connected network.

    Parameters
    ----------
    x : Tensor
        Input tensor to the network.
    n_units : int
        Number of units to connect to.
    scope : str, optional
        Variable scope to use.
    stddev : float, optional
        Initialization's standard deviation.
    activation : arguments, optional
        Function which applies a nonlinearity

    Returns
    -------
    x : Tensor
        Fully-connected output.
    """
    shape = x.get_shape().as_list()
    with tf.variable_scope(scope or "Linear"):
        matrix = tf.get_variable("Matrix", [shape[1], n_units], tf.float32,
                                 tf.random_normal_initializer(stddev=stddev))
        return activation(tf.matmul(x, matrix))
# %%
Example #20
Source File: layers.py From ArtGAN with BSD 3-Clause "New" or "Revised" License | 5 votes |
def conv2d(x, nout, kernel=3, std=0.02, use_b=False, strides=1, name='conv2d',
           print_struct=True, pad='SAME'):
    if pad == 0:
        pad = 'VALID'
    with tf.variable_scope(name):
        W = tf.get_variable('W', [kernel, kernel, x.get_shape()[1], nout],
                            initializer=tf.random_normal_initializer(stddev=std))
        conv = tf.nn.conv2d(x, W, strides=[1, 1, strides, strides], padding=pad,
                            data_format='NCHW')
        if print_struct:
            print(conv.name + ': ' + str(conv.get_shape().as_list()))
        if use_b:
            b = tf.get_variable('b', [nout], initializer=tf.constant_initializer(0.0))
            conv = tf.nn.bias_add(conv, b, data_format='NCHW')
    return conv
Example #21
Source File: layers.py From ArtGAN with BSD 3-Clause "New" or "Revised" License | 5 votes |
def deconv2d(x, nout, kernel=3, std=0.02, use_b=False, strides=2, name='deconv2d'):
    # Not tested yet!
    with tf.variable_scope(name):
        shape = x.get_shape().as_list()
        W = tf.get_variable('W', [kernel, kernel, nout, shape[1]],
                            initializer=tf.random_normal_initializer(stddev=std))
        deconv = tf.nn.conv2d_transpose(x, W,
                                        [shape[0], nout, shape[2] * strides, shape[3] * strides],
                                        [1, 1, strides, strides], data_format='NCHW')
        if use_b:
            b = tf.get_variable('b', [nout], initializer=tf.constant_initializer(0.0))
            deconv = tf.nn.bias_add(deconv, b, data_format='NCHW')
        print(deconv.name + ': ' + str(deconv.get_shape().as_list()))
    return deconv
Example #22
Source File: layers.py From ArtGAN with BSD 3-Clause "New" or "Revised" License | 5 votes |
def linear(x, nout, std=0.02, use_b=False, init_b=0.0, name='linear'):
    with tf.variable_scope(name):
        W = tf.get_variable('W', [x.get_shape()[-1], nout],
                            initializer=tf.random_normal_initializer(stddev=std))
        lout = tf.matmul(x, W)
        if use_b:
            b = tf.get_variable('b', [nout], initializer=tf.constant_initializer(init_b))
            lout = tf.nn.bias_add(lout, b)
        print(lout.name + ': ' + str(lout.get_shape().as_list()))
    return lout
Example #23
Source File: rl.py From fine-lm with MIT License | 5 votes |
def feed_forward_gaussian_fun(action_space, config, observations):
  """Feed-forward Gaussian."""
  if not isinstance(action_space, gym.spaces.box.Box):
    raise ValueError("Expecting continuous action space.")

  mean_weights_initializer = tf.contrib.layers.variance_scaling_initializer(
      factor=config.init_mean_factor)
  logstd_initializer = tf.random_normal_initializer(config.init_logstd, 1e-10)

  flat_observations = tf.reshape(observations, [
      tf.shape(observations)[0], tf.shape(observations)[1],
      functools.reduce(operator.mul, observations.shape.as_list()[2:], 1)])

  with tf.variable_scope("network_parameters"):
    with tf.variable_scope("policy"):
      x = flat_observations
      for size in config.policy_layers:
        x = tf.contrib.layers.fully_connected(x, size, tf.nn.relu)
      mean = tf.contrib.layers.fully_connected(
          x, action_space.shape[0], tf.tanh,
          weights_initializer=mean_weights_initializer)
      logstd = tf.get_variable(
          "logstd", mean.shape[2:], tf.float32, logstd_initializer)
      logstd = tf.tile(
          logstd[None, None],
          [tf.shape(mean)[0], tf.shape(mean)[1]] + [1] * (mean.shape.ndims - 2))
    with tf.variable_scope("value"):
      x = flat_observations
      for size in config.value_layers:
        x = tf.contrib.layers.fully_connected(x, size, tf.nn.relu)
      value = tf.contrib.layers.fully_connected(x, 1, None)[..., 0]
  mean = tf.check_numerics(mean, "mean")
  logstd = tf.check_numerics(logstd, "logstd")
  value = tf.check_numerics(value, "value")

  policy = tf.contrib.distributions.MultivariateNormalDiag(mean, tf.exp(logstd))

  return NetworkOutput(policy, value, lambda a: tf.clip_by_value(a, -2., 2))
Example #24
Source File: autoencoders.py From fine-lm with MIT License | 5 votes |
def discriminator(self, x, is_training):
    """Discriminator architecture based on InfoGAN.

    Args:
      x: input images, shape [bs, h, w, channels]
      is_training: boolean, are we in train or eval model.

    Returns:
      out_logit: the output logits (before sigmoid).
    """
    hparams = self.hparams
    with tf.variable_scope(
        "discriminator", initializer=tf.random_normal_initializer(stddev=0.02)):
      batch_size, height, width = common_layers.shape_list(x)[:3]
      # Mapping x from [bs, h, w, c] to [bs, 1]
      net = tf.layers.conv2d(
          x, 64, (4, 4), strides=(2, 2), padding="SAME",
          name="d_conv1")  # [bs, h/2, w/2, 64]
      net = lrelu(net)
      net = tf.layers.conv2d(
          net, 128, (4, 4), strides=(2, 2), padding="SAME",
          name="d_conv2")  # [bs, h/4, w/4, 128]
      if hparams.discriminator_batchnorm:
        net = tf.layers.batch_normalization(
            net, training=is_training, momentum=0.999, name="d_bn2")
      net = lrelu(net)
      size = height * width
      net = tf.reshape(net, [batch_size, size * 8])  # [bs, h * w * 8]
      net = tf.layers.dense(net, 1024, name="d_fc3")  # [bs, 1024]
      if hparams.discriminator_batchnorm:
        net = tf.layers.batch_normalization(
            net, training=is_training, momentum=0.999, name="d_bn3")
      net = lrelu(net)
      return net
Example #25
Source File: vanilla_gan.py From fine-lm with MIT License | 5 votes |
def deconv2d(
    input_, output_shape, k_h, k_w, d_h, d_w, stddev=0.02, name="deconv2d"):
  """Deconvolution layer."""
  with tf.variable_scope(name):
    w = tf.get_variable(
        "w", [k_h, k_w, output_shape[-1], input_.get_shape()[-1]],
        initializer=tf.random_normal_initializer(stddev=stddev))
    deconv = tf.nn.conv2d_transpose(
        input_, w, output_shape=output_shape, strides=[1, d_h, d_w, 1])
    biases = tf.get_variable(
        "biases", [output_shape[-1]], initializer=tf.constant_initializer(0.0))
    return tf.reshape(tf.nn.bias_add(deconv, biases), deconv.get_shape())
Example #26
Source File: vanilla_gan.py From fine-lm with MIT License | 5 votes |
def generator(self, z, is_training, out_shape):
    """Generator outputting image in [0, 1]."""
    hparams = self.hparams
    height, width, c_dim = out_shape
    batch_size = hparams.batch_size
    with tf.variable_scope(
        "generator",
        initializer=tf.random_normal_initializer(stddev=0.02)):
      net = tf.layers.dense(z, 1024, name="g_fc1")
      net = tf.layers.batch_normalization(net, training=is_training,
                                          momentum=0.999, name="g_bn1")
      net = lrelu(net)
      net = tf.layers.dense(net, 128 * (height // 4) * (width // 4),
                            name="g_fc2")
      net = tf.layers.batch_normalization(net, training=is_training,
                                          momentum=0.999, name="g_bn2")
      net = lrelu(net)
      net = tf.reshape(net, [batch_size, height // 4, width // 4, 128])
      net = deconv2d(net, [batch_size, height // 2, width // 2, 64],
                     4, 4, 2, 2, name="g_dc3")
      net = tf.layers.batch_normalization(net, training=is_training,
                                          momentum=0.999, name="g_bn3")
      net = lrelu(net)
      net = deconv2d(net, [batch_size, height, width, c_dim],
                     4, 4, 2, 2, name="g_dc4")
      out = tf.nn.sigmoid(net)
      return common_layers.convert_real_to_rgb(out)
Example #27
Source File: modalities.py From fine-lm with MIT License | 5 votes |
def _get_weights(self, hidden_dim=None):
    """Create or get concatenated embedding or softmax variable.

    Args:
      hidden_dim: dim of the variable. Defaults to self._body_input_depth

    Returns:
      a list of self._num_shards Tensors.
    """
    if hidden_dim is None:
      hidden_dim = self._body_input_depth
    num_shards = self._model_hparams.symbol_modality_num_shards
    shards = []
    for i in range(num_shards):
      shard_size = (self._vocab_size // num_shards) + (
          1 if i < self._vocab_size % num_shards else 0)
      var_name = "weights_%d" % i
      shards.append(
          tf.get_variable(
              var_name, [shard_size, hidden_dim],
              initializer=tf.random_normal_initializer(0.0, hidden_dim**-0.5)))
    if num_shards == 1:
      ret = shards[0]
    else:
      ret = tf.concat(shards, 0)
    # Convert ret to tensor.
    if not tf.contrib.eager.in_eager_mode():
      ret = common_layers.convert_gradient_to_tensor(ret)
    return ret
Example #28
Source File: common_attention.py From fine-lm with MIT License | 5 votes |
def make_edge_vectors(adjacency_matrix, num_edge_types, depth, name=None):
  """Gets edge vectors for the edge types in the adjacency matrix.

  Args:
    adjacency_matrix: A [batch, num_nodes, num_nodes] tensor of ints.
    num_edge_types: Number of different edge types
    depth: Number of channels
    name: a string

  Returns:
    A [batch, num_nodes, num_nodes, depth] vector of tensors
  """
  with tf.variable_scope(name, default_name="edge_vectors"):
    att_adj_vectors_shape = [num_edge_types, depth]
    adjacency_matrix_shape = common_layers.shape_list(adjacency_matrix)
    adj_vectors = (
        tf.get_variable(
            "adj_vectors",
            att_adj_vectors_shape,
            initializer=tf.random_normal_initializer(0, depth**-0.5)) *
        (depth**0.5))
    # Avoiding gathers so that it works on TPUs
    # adjacency_matrix_one_hot has shape
    # [batch, num_nodes, num_nodes, num_edge_types]
    adjacency_matrix_one_hot = tf.one_hot(adjacency_matrix, num_edge_types)

    att_adj_vectors = tf.matmul(
        tf.reshape(tf.to_float(adjacency_matrix_one_hot), [-1, num_edge_types]),
        adj_vectors)
    return tf.reshape(att_adj_vectors,
                      [adjacency_matrix_shape[0], adjacency_matrix_shape[1],
                       adjacency_matrix_shape[2], depth])
Example #29
Source File: common_attention.py From fine-lm with MIT License | 5 votes |
def compute_attention_component(antecedent,
                                total_depth,
                                filter_width=1,
                                padding="VALID",
                                name="c",
                                vars_3d_num_heads=0):
  """Computes attention component (query, key or value).

  Args:
    antecedent: a Tensor with shape [batch, length, channels]
    total_depth: an integer
    filter_width: An integer specifying how wide you want the attention
      component to be.
    padding: One of "VALID", "SAME" or "LEFT". Default is VALID: No padding.
    name: a string specifying scope name.
    vars_3d_num_heads: an optional integer (if we want to use 3d variables)

  Returns:
    c : [batch, length, depth] tensor
  """
  if vars_3d_num_heads > 0:
    assert filter_width == 1
    input_depth = antecedent.get_shape().as_list()[-1]
    depth_per_head = total_depth // vars_3d_num_heads
    initializer_stddev = input_depth ** -0.5
    if "q" in name:
      initializer_stddev *= depth_per_head ** -0.5
    var = tf.get_variable(
        name, [input_depth, vars_3d_num_heads, total_depth // vars_3d_num_heads],
        initializer=tf.random_normal_initializer(stddev=initializer_stddev))
    var = tf.cast(var, antecedent.dtype)
    var = tf.reshape(var, [input_depth, total_depth])
    return tf.tensordot(antecedent, var, axes=1)
  if filter_width == 1:
    return common_layers.dense(
        antecedent, total_depth, use_bias=False, name=name)
  else:
    return common_layers.conv1d(
        antecedent, total_depth, filter_width, padding, name=name)
Example #30
Source File: ops.py From Generative-Latent-Optimization-Tensorflow with MIT License | 5 votes |
def residual_conv(input, num_filters, filter_size, stride, reuse=False,
                  pad='SAME', dtype=tf.float32, bias=False):
    stride_shape = [1, stride, stride, 1]
    filter_shape = [filter_size, filter_size, input.get_shape()[3], num_filters]
    w = tf.get_variable('w', filter_shape, dtype,
                        tf.random_normal_initializer(0.0, 0.02))
    p = (filter_size - 1) // 2
    x = tf.pad(input, [[0, 0], [p, p], [p, p], [0, 0]], 'REFLECT')
    conv = tf.nn.conv2d(x, w, stride_shape, padding='VALID')
    return conv