Python tensorflow.tanh() Examples
The following are 30 code examples of tensorflow.tanh().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module tensorflow, or try the search function.
Example #1
Source File: ex_utils.py From cs294-112_hws with MIT License | 6 votes |
def build_mlp(input_placeholder, output_size, scope, n_layers, size, activation=tf.tanh, output_activation=None):
    """Build a fully connected feedforward network inside a variable scope.

    Args:
        input_placeholder: input tensor fed to the first dense layer
            (described by the original author as (batch_size, input_size)).
        output_size: number of units in the final dense layer.
        scope: variable scope under which every layer is created.
        n_layers: number of hidden layers stacked before the output layer.
        size: number of units in each hidden layer.
        activation: activation applied by every hidden layer.
        output_activation: activation applied by the output layer.

    Returns:
        The tensor produced by the output layer (the result of a forward pass).
    """
    with tf.variable_scope(scope):
        hidden = input_placeholder
        for _layer in range(n_layers):
            hidden = tf.layers.dense(hidden, size, activation=activation)
        network_output = tf.layers.dense(
            hidden, output_size, activation=output_activation)
    return network_output
Example #2
Source File: policies.py From lirpg with MIT License | 6 votes |
def __init__(self, params, ob_space, ac_space, nbatch, nsteps): #pylint: disable=W0613
    """Build a two-hidden-layer tanh policy from externally supplied weights.

    Args:
        params: mapping from names such as 'policy/pi_fc1/w:0' to the weight,
            bias and log-std tensors used by the policy.
        ob_space: observation space; its ``shape`` sizes the input placeholder.
        ac_space: action space handed to ``make_pdtype``.
        nbatch: leading (batch) dimension of the observation placeholder.
        nsteps: unused here; kept for interface compatibility.
    """
    ob_shape = (nbatch,) + ob_space.shape
    X = tf.placeholder(tf.float32, ob_shape, name='Ob')  # observations
    with tf.name_scope('policy_new'):
        first_hidden = tf.tanh(tf.nn.xw_plus_b(
            X, params['policy/pi_fc1/w:0'], params['policy/pi_fc1/b:0']))
        second_hidden = tf.tanh(tf.nn.xw_plus_b(
            first_hidden, params['policy/pi_fc2/w:0'], params['policy/pi_fc2/b:0']))
        pi = tf.nn.xw_plus_b(
            second_hidden, params['policy/pi/w:0'], params['policy/pi/b:0'])
        logstd = params['policy/logstd:0']

    # `pi * 0.0 + logstd` broadcasts the log-std to the shape of the mean.
    flat_params = tf.concat([pi, pi * 0.0 + logstd], axis=1)
    self.pdtype = make_pdtype(ac_space)
    self.pd = self.pdtype.pdfromflat(flat_params)
    self.X = X
Example #3
Source File: next_frame.py From fine-lm with MIT License | 6 votes |
def stacked_lstm(self, inputs, states, hidden_size, output_size, nlayers):
    """Stacked LSTM layers with FC layers as input and output embeddings.

    Args:
      inputs: input tensor
      states: a list of internal lstm states for each layer; it is updated
        in place with the new state of every layer
      hidden_size: number of lstm units
      output_size: size of the output
      nlayers: number of lstm layers

    Returns:
      net: output of the network
      states: the same list, now holding the updated lstm state of each layer
    """
    # Linear input embedding.
    net = slim.layers.fully_connected(
        inputs, hidden_size, activation_fn=None, scope="af1")

    for layer_idx in range(nlayers):
        net, new_state = self.basic_lstm(
            net, states[layer_idx], hidden_size, scope="alstm%d" % layer_idx)
        states[layer_idx] = new_state

    # Output embedding squashed by tanh.
    net = slim.layers.fully_connected(
        net, output_size, activation_fn=tf.tanh, scope="af2")
    return net, states
Example #4
Source File: vgslspecs.py From DOTA_models with Apache License 2.0 | 6 votes |
def _NonLinearity(self, code):
    """Returns the non-linearity function pointer for the given string code.

    For forwards compatibility, allows the full names for stand-alone
    non-linearities, as well as the single-letter names used in ops like C,F.

    Args:
      code: String code representing a non-linearity function.

    Returns:
      The non-linearity function represented by the code, or None when the
      code is not one of the recognised spellings.
    """
    if code in ('s', 'Sig'):
        return tf.sigmoid
    if code in ('t', 'Tanh'):
        return tf.tanh
    if code in ('r', 'Relu'):
        return tf.nn.relu
    if code in ('m', 'Smax'):
        return tf.nn.softmax
    return None
Example #5
Source File: utils.py From lirpg with MIT License | 6 votes |
def lstm(xs, ms, s, scope, nh, init_scale=1.0):
    """Unroll an LSTM over a list of per-step inputs.

    Args:
        xs: list of per-step input tensors; each entry is OVERWRITTEN in place
            with the hidden output of that step.
        ms: per-step mask tensors, iterated in lockstep with ``xs``; the
            carried state is multiplied by ``(1 - m)``, so a mask value of 1
            zeroes the state before that step.
        s: state tensor holding the cell and hidden state concatenated along
            axis 1 (it is split into two equal halves).
        scope: variable scope under which the weights are created.
        nh: number of hidden units; the weight matrices have ``nh * 4`` columns.
        init_scale: scale handed to ``ortho_init`` for both weight matrices.

    Returns:
        ``(xs, s)``: the (mutated) list of per-step hidden outputs and the new
        state with cell and hidden halves concatenated along axis 1.
    """
    # Two-target unpacking also enforces that each step input is rank 2;
    # only the input width is needed below.
    _, nin = [v.value for v in xs[0].get_shape()]
    with tf.variable_scope(scope):
        wx = tf.get_variable("wx", [nin, nh*4], initializer=ortho_init(init_scale))
        wh = tf.get_variable("wh", [nh, nh*4], initializer=ortho_init(init_scale))
        b = tf.get_variable("b", [nh*4], initializer=tf.constant_initializer(0.0))

    c, h = tf.split(axis=1, num_or_size_splits=2, value=s)
    for idx, (x, m) in enumerate(zip(xs, ms)):
        # Reset the carried state wherever the mask is set.
        c = c*(1-m)
        h = h*(1-m)
        z = tf.matmul(x, wx) + tf.matmul(h, wh) + b
        # Pre-activations for input gate, forget gate, output gate, candidate.
        i, f, o, u = tf.split(axis=1, num_or_size_splits=4, value=z)
        i = tf.nn.sigmoid(i)
        f = tf.nn.sigmoid(f)
        o = tf.nn.sigmoid(o)
        u = tf.tanh(u)
        c = f*c + i*u
        h = o*tf.tanh(c)
        xs[idx] = h
    s = tf.concat(axis=1, values=[c, h])
    return xs, s
Example #6
Source File: common_layers.py From fine-lm with MIT License | 6 votes |
def conv_lstm(x, kernel_size, filters, padding="SAME", dilation_rate=(1, 1), name=None, reuse=None):
    """Convolutional LSTM in 1 dimension.

    A single convolution produces ``4 * filters`` channels which are layer
    normalised and split along axis 3 into four gate pre-activations. The
    input ``x`` itself plays the role of the previous cell state.
    """
    with tf.variable_scope(
        name, default_name="conv_lstm", values=[x], reuse=reuse):
        gates = conv(
            x, 4 * filters, kernel_size,
            padding=padding, dilation_rate=dilation_rate)
        keep_pre, write_pre, out_pre, cand_pre = tf.split(
            layer_norm(gates, 4 * filters), 4, axis=3)
        kept = tf.sigmoid(keep_pre) * x
        written = tf.sigmoid(write_pre) * tf.tanh(cand_pre)
        new_cell = kept + written
        return tf.sigmoid(out_pre) * tf.tanh(new_cell)
Example #7
Source File: utils.py From HardRLWithYoutube with MIT License | 6 votes |
def lstm(xs, ms, s, scope, nh, init_scale=1.0):
    """Run an LSTM step by step over the inputs in ``xs``.

    ``s`` carries the cell and hidden state concatenated along axis 1. Before
    every step both halves are multiplied by ``(1 - m)`` for that step's mask.
    Each entry of ``xs`` is replaced in place by the step's hidden output.

    Returns:
        ``(xs, s)`` with ``s`` rebuilt from the final cell and hidden state.
    """
    _nbatch, nin = [dim.value for dim in xs[0].get_shape()]
    with tf.variable_scope(scope):
        wx = tf.get_variable("wx", [nin, nh*4], initializer=ortho_init(init_scale))
        wh = tf.get_variable("wh", [nh, nh*4], initializer=ortho_init(init_scale))
        b = tf.get_variable("b", [nh*4], initializer=tf.constant_initializer(0.0))

    cell, hidden = tf.split(axis=1, num_or_size_splits=2, value=s)
    for step, (x, mask) in enumerate(zip(xs, ms)):
        cell = cell*(1-mask)
        hidden = hidden*(1-mask)
        preact = tf.matmul(x, wx) + tf.matmul(hidden, wh) + b
        in_pre, forget_pre, out_pre, cand_pre = tf.split(
            axis=1, num_or_size_splits=4, value=preact)
        in_gate = tf.nn.sigmoid(in_pre)
        forget_gate = tf.nn.sigmoid(forget_pre)
        out_gate = tf.nn.sigmoid(out_pre)
        candidate = tf.tanh(cand_pre)
        cell = forget_gate*cell + in_gate*candidate
        hidden = out_gate*tf.tanh(cell)
        xs[step] = hidden
    s = tf.concat(axis=1, values=[cell, hidden])
    return xs, s
Example #8
Source File: blocks_lstm.py From DOTA_models with Apache License 2.0 | 6 votes |
def _Apply(self, *args):
    """Advance the LSTM block by one step and return the new hidden state.

    The transformed inputs (plus the transformed previous hidden state, when
    there is one) are split into four equal parts along the last axis of
    ``self._output_shape``. ``self.cell``, ``self.hidden`` and ``self._iter``
    are updated as a side effect.
    """
    xtransform = self._TransformInputs(*args)
    depth_axis = len(self._output_shape) - 1

    if self.hidden is None:
        gate_input = xtransform
    else:
        gate_input = self._TransformHidden(self.hidden) + xtransform
    f, i, j, o = tf.split(
        value=gate_input, num_or_size_splits=4, axis=depth_axis)

    if self.cell is None:
        # No previous cell state: nothing for the forget gate to act on.
        self.cell = tf.sigmoid(i) * tf.tanh(j)
    else:
        self.cell = tf.sigmoid(f) * self.cell + tf.sigmoid(i) * tf.tanh(j)

    self.hidden = tf.sigmoid(o) * tf.tanh(self.cell)
    self._iter += 1
    return self.hidden
Example #9
Source File: policy.py From DOTA_models with Apache License 2.0 | 6 votes |
def get_cell(self):
    """Return a feed-forward "cell" function and record its input width.

    Sets ``self.cell_input_dim`` to ``self.internal_dim`` and returns a
    function ``mlp(cell_input, prev_internal_state)`` that yields
    ``(output, hidden)``.
    """
    self.cell_input_dim = self.internal_dim

    def mlp(cell_input, prev_internal_state):
        # `prev_internal_state` is accepted but never read.
        w1 = tf.get_variable('w1', [self.cell_input_dim, self.internal_dim])
        b1 = tf.get_variable('b1', [self.internal_dim])

        w2 = tf.get_variable('w2', [self.internal_dim, self.internal_dim])
        b2 = tf.get_variable('b2', [self.internal_dim])

        # 'w3'/'b3' are still created (variable creation is a side effect)
        # although the forward pass below does not use them.
        w3 = tf.get_variable('w3', [self.internal_dim, self.internal_dim])
        b3 = tf.get_variable('b3', [self.internal_dim])

        proj = tf.get_variable(
            'proj', [self.internal_dim, self.output_dim])

        first = tf.tanh(tf.nn.bias_add(tf.matmul(cell_input, w1), b1))
        hidden = tf.tanh(tf.nn.bias_add(tf.matmul(first, w2), b2))
        return tf.matmul(hidden, proj), hidden

    return mlp
Example #10
Source File: networks.py From soccer-matlab with BSD 2-Clause "Simplified" License | 6 votes |
def __call__(self, observation, state):
    """Build the policy and value heads for ``observation``.

    The policy mean is a single tanh fully connected layer on the flattened
    observation; the log-std is a variable tiled along the batch axis. The
    value head is a stack of relu layers followed by a one-unit linear layer.

    Returns:
        ``((mean, logstd, value), state)`` where ``state`` is passed through
        unchanged.
    """
    with tf.variable_scope('policy'):
        flat_obs = tf.contrib.layers.flatten(observation)
        mean = tf.contrib.layers.fully_connected(
            flat_obs, self._action_size, tf.tanh,
            weights_initializer=self._mean_weights_initializer)
        logstd_var = tf.get_variable(
            'logstd', mean.shape[1:], tf.float32, self._logstd_initializer)
        # Add a leading axis, then repeat it once per batch row.
        expanded = logstd_var[None, ...]
        multiples = [tf.shape(mean)[0]] + [1] * logstd_var.shape.ndims
        logstd = tf.tile(expanded, multiples)
    with tf.variable_scope('value'):
        features = tf.contrib.layers.flatten(observation)
        for width in self._value_layers:
            features = tf.contrib.layers.fully_connected(
                features, width, tf.nn.relu)
        value = tf.contrib.layers.fully_connected(features, 1, None)[:, 0]
    return (mean, logstd, value), state
Example #11
Source File: networks.py From soccer-matlab with BSD 2-Clause "Simplified" License | 6 votes |
def __call__(self, observation, state):
    """Build a feed-forward policy head and a separate value head.

    The policy applies relu layers sized by ``self._policy_layers`` and then a
    tanh layer producing the action mean; the log-std is a variable tiled
    along the batch axis. The value head applies relu layers sized by
    ``self._value_layers`` followed by a one-unit linear layer.

    Returns:
        ``((mean, logstd, value), state)`` where ``state`` is passed through
        unchanged.
    """
    with tf.variable_scope('policy'):
        policy_net = tf.contrib.layers.flatten(observation)
        for width in self._policy_layers:
            policy_net = tf.contrib.layers.fully_connected(
                policy_net, width, tf.nn.relu)
        mean = tf.contrib.layers.fully_connected(
            policy_net, self._action_size, tf.tanh,
            weights_initializer=self._mean_weights_initializer)
        logstd_var = tf.get_variable(
            'logstd', mean.shape[1:], tf.float32, self._logstd_initializer)
        # Add a leading axis, then repeat it once per batch row.
        expanded = logstd_var[None, ...]
        multiples = [tf.shape(mean)[0]] + [1] * logstd_var.shape.ndims
        logstd = tf.tile(expanded, multiples)
    with tf.variable_scope('value'):
        value_net = tf.contrib.layers.flatten(observation)
        for width in self._value_layers:
            value_net = tf.contrib.layers.fully_connected(
                value_net, width, tf.nn.relu)
        value = tf.contrib.layers.fully_connected(value_net, 1, None)[:, 0]
    return (mean, logstd, value), state
Example #12
Source File: nn.py From cs294-112_hws with MIT License | 6 votes |
def call(self, inputs):
    """Sample tanh-squashed actions and their log-probabilities.

    ``self.model`` outputs the mean and log-std side by side along axis 1.
    The log-std is clipped to [-20, 2] before building a diagonal Gaussian.

    Returns:
        ``(actions, log_probs)``; the actions are also stored on
        ``self.actions``.
    """
    mean, log_std = tf.split(
        self.model(inputs), num_or_size_splits=2, axis=1)
    log_std = tf.clip_by_value(log_std, -20., 2.)

    distribution = tfp.distributions.MultivariateNormalDiag(
        loc=mean, scale_diag=tf.exp(log_std))

    raw_actions = distribution.sample()
    if not self._reparameterize:
        # Without reparameterisation no gradient flows through the sample.
        raw_actions = tf.stop_gradient(raw_actions)

    # NOTE(review): `_squash_correction` presumably accounts for the tanh
    # change of variables -- confirm against its definition.
    log_probs = (distribution.log_prob(raw_actions)
                 - self._squash_correction(raw_actions))

    self.actions = tf.tanh(raw_actions)
    return self.actions, log_probs
Example #13
Source File: models.py From HardRLWithYoutube with MIT License | 6 votes |
def mlp(num_layers=2, num_hidden=64, activation=tf.tanh):
    """
    Stack of fully-connected layers to be used in a policy / q-function approximator

    Parameters:
    ----------
    num_layers: int      number of fully-connected layers (default: 2)
    num_hidden: int      size of fully-connected layers (default: 64)
    activation:          activation function (default: tf.tanh)

    Returns:
    -------
    function that builds fully connected network with a given input tensor / placeholder;
    the built function returns the pair (last hidden tensor, None)
    """
    def network_fn(X):
        features = tf.layers.flatten(X)
        for layer_idx in range(num_layers):
            layer_name = 'mlp_fc{}'.format(layer_idx)
            pre_activation = fc(
                features, layer_name, nh=num_hidden, init_scale=np.sqrt(2))
            features = activation(pre_activation)
        return features, None

    return network_fn
Example #14
Source File: train_ac_exploration_f18.py From cs294-112_hws with MIT License | 6 votes |
def build_mlp(input_placeholder, output_size, scope, n_layers, size, activation=tf.tanh, output_activation=None):
    """Builds a feedforward neural network.

    arguments:
        input_placeholder: input tensor for the state (described by the
            original author as (batch_size, input_size))
        output_size: size of the output layer
        scope: variable scope of the network
        n_layers: number of hidden layers
        size: dimension of the hidden layer
        activation: activation of the hidden layers
        output_activation: activation of the output layer

    returns:
        output tensor of the network (the result of a forward pass)
    """
    net = input_placeholder
    with tf.variable_scope(scope):
        remaining = n_layers
        while remaining > 0:
            net = tf.layers.dense(net, size, activation=activation)
            remaining -= 1
        net = tf.layers.dense(net, output_size, activation=output_activation)
    return net
Example #15
Source File: train_ac_f18.py From cs294-112_hws with MIT License | 6 votes |
def build_mlp(input_placeholder, output_size, scope, n_layers, size, activation=tf.tanh, output_activation=None):
    """Builds a feedforward neural network with named layers.

    Hidden layers are named 'h1', 'h2', ... and the final layer 'output'.

    arguments:
        input_placeholder: input tensor for the state (described by the
            original author as (batch_size, input_size))
        output_size: size of the output layer
        scope: variable scope of the network
        n_layers: number of hidden layers
        size: dimension of the hidden layer
        activation: activation of the hidden layers
        output_activation: activation of the output layer

    returns:
        output tensor of the network (the result of a forward pass)
    """
    with tf.variable_scope(scope):
        hidden = input_placeholder
        for layer_number in range(1, n_layers + 1):
            hidden = tf.layers.dense(
                hidden, size, activation=activation,
                name='h{}'.format(layer_number))
        output_placeholder = tf.layers.dense(
            hidden, output_size, activation=output_activation, name='output')
    return output_placeholder
Example #16
Source File: train_pg_f18.py From cs294-112_hws with MIT License | 6 votes |
def build_mlp(input_placeholder, output_size, scope, n_layers, size, activation=tf.tanh, output_activation=None):
    """Builds a feedforward neural network under ``scope``.

    The hidden layers are created as 'h1' ... 'h<n_layers>' and are followed
    by a dense layer named 'output'.

    arguments:
        input_placeholder: input tensor for the state (described by the
            original author as (batch_size, input_size))
        output_size: size of the output layer
        scope: variable scope of the network
        n_layers: number of hidden layers
        size: dimension of the hidden layer
        activation: activation of the hidden layers
        output_activation: activation of the output layer

    returns:
        output tensor of the network (the result of a forward pass)
    """
    with tf.variable_scope(scope):
        features = input_placeholder
        hidden_names = ['h{}'.format(idx + 1) for idx in range(n_layers)]
        for layer_name in hidden_names:
            features = tf.layers.dense(
                features, size, activation=activation, name=layer_name)
        output_placeholder = tf.layers.dense(
            features, output_size, activation=output_activation, name='output')
    return output_placeholder
Example #17
Source File: train_policy.py From cs294-112_hws with MIT License | 6 votes |
def build_mlp(x, output_size, scope, n_layers, size, activation=tf.tanh, output_activation=None, regularizer=None):
    """
    builds a feedforward neural network

    Hidden layers are named 'fc0', 'fc1', ...; the output layer takes the next
    index (and is named 'fc1' when there are no hidden layers). ``scope`` is
    accepted but not used inside this function.

    arguments:
        x: input tensor for the state (described by the original author as
            (batch_size, input_size))
        regularizer: regularization for weights (applied to kernel and bias)
        (see `build_policy()` for rest)

    returns:
        output tensor of the network (the result of a forward pass)
    """
    # Index used to name the output layer; stays 1 when the loop never runs.
    output_index = 1
    for layer_idx in range(n_layers):
        x = tf.layers.dense(
            inputs=x,
            units=size,
            activation=activation,
            name='fc{}'.format(layer_idx),
            kernel_regularizer=regularizer,
            bias_regularizer=regularizer)
        output_index = layer_idx + 1

    return tf.layers.dense(
        inputs=x,
        units=output_size,
        activation=output_activation,
        name='fc{}'.format(output_index),
        kernel_regularizer=regularizer,
        bias_regularizer=regularizer)
Example #18
Source File: train_policy.py From cs294-112_hws with MIT License | 6 votes |
def build_rnn(x, h, output_size, scope, n_layers, size, activation=tf.tanh, output_activation=None, regularizer=None):
    """
    builds a gated recurrent neural network

    inputs are first embedded by an MLP (built with `build_mlp()`) and then
    passed to a GRU with `output_size` units. `activation` is used for the MLP
    hidden layers, the MLP output layer and the GRU. `output_activation` is
    accepted but not used here.

    arguments:
        (see `build_policy()`)

    returns:
        the GRU output and its new state, as `(x, h)`
    """
    embedded = build_mlp(
        x, output_size, scope, n_layers, size,
        activation=activation,
        output_activation=activation,
        regularizer=regularizer)
    gru = tf.keras.layers.GRU(
        output_size,
        activation=activation,
        return_sequences=False,
        return_state=True)
    gru_output, new_state = gru(embedded, h)
    return gru_output, new_state
Example #19
Source File: train_policy.py From cs294-112_hws with MIT License | 6 votes |
def build_critic(x, h, output_size, scope, n_layers, size, gru_size, recurrent=True, activation=tf.tanh, output_activation=None, regularizer=None):
    """
    build recurrent critic

    With `recurrent` set, the input goes through `build_rnn()`; otherwise
    axes 1 and 2 of `x` are merged and the result goes through `build_mlp()`
    with one extra layer. Either way a dense 'decoder' layer maps the features
    to `output_size`.

    arguments:
        regularizer: regularization for weights
        (see `build_policy()` for rest)

    n.b. the policy and critic should not share weights
    """
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
        if not recurrent:
            static_shape = x.get_shape()
            x = tf.reshape(x, (-1, static_shape[1]*static_shape[2]))
            features = build_mlp(
                x, gru_size, scope, n_layers + 1, size,
                activation=activation,
                output_activation=activation,
                regularizer=regularizer)
        else:
            features, h = build_rnn(
                x, h, gru_size, scope, n_layers, size,
                activation=activation,
                output_activation=output_activation,
                regularizer=regularizer)
        decoded = tf.layers.dense(
            features,
            output_size,
            activation=output_activation,
            name='decoder',
            kernel_regularizer=regularizer,
            bias_regularizer=regularizer)
    return decoded
Example #20
Source File: networks.py From soccer-matlab with BSD 2-Clause "Simplified" License | 6 votes |
def __call__(self, observation, state):
    """Build a recurrent policy head and a feed-forward value head.

    All but the last entry of ``self._policy_layers`` size relu layers whose
    output is fed, together with ``state``, to ``self._cell``. The cell output
    goes through a tanh layer producing the action mean; the log-std is a
    variable tiled along the batch axis.

    Returns:
        ``((mean, logstd, value), state)`` where ``state`` is the state
        returned by ``self._cell``.
    """
    with tf.variable_scope('policy'):
        policy_net = tf.contrib.layers.flatten(observation)
        for width in self._policy_layers[:-1]:
            policy_net = tf.contrib.layers.fully_connected(
                policy_net, width, tf.nn.relu)
        policy_net, state = self._cell(policy_net, state)
        mean = tf.contrib.layers.fully_connected(
            policy_net, self._action_size, tf.tanh,
            weights_initializer=self._mean_weights_initializer)
        logstd_var = tf.get_variable(
            'logstd', mean.shape[1:], tf.float32, self._logstd_initializer)
        # Add a leading axis, then repeat it once per batch row.
        expanded = logstd_var[None, ...]
        multiples = [tf.shape(mean)[0]] + [1] * logstd_var.shape.ndims
        logstd = tf.tile(expanded, multiples)
    with tf.variable_scope('value'):
        value_net = tf.contrib.layers.flatten(observation)
        for width in self._value_layers:
            value_net = tf.contrib.layers.fully_connected(
                value_net, width, tf.nn.relu)
        value = tf.contrib.layers.fully_connected(value_net, 1, None)[:, 0]
    return (mean, logstd, value), state
Example #21
Source File: blocks_lstm.py From DOTA_models with Apache License 2.0 | 6 votes |
def _Apply(self, *args):
    """Run one LSTM step, updating ``self.cell`` and ``self.hidden``.

    The gate pre-activations come from the transformed inputs, plus the
    transformed previous hidden state when one exists, split four ways along
    the last axis of ``self._output_shape``.

    Returns:
        The new hidden state.
    """
    xtransform = self._TransformInputs(*args)
    depth_axis = len(self._output_shape) - 1

    if self.hidden is None:
        preactivation = xtransform
    else:
        preactivation = self._TransformHidden(self.hidden) + xtransform
    f, i, j, o = tf.split(
        value=preactivation, num_or_size_splits=4, axis=depth_axis)

    if self.cell is None:
        # First step: there is no previous cell state to carry over.
        self.cell = tf.sigmoid(i) * tf.tanh(j)
    else:
        self.cell = tf.sigmoid(f) * self.cell + tf.sigmoid(i) * tf.tanh(j)

    self.hidden = tf.sigmoid(o) * tf.tanh(self.cell)
    return self.hidden
Example #22
Source File: generator.py From UROP-Adversarial-Feature-Matching-for-Text-Generation with GNU Affero General Public License v3.0 | 5 votes |
def lstm(self, prev_y, prev_h, prev_c, z):
    """Run one LSTM step and return the soft-embedded output.

    Shape comments below are the original author's annotations.

    Returns:
        ``(y, h, c)``: the next input embedding, hidden state and cell state.
    """
    hs = self.hidden_size

    # Pre-activation: [1, batch_size, hidden_size * 4]
    preact = (tf.einsum('ijk,ka->ija', prev_h, self.h2h_W)
              + tf.einsum('ijk,ka->ija', prev_y, self.i2h_W)
              + tf.matmul(z, self.z2h_W)
              + self.b)

    def gate_slice(k):
        # k-th block of `hidden_size` channels along the last axis.
        return preact[:, :, k*hs: (k + 1)*hs]

    i = tf.sigmoid(gate_slice(0))
    f = tf.sigmoid(gate_slice(1))
    o = tf.sigmoid(gate_slice(2))
    candidate = tf.tanh(gate_slice(3))

    c = f * prev_c + i * candidate  # [1, batch_size, hidden_size] (element-wise multiply)
    h = o * tf.tanh(c)  # [1, batch_size, hidden_size]

    logits = tf.einsum('ijk,ka->ija', h, self.Vhid) + self.bhid  # [1, batch_size, vocab_size]

    # The author doesn't mention this part in his paper, but it appears in his code,
    # so it is assumed to be part of his soft-max approximation strategy ---|
    max_logits = tf.reduce_max(logits, axis=1, keep_dims=True)  # [1, 1, vocab_size]
    e = tf.exp((logits - max_logits) * self.L)  # [1, batch_size, vocab_size]
    w = e / tf.reduce_sum(e, axis=1, keep_dims=True)  # [1, batch_size, vocab_size]
    # Assumption ends here ----------------------------------------|

    y = tf.einsum('ijk,ka->ija', w, self.Wemb)  # [1, batch_size, input_dim]
    return y, h, c
Example #23
Source File: actor_critic.py From lirpg with MIT License | 5 votes |
def __init__(self, inputs_tf, dimo, dimg, dimu, max_u, o_stats, g_stats, hidden, layers, **kwargs):
    """The actor-critic network and related training code.

    NOTE(review): the body reads ``self.o_stats``, ``self.max_u``,
    ``self.hidden`` etc. without assigning them; presumably the constructor
    arguments are stored on ``self`` by something not visible here (e.g. a
    decorator) -- confirm.

    Args:
        inputs_tf (dict of tensors): all necessary inputs for the network: the
            observation (o), the goal (g), and the action (u)
        dimo (int): the dimension of the observations
        dimg (int): the dimension of the goals
        dimu (int): the dimension of the actions
        max_u (float): the maximum magnitude of actions; action outputs will be scaled
            accordingly
        o_stats (baselines.her.Normalizer): normalizer for observations
        g_stats (baselines.her.Normalizer): normalizer for goals
        hidden (int): number of hidden units that should be used in hidden layers
        layers (int): number of hidden layers
    """
    self.o_tf = inputs_tf['o']
    self.g_tf = inputs_tf['g']
    self.u_tf = inputs_tf['u']

    # Normalised inputs shared by actor and critic.
    obs_norm = self.o_stats.normalize(self.o_tf)
    goal_norm = self.g_stats.normalize(self.g_tf)
    actor_input = tf.concat(axis=1, values=[obs_norm, goal_norm])

    # Actor: tanh output scaled to [-max_u, max_u].
    with tf.variable_scope('pi'):
        actor_sizes = [self.hidden] * self.layers + [self.dimu]
        self.pi_tf = self.max_u * tf.tanh(nn(actor_input, actor_sizes))

    with tf.variable_scope('Q'):
        # Critic evaluated on the actor's action (for policy training).
        input_Q = tf.concat(
            axis=1, values=[obs_norm, goal_norm, self.pi_tf / self.max_u])
        self.Q_pi_tf = nn(input_Q, [self.hidden] * self.layers + [1])

        # Critic evaluated on the supplied action (for critic training);
        # reuses the variables created just above.
        input_Q = tf.concat(
            axis=1, values=[obs_norm, goal_norm, self.u_tf / self.max_u])
        self._input_Q = input_Q  # exposed for tests
        self.Q_tf = nn(
            input_Q, [self.hidden] * self.layers + [1], reuse=True)
Example #24
Source File: utils.py From HardRLWithYoutube with MIT License | 5 votes |
def lnlstm(xs, ms, s, scope, nh, init_scale=1.0):
    """Run an LSTM whose projections and cell state pass through ``_ln``.

    ``s`` carries the cell and hidden state concatenated along axis 1; both
    are multiplied by ``(1 - m)`` for each step's mask before the step. Each
    entry of ``xs`` is replaced in place by that step's hidden output.

    NOTE(review): ``_ln`` is presumably layer normalisation with a gain and a
    bias -- confirm against its definition.

    Returns:
        ``(xs, s)`` with ``s`` rebuilt from the final cell and hidden state.
    """
    _nbatch, nin = [dim.value for dim in xs[0].get_shape()]
    with tf.variable_scope(scope):
        wx = tf.get_variable("wx", [nin, nh*4], initializer=ortho_init(init_scale))
        gx = tf.get_variable("gx", [nh*4], initializer=tf.constant_initializer(1.0))
        bx = tf.get_variable("bx", [nh*4], initializer=tf.constant_initializer(0.0))

        wh = tf.get_variable("wh", [nh, nh*4], initializer=ortho_init(init_scale))
        gh = tf.get_variable("gh", [nh*4], initializer=tf.constant_initializer(1.0))
        bh = tf.get_variable("bh", [nh*4], initializer=tf.constant_initializer(0.0))

        b = tf.get_variable("b", [nh*4], initializer=tf.constant_initializer(0.0))

        gc = tf.get_variable("gc", [nh], initializer=tf.constant_initializer(1.0))
        bc = tf.get_variable("bc", [nh], initializer=tf.constant_initializer(0.0))

    cell, hidden = tf.split(axis=1, num_or_size_splits=2, value=s)
    for step, (x, mask) in enumerate(zip(xs, ms)):
        cell = cell*(1-mask)
        hidden = hidden*(1-mask)
        preact = _ln(tf.matmul(x, wx), gx, bx) + _ln(tf.matmul(hidden, wh), gh, bh) + b
        in_pre, forget_pre, out_pre, cand_pre = tf.split(
            axis=1, num_or_size_splits=4, value=preact)
        in_gate = tf.nn.sigmoid(in_pre)
        forget_gate = tf.nn.sigmoid(forget_pre)
        out_gate = tf.nn.sigmoid(out_pre)
        candidate = tf.tanh(cand_pre)
        cell = forget_gate*cell + in_gate*candidate
        hidden = out_gate*tf.tanh(_ln(cell, gc, bc))
        xs[step] = hidden
    s = tf.concat(axis=1, values=[cell, hidden])
    return xs, s
Example #25
Source File: discriminator.py From UROP-Adversarial-Feature-Matching-for-Text-Generation with GNU Affero General Public License v3.0 | 5 votes |
def cp(self, input_sents, i=0):
    """Convolve and max-pool ``input_sents`` with the ``i``-th filter bank.

    The filter variable named ``self.name + '_W' + str(i)`` is looked up in
    the already-created variable scope 'd'. The pooling window covers every
    position left after the VALID convolution along axis 1.

    Returns:
        The pooled, dropped-out features reshaped to ``[-1, self.input_dim]``.
    """
    # conv and pool
    # https://github.com/hunkim/DeepLearningZeroToAll/blob/master/lab-11-2-mnist_deep_cnn.py
    unit_strides = [1, 1, 1, 1]
    with tf.variable_scope('d', reuse=True):
        filters = tf.get_variable(self.name + '_W' + str(i))
        conv_out = tf.nn.conv2d(
            input_sents, filters, strides=unit_strides, padding='VALID')
        activated = tf.nn.tanh(conv_out)
        # Number of positions remaining along axis 1 after the convolution.
        pool_height = input_sents.get_shape().as_list()[1] - self.window[i] + 1
        pooled = tf.nn.max_pool(
            activated,
            ksize=[1, pool_height, 1, 1],
            strides=[1, 1, 1, 1],
            padding='VALID')
        dropped = tf.nn.dropout(pooled, keep_prob=self.keep_prob)
        return tf.reshape(dropped, [-1, self.input_dim])
Example #26
Source File: discretization.py From fine-lm with MIT License | 5 votes |
def tanh_discrete_bottleneck(x, bottleneck_bits, bottleneck_noise, discretize_warmup_steps, mode):
    """Simple discretization through tanh, flip bottleneck_noise many bits.

    Returns:
        ``(d, 0.0)`` where ``d`` mixes the discretized values with the tanh
        activations via ``common_layers.mix``.
    """
    is_training = mode == tf.estimator.ModeKeys.TRAIN

    x = tf.tanh(tf.layers.dense(x, bottleneck_bits,
                                name="tanh_discrete_bottleneck"))
    # Forward value is +/-1 by sign of x; the stop_gradient makes the
    # gradient flow through x only.
    signs = 2.0 * tf.to_float(tf.less(0.0, x)) - 1.0
    d = x + tf.stop_gradient(signs - x)

    if is_training:
        uniform = tf.random_uniform(common_layers.shape_list(x))
        # -1 where the uniform draw does not exceed bottleneck_noise, else +1.
        flips = 2.0 * tf.to_float(tf.less(bottleneck_noise, uniform)) - 1.0
        d *= flips

    d = common_layers.mix(d, x, discretize_warmup_steps, is_training)
    return d, 0.0
Example #27
Source File: policies.py From lirpg with MIT License | 5 votes |
def __init__(self, sess, ob_space, ac_space, nbatch, nsteps, reuse=False): #pylint: disable=W0613
    """Build an MLP policy with separate tanh towers for policy and value.

    Args:
        sess: session used by ``step`` and ``value`` to evaluate tensors.
        ob_space: observation space; its ``shape`` sizes the input placeholder.
        ac_space: action space; ``shape[0]`` is the action dimension.
        nbatch: leading (batch) dimension of the observation placeholder.
        nsteps: unused here; kept for interface compatibility.
        reuse: forwarded to the "model" variable scope.
    """
    ob_shape = (nbatch,) + ob_space.shape
    actdim = ac_space.shape[0]
    X = tf.placeholder(tf.float32, ob_shape, name='Ob')  # observations
    with tf.variable_scope("model", reuse=reuse):
        # Policy tower.
        pi_h1 = tf.tanh(fc(X, 'pi_fc1', nh=64, init_scale=np.sqrt(2)))
        pi_h2 = tf.tanh(fc(pi_h1, 'pi_fc2', nh=64, init_scale=np.sqrt(2)))
        pi = fc(pi_h2, 'pi', actdim, init_scale=0.01)
        # Value tower.
        vf_h1 = tf.tanh(fc(X, 'vf_fc1', nh=64, init_scale=np.sqrt(2)))
        vf_h2 = tf.tanh(fc(vf_h1, 'vf_fc2', nh=64, init_scale=np.sqrt(2)))
        vf = fc(vf_h2, 'vf', 1)[:,0]
        logstd = tf.get_variable(name="logstd", shape=[1, actdim],
                                 initializer=tf.zeros_initializer())

    # `pi * 0.0 + logstd` broadcasts the log-std to the shape of the mean.
    pdparam = tf.concat([pi, pi * 0.0 + logstd], axis=1)

    self.pdtype = make_pdtype(ac_space)
    self.pd = self.pdtype.pdfromflat(pdparam)

    a0 = self.pd.sample()
    neglogp0 = self.pd.neglogp(a0)
    self.initial_state = None

    def step(ob, *_args, **_kwargs):
        action, value_estimate, neglogp = sess.run([a0, vf, neglogp0], {X: ob})
        return action, value_estimate, self.initial_state, neglogp

    def value(ob, *_args, **_kwargs):
        return sess.run(vf, {X: ob})

    self.X = X
    self.pi = pi
    self.vf = vf
    self.step = step
    self.value = value

########################################################################################################################
# Intrinsic Reward Augmented Policies
########################################################################################################################
Example #28
Source File: discretization.py From fine-lm with MIT License | 5 votes |
def tanh_discrete_unbottleneck(x, hidden_size):
    """Simple un-discretization from tanh: one dense layer of `hidden_size` units."""
    return tf.layers.dense(x, hidden_size, name="tanh_discrete_unbottleneck")
Example #29
Source File: value_model.py From BetaElephant with MIT License | 5 votes |
def get_model(name):
    # Builds the value-model graph under the name scope `name` and returns a
    # `Model` wrapping the six input placeholders, the label placeholder, the
    # loss and the prediction (with `z` and `z_sum` exposed for debugging).
    #
    # NOTE(review): `tf.concat(3, [...])`, `tf.mul` and `tf.div` look like the
    # pre-1.0 TensorFlow spellings -- confirm the TF version this file targets.
    with tf.name_scope(name) as scope:
        # Six board-feature inputs, all shaped `config.data_shape`.
        self_pos = tf.placeholder(config.dtype, config.data_shape, name='self_pos')
        enemy_pos = tf.placeholder(config.dtype, config.data_shape, name='enemy_pos')
        self_ability = tf.placeholder(config.dtype, config.data_shape, name='self_ability')
        enemy_ability = tf.placeholder(config.dtype, config.data_shape, name='enemy_ability')
        self_protect = tf.placeholder(config.dtype, config.data_shape, name='self_protect')
        enemy_protect = tf.placeholder(config.dtype, config.data_shape, name='enemy_protect')
        input_label = tf.placeholder(config.dtype, config.label_shape, name='input_label')
        # All six inputs joined along axis 3.
        x = tf.concat(3, [self_pos, enemy_pos, self_ability, enemy_ability, self_protect, enemy_protect], name='input_concat')
        y = input_label
        nl = tf.nn.tanh

        def conv_pip(name, x):
            # Two stacked 3x3, stride-1 convolutions with tanh: the first with
            # twice `config.data_shape[3]` output channels, the second with
            # `config.data_shape[3]`.
            with tf.name_scope(name) as scope:
                x = conv2d('0', x, config.data_shape[3]*2, kernel=3, stride=1, nl=nl)
                x = conv2d('1', x, config.data_shape[3], kernel=3, stride=1, nl=nl)
            return x

        pred = conv_pip('conv0', x)
        # Five residual-style updates: each branch sees the running prediction
        # joined with the raw input, and its output is added to `pred`.
        for layer in range(5):
            pred_branch = tf.concat(3, [pred,x], name='concate%d'%layer)
            pred += conv_pip('conv%d'%(layer+1), pred_branch)
        # Trainable scale (initialised to 2.0) applied to the tanh output.
        a = tf.Variable(2.0, dtype=tf.float32, name='control_tanh_const')
        x = a*tf.tanh(pred, name='control_tanh')
        # Exponentiated scores, weighted element-wise by `self_ability`.
        z = tf.mul(tf.exp(x), self_ability)
        z_sum = tf.reduce_sum(z, reduction_indices=[1,2,3], name='partition_function') # partition function
        # another formula of y*logy
        loss = -tf.reduce_sum(tf.mul(x, y), reduction_indices=[1,2,3]) + tf.log(z_sum)
        # Reshape so the per-example sum broadcasts against `z`.
        z_sum = tf.reshape(z_sum, [-1, 1, 1, 1])
        pred = tf.div(z, z_sum, name='predict')
        return Model([self_pos, enemy_pos, self_ability, enemy_ability, self_protect, enemy_protect], input_label, loss, pred, debug=[z, z_sum])
Example #30
Source File: utils.py From lirpg with MIT License | 5 votes |
def lnlstm(xs, ms, s, scope, nh, init_scale=1.0):
    """Unroll an LSTM whose projections and cell state pass through ``_ln``.

    NOTE(review): ``_ln`` is presumably layer normalisation with a gain and a
    bias -- confirm against its definition.

    Args:
        xs: list of per-step input tensors; each entry is OVERWRITTEN in place
            with the hidden output of that step.
        ms: per-step mask tensors, iterated in lockstep with ``xs``; the
            carried state is multiplied by ``(1 - m)``, so a mask value of 1
            zeroes the state before that step.
        s: state tensor holding the cell and hidden state concatenated along
            axis 1 (it is split into two equal halves).
        scope: variable scope under which the weights are created.
        nh: number of hidden units; the weight matrices have ``nh * 4`` columns.
        init_scale: scale handed to ``ortho_init`` for both weight matrices.

    Returns:
        ``(xs, s)``: the (mutated) list of per-step hidden outputs and the new
        state with cell and hidden halves concatenated along axis 1.
    """
    # Two-target unpacking also enforces that each step input is rank 2;
    # only the input width is needed below.
    _, nin = [v.value for v in xs[0].get_shape()]
    with tf.variable_scope(scope):
        wx = tf.get_variable("wx", [nin, nh*4], initializer=ortho_init(init_scale))
        gx = tf.get_variable("gx", [nh*4], initializer=tf.constant_initializer(1.0))
        bx = tf.get_variable("bx", [nh*4], initializer=tf.constant_initializer(0.0))

        wh = tf.get_variable("wh", [nh, nh*4], initializer=ortho_init(init_scale))
        gh = tf.get_variable("gh", [nh*4], initializer=tf.constant_initializer(1.0))
        bh = tf.get_variable("bh", [nh*4], initializer=tf.constant_initializer(0.0))

        b = tf.get_variable("b", [nh*4], initializer=tf.constant_initializer(0.0))

        gc = tf.get_variable("gc", [nh], initializer=tf.constant_initializer(1.0))
        bc = tf.get_variable("bc", [nh], initializer=tf.constant_initializer(0.0))

    c, h = tf.split(axis=1, num_or_size_splits=2, value=s)
    for idx, (x, m) in enumerate(zip(xs, ms)):
        # Reset the carried state wherever the mask is set.
        c = c*(1-m)
        h = h*(1-m)
        z = _ln(tf.matmul(x, wx), gx, bx) + _ln(tf.matmul(h, wh), gh, bh) + b
        # Pre-activations for input gate, forget gate, output gate, candidate.
        i, f, o, u = tf.split(axis=1, num_or_size_splits=4, value=z)
        i = tf.nn.sigmoid(i)
        f = tf.nn.sigmoid(f)
        o = tf.nn.sigmoid(o)
        u = tf.tanh(u)
        c = f*c + i*u
        h = o*tf.tanh(_ln(c, gc, bc))
        xs[idx] = h
    s = tf.concat(axis=1, values=[c, h])
    return xs, s