Python tensorflow.compat.v1.tanh() Examples

The following are 26 code examples of tensorflow.compat.v1.tanh(), drawn from open-source projects. You can go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module tensorflow.compat.v1, or try the search function.
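Before the project examples, here is a minimal, self-contained sketch of calling tf.compat.v1.tanh() inside a v1 graph (the placeholder name and shapes are illustrative):

import numpy as np
import tensorflow.compat.v1 as tf

tf.disable_v2_behavior()  # run the v1 graph/session API under TF 2.x

with tf.Graph().as_default():
    x = tf.placeholder(tf.float32, shape=(None, 3), name="x")
    y = tf.tanh(x)  # elementwise hyperbolic tangent; outputs lie in (-1, 1)
    with tf.Session() as sess:
        print(sess.run(y, feed_dict={x: [[-2.0, 0.0, 2.0]]}))
        # approximately [[-0.964  0.  0.964]]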
Example #1
Source File: test_forward.py    From incubator-tvm with Apache License 2.0
def test_forward_unary():
    def _test_forward_unary(op, a_min=1, a_max=5, dtype=np.float32):
        """test unary operators"""
        np_data = np.random.uniform(a_min, a_max, size=(2, 3, 5)).astype(dtype)
        tf.reset_default_graph()
        with tf.Graph().as_default():
            in_data = tf.placeholder(dtype, (2, 3, 5), name="in_data")
            out = op(in_data)
            compare_tf_with_tvm([np_data], ['in_data:0'], out.name)

    _test_forward_unary(tf.acos, -1, 1)
    _test_forward_unary(tf.asin, -1, 1)
    _test_forward_unary(tf.atanh, -1, 1)
    _test_forward_unary(tf.sinh)
    _test_forward_unary(tf.cosh)
    _test_forward_unary(tf.acosh)
    _test_forward_unary(tf.asinh)
    _test_forward_unary(tf.atan)
    _test_forward_unary(tf.sin)
    _test_forward_unary(tf.cos)
    _test_forward_unary(tf.tan)
    _test_forward_unary(tf.tanh)
    _test_forward_unary(tf.erf)
    _test_forward_unary(tf.log)
    _test_forward_unary(tf.log1p) 
Example #2
Source File: discretization.py    From tensor2tensor with Apache License 2.0
def tanh_discrete_bottleneck(x, bottleneck_bits, bottleneck_noise,
                             discretize_warmup_steps, mode):
  """Simple discretization through tanh, flip bottleneck_noise many bits."""
  x = tf.layers.dense(x, bottleneck_bits, name="tanh_discrete_bottleneck")
  d0 = tf.stop_gradient(2.0 * tf.to_float(tf.less(0.0, x))) - 1.0
  if mode == tf.estimator.ModeKeys.TRAIN:
    x += tf.truncated_normal(
        common_layers.shape_list(x), mean=0.0, stddev=0.2)
  x = tf.tanh(x)
  d = x + tf.stop_gradient(2.0 * tf.to_float(tf.less(0.0, x)) - 1.0 - x)
  if mode == tf.estimator.ModeKeys.TRAIN:
    noise = tf.random_uniform(common_layers.shape_list(x))
    noise = 2.0 * tf.to_float(tf.less(bottleneck_noise, noise)) - 1.0
    d *= noise
  d = common_layers.mix(d, x, discretize_warmup_steps,
                        mode == tf.estimator.ModeKeys.TRAIN)
  return d, d0 
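The line `d = x + tf.stop_gradient(...)` above is the straight-through estimator: the forward pass emits the hard sign of `x`, while the backward pass uses the gradient of the smooth `tanh`. A minimal sketch of the same identity (the helper name is illustrative, not part of discretization.py):

def straight_through_sign(x):
    """Forward: hard values in {-1.0, +1.0}; backward: identity gradient,
    since the stop_gradient term contributes nothing to the gradient."""
    hard = 2.0 * tf.to_float(tf.less(0.0, x)) - 1.0
    return x + tf.stop_gradient(hard - x)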
Example #3
Source File: network_functions.py    From magenta with Apache License 2.0
def generator_fn_specgram(inputs, **kwargs):
  """Builds generator network."""
  # inputs = (noises, one_hot_labels)
  with tf.variable_scope('generator_cond'):
    z = tf.concat(inputs, axis=1)
  if kwargs['to_rgb_activation'] == 'tanh':
    to_rgb_activation = tf.tanh
  elif kwargs['to_rgb_activation'] == 'linear':
    to_rgb_activation = lambda x: x
  fake_images, end_points = networks.generator(
      z,
      kwargs['progress'],
      lambda block_id: _num_filters_fn(block_id, **kwargs),
      kwargs['resolution_schedule'],
      num_blocks=kwargs['num_blocks'],
      kernel_size=kwargs['kernel_size'],
      colors=2,
      to_rgb_activation=to_rgb_activation,
      simple_arch=kwargs['simple_arch'])
  shape = fake_images.shape
  normalizer = data_normalizer.registry[kwargs['data_normalizer']](kwargs)
  fake_images = normalizer.denormalize_op(fake_images)
  fake_images.set_shape(shape)
  return fake_images, end_points 
Example #4
Source File: util.py    From nni with MIT License
def lstm(xs, ms, s, scope, nh, init_scale=1.0):
    """lstm cell"""
    _, nin = [v.value for v in xs[0].get_shape()] # the first is nbatch
    with tf.variable_scope(scope):
        wx = tf.get_variable("wx", [nin, nh*4], initializer=ortho_init(init_scale))
        wh = tf.get_variable("wh", [nh, nh*4], initializer=ortho_init(init_scale))
        b = tf.get_variable("b", [nh*4], initializer=tf.constant_initializer(0.0))

    c, h = tf.split(axis=1, num_or_size_splits=2, value=s)
    for idx, (x, m) in enumerate(zip(xs, ms)):
        c = c*(1-m)
        h = h*(1-m)
        z = tf.matmul(x, wx) + tf.matmul(h, wh) + b
        i, f, o, u = tf.split(axis=1, num_or_size_splits=4, value=z)
        i = tf.nn.sigmoid(i)
        f = tf.nn.sigmoid(f)
        o = tf.nn.sigmoid(o)
        u = tf.tanh(u)
        c = f*c + i*u
        h = o*tf.tanh(c)
        xs[idx] = h
    s = tf.concat(axis=1, values=[c, h])
    return xs, s 
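A hedged usage sketch for the cell above (all shapes and values are assumptions, not part of util.py): `xs` is a length-`nsteps` list of `[nbatch, nin]` step inputs, `ms` a matching list of `[nbatch, 1]` done-masks that reset the state at episode boundaries, and `s` the concatenated `[nbatch, 2*nh]` cell/hidden state:

nbatch, nin, nh, nsteps = 4, 8, 16, 5
xs = [tf.placeholder(tf.float32, [nbatch, nin]) for _ in range(nsteps)]
ms = [tf.placeholder(tf.float32, [nbatch, 1]) for _ in range(nsteps)]
s0 = tf.placeholder(tf.float32, [nbatch, 2 * nh])
hs, s1 = lstm(xs, ms, s0, scope="pi_lstm", nh=nh)
# hs is the per-step list of [nbatch, nh] hidden states; s1 is the final state.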
Example #5
Source File: run_recurrent_model_boolq.py    From language with Apache License 2.0
def apply_highway_lstm(x, seq_len):
  """Run a bi-directional LSTM with highway connections over `x`.

  Args:
    x: <tf.float32>[batch, seq_len, dim]
    seq_len: <tf.int32>[batch] or None, sequence lengths of `x`

  Returns:
    out, <tf.float32>[batch, seq_len, out_dim]
  """
  lstm_out = apply_lstm(x, seq_len)
  proj = ops.affine(x, FLAGS.lstm_dim * 4, "w", bias_name="b")
  gate, transform = tf.split(proj, 2, 2)
  gate = tf.sigmoid(gate)
  transform = tf.tanh(transform)
  return lstm_out * gate + (1 - gate) * transform 
Example #6
Source File: common_layers.py    From tensor2tensor with Apache License 2.0
def conv_lstm(x,
              kernel_size,
              filters,
              padding="SAME",
              dilation_rate=(1, 1),
              name=None,
              reuse=None):
  """Convolutional LSTM in 1 dimension."""
  with tf.variable_scope(
      name, default_name="conv_lstm", values=[x], reuse=reuse):
    gates = conv(
        x,
        4 * filters,
        kernel_size,
        padding=padding,
        dilation_rate=dilation_rate)
    g = tf.split(layer_norm(gates, 4 * filters), 4, axis=3)
    new_cell = tf.sigmoid(g[0]) * x + tf.sigmoid(g[1]) * tf.tanh(g[3])
    return tf.sigmoid(g[2]) * tf.tanh(new_cell) 
Example #7
Source File: basic_stochastic.py    From tensor2tensor with Apache License 2.0
def update_internal_states_early(self, internal_states, frames):
    """Update the internal states early in the network in GRU-like way."""
    batch_size = common_layers.shape_list(frames[0])[0]
    internal_state = internal_states[0][0][:batch_size, :, :, :]
    state_activation = tf.concat([internal_state, frames[0]], axis=-1)
    state_gate_candidate = tf.layers.conv2d(
        state_activation, 2 * self.hparams.recurrent_state_size,
        (3, 3), padding="SAME", name="state_conv")
    state_gate, state_candidate = tf.split(state_gate_candidate, 2, axis=-1)
    state_gate = tf.nn.sigmoid(state_gate)
    state_candidate = tf.tanh(state_candidate)
    internal_state = internal_state * state_gate
    internal_state += state_candidate * (1.0 - state_gate)
    max_batch_size = max(_MAX_BATCH, self.hparams.batch_size)
    diff_batch_size = max_batch_size - batch_size
    internal_state = tf.pad(
        internal_state, [[0, diff_batch_size], [0, 0], [0, 0], [0, 0]])
    return [[internal_state]] 
Example #8
Source File: actor.py    From Hierarchical-Actor-Critc-HAC- with MIT License
def create_nn(self, features, name=None):

        if name is None:
            name = self.actor_name

        with tf.variable_scope(name + '_fc_1'):
            fc1 = layer(features, 64)
        with tf.variable_scope(name + '_fc_2'):
            fc2 = layer(fc1, 64)
        with tf.variable_scope(name + '_fc_3'):
            fc3 = layer(fc2, 64)
        with tf.variable_scope(name + '_fc_4'):
            fc4 = layer(fc3, self.action_space_size, is_output=True)

        output = tf.tanh(fc4) * self.action_space_bounds + self.action_offset

        return output 
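The final tanh squashes fc4 into (-1, 1); multiplying by the per-dimension bounds and adding the offset then maps it onto the environment's action interval. A numeric sketch of that mapping (values are illustrative, not from actor.py):

import numpy as np

action_space_bounds = np.array([2.0])  # half-width of each action dimension
action_offset = np.array([0.0])        # center of each action dimension
raw = np.array([-10.0, 0.0, 10.0])     # example pre-activation outputs
actions = np.tanh(raw) * action_space_bounds + action_offset
# every action stays inside (offset - bounds, offset + bounds), here (-2, 2)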
Example #9
Source File: neuralnet.py    From M-LOOP with MIT License
def _make_net(self, reg):
        '''
        Helper method to create a new net with a specified regularisation coefficient. The net is not
        initialised, so you must call init() or load() on it before any other method.

        Args:
            reg (float): Regularisation coefficient.
        '''
        def gelu_fast(_x):
            return 0.5 * _x * (1 + tf.tanh(tf.sqrt(2 / np.pi) * (_x + 0.044715 * tf.pow(_x, 3))))
        creator = lambda: SingleNeuralNet(
                    self.num_params,
                    [64]*5, [gelu_fast]*5,
                    0.2, # train_threshold_ratio
                    16, # batch_size
                    1., # keep_prob
                    reg,
                    self.losses_list,
                    learner_archive_dir=self.learner_archive_dir,
                    start_datetime=self.start_datetime)
        return SampledNeuralNet(creator, 1) 
Example #10
Source File: run_dual_encoder.py    From language with Apache License 2.0
def create_model(bert_config, is_training, input_ids, input_mask, segment_ids,
                 image_vector, use_one_hot_embeddings, scope):
  """Creates a model."""
  model = modeling.BertModel(
      config=bert_config,
      is_training=is_training,
      input_ids=input_ids,
      input_mask=input_mask,
      token_type_ids=segment_ids,
      use_one_hot_embeddings=use_one_hot_embeddings,
      scope=scope)

  if FLAGS.ignore_image:
    logit = tf.layers.dense(
        model.get_pooled_output(), 1, activation=tf.tanh,
        kernel_initializer=
        modeling.create_initializer(bert_config.initializer_range))
    logit = tf.squeeze(logit, axis=1)
  else:
    logit = tf.einsum("ij,ij->i", tf.layers.dense(
        image_vector,
        bert_config.hidden_size,
        activation=tf.tanh,
        kernel_initializer=
        modeling.create_initializer(bert_config.initializer_range)),
                      model.get_pooled_output(),
                      name="inner")

  return tf.stack([-logit, logit], axis=1) 
Example #11
Source File: modeling.py    From training with Apache License 2.0
def gelu(x):
  """Gaussian Error Linear Unit.

  This is a smoother version of the ReLU.
  Original paper: https://arxiv.org/abs/1606.08415
  Args:
    x: float Tensor to perform activation.

  Returns:
    `x` with the GELU activation applied.
  """
  cdf = 0.5 * (1.0 + tf.tanh(
      (np.sqrt(2 / np.pi) * (x + 0.044715 * tf.pow(x, 3)))))
  return x * cdf 
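The tanh curve above is only an approximation of the exact GELU, x * Phi(x), where Phi is the standard normal CDF. A quick NumPy check of how close it is (a sketch; assumes SciPy is available for erf):

import numpy as np
from scipy.special import erf

def gelu_exact(x):
    return x * 0.5 * (1.0 + erf(x / np.sqrt(2.0)))

def gelu_tanh(x):
    return 0.5 * x * (1.0 + np.tanh(np.sqrt(2.0 / np.pi) * (x + 0.044715 * x**3)))

x = np.linspace(-4.0, 4.0, 801)
print(np.max(np.abs(gelu_exact(x) - gelu_tanh(x))))  # small, well under 1e-2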
Example #12
Source File: modeling.py    From training with Apache License 2.0
def get_activation(activation_string):
  """Maps a string to a Python function, e.g., "relu" => `tf.nn.relu`.

  Args:
    activation_string: String name of the activation function.

  Returns:
    A Python function corresponding to the activation function. If
    `activation_string` is None, empty, or "linear", this will return None.
    If `activation_string` is not a string, it will return `activation_string`.

  Raises:
    ValueError: The `activation_string` does not correspond to a known
      activation.
  """

  # We assume that anything that's not a string is already an activation
  # function, so we just return it.
  if not isinstance(activation_string, six.string_types):
    return activation_string

  if not activation_string:
    return None

  act = activation_string.lower()
  if act == "linear":
    return None
  elif act == "relu":
    return tf.nn.relu
  elif act == "gelu":
    return gelu
  elif act == "tanh":
    return tf.tanh
  else:
    raise ValueError("Unsupported activation: %s" % act) 
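A hedged usage sketch: the lookup is case-insensitive, non-strings pass through unchanged, and "tanh" resolves to tf.tanh:

assert get_activation("tanh") is tf.tanh
assert get_activation("TANH") is tf.tanh
assert get_activation("linear") is None
assert get_activation(tf.nn.relu) is tf.nn.relu  # already a function: returned as-is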
Example #13
Source File: model.py    From gpt2-estimator with MIT License
def gelu(x):
    return 0.5*x*(1+tf.tanh(np.sqrt(2/np.pi)*(x+0.044715*tf.pow(x, 3)))) 
Example #14
Source File: test_forward.py    From incubator-tvm with Apache License 2.0
def test_forward_tanh():
    ishape = (1, 3, 10, 10)
    inp_array = np.random.uniform(-5, 5, size=ishape).astype(np.float32)
    with tf.Graph().as_default():
        in1 = tf.placeholder(shape=inp_array.shape, dtype=inp_array.dtype)
        tf.nn.tanh(in1)
        compare_tf_with_tvm(inp_array, 'Placeholder:0', 'Tanh:0')
Example #15
Source File: lstm_utils.py    From magenta with Apache License 2.0
def initial_cell_state_from_embedding(cell, z, name=None):
  """Computes an initial RNN `cell` state from an embedding, `z`."""
  flat_state_sizes = tf.nest.flatten(cell.state_size)
  return tf.nest.pack_sequence_as(
      cell.zero_state(batch_size=z.shape[0], dtype=tf.float32),
      tf.split(
          tf.layers.dense(
              z,
              sum(flat_state_sizes),
              activation=tf.tanh,
              kernel_initializer=tf.random_normal_initializer(stddev=0.001),
              name=name),
          flat_state_sizes,
          axis=1)) 
Example #16
Source File: models.py    From graphics with Apache License 2.0
def _compute_sdf(self, x, translations, blend_terms, points):
    """Compute signed distances between query points and hyperplanes."""
    n_parts = tf.shape(x)[1]
    n_planes = tf.shape(x)[2]
    norm_logit = x[..., 0:self._dims - 1]
    offset = (-(tf.nn.sigmoid(x[..., self._dims - 1:self._dims]) *
                self._offset_scale + self._offset_lbound))
    blend_planes = (
        tf.nn.sigmoid(blend_terms[..., :n_parts]) * self._blend_scale +
        self._blend_lbound)

    # Norm of the boundary line
    norm_rad = tf.tanh(norm_logit) * np.pi  # [..., (azimuth, altitude)]
    if self._dims == 3:
      norm = tf.stack([
          tf.sin(norm_rad[..., 1]) * tf.cos(norm_rad[..., 0]),
          tf.sin(norm_rad[..., 1]) * tf.sin(norm_rad[..., 0]),
          tf.cos(norm_rad[..., 1])
      ],
                      axis=-1)
    else:
      norm = tf.concat([tf.cos(norm_rad), tf.sin(norm_rad)], axis=-1)

    # Calculate signed distances to hyperplanes.
    points = (
        tf.expand_dims(points, axis=1) - tf.expand_dims(translations, axis=2))
    points = tf.expand_dims(points, axis=2)
    points = tf.tile(points, [1, 1, n_planes, 1, 1])
    signed_dis = tf.matmul(points, tf.expand_dims(norm, axis=-1))
    signed_dis = signed_dis + tf.expand_dims(offset, axis=-2)

    return signed_dis, translations, blend_planes, offset 
Example #17
Source File: rnn.py    From magenta with Apache License 2.0
def __call__(self, x, state, timestep=0, scope=None):
    with tf.variable_scope(scope or type(self).__name__):
      h, c = tf.split(state, 2, 1)

      h_size = self.num_units
      x_size = x.get_shape().as_list()[1]
      batch_size = x.get_shape().as_list()[0]

      w_init = None  # uniform

      h_init = lstm_ortho_initializer(1.0)

      w_xh = tf.get_variable(
          'W_xh', [x_size, 4 * self.num_units], initializer=w_init)
      w_hh = tf.get_variable(
          'W_hh', [self.num_units, 4 * self.num_units], initializer=h_init)

      concat = tf.concat([x, h], 1)  # concat for speed.
      w_full = tf.concat([w_xh, w_hh], 0)
      concat = tf.matmul(concat, w_full)  #+ bias # live life without garbage.

      # i = input_gate, j = new_input, f = forget_gate, o = output_gate
      concat = layer_norm_all(concat, batch_size, 4, h_size, 'ln_all')
      i, j, f, o = tf.split(concat, 4, 1)

      if self.use_recurrent_dropout:
        g = tf.nn.dropout(tf.tanh(j), self.dropout_keep_prob)
      else:
        g = tf.tanh(j)

      new_c = c * tf.sigmoid(f + self.forget_bias) + tf.sigmoid(i) * g
      new_h = tf.tanh(layer_norm(new_c, h_size, 'ln_c')) * tf.sigmoid(o)

    return new_h, tf.concat([new_h, new_c], 1) 
Example #18
Source File: rnn.py    From magenta with Apache License 2.0
def __call__(self, x, state, scope=None):
    with tf.variable_scope(scope or type(self).__name__):
      c, h = tf.split(state, 2, 1)

      x_size = x.get_shape().as_list()[1]

      w_init = None  # uniform

      h_init = lstm_ortho_initializer(1.0)

      # Keep W_xh and W_hh separate here as well to use different init methods.
      w_xh = tf.get_variable(
          'W_xh', [x_size, 4 * self.num_units], initializer=w_init)
      w_hh = tf.get_variable(
          'W_hh', [self.num_units, 4 * self.num_units], initializer=h_init)
      bias = tf.get_variable(
          'bias', [4 * self.num_units],
          initializer=tf.constant_initializer(0.0))

      concat = tf.concat([x, h], 1)
      w_full = tf.concat([w_xh, w_hh], 0)
      hidden = tf.matmul(concat, w_full) + bias

      i, j, f, o = tf.split(hidden, 4, 1)

      if self.use_recurrent_dropout:
        g = tf.nn.dropout(tf.tanh(j), self.dropout_keep_prob)
      else:
        g = tf.tanh(j)

      new_c = c * tf.sigmoid(f + self.forget_bias) + tf.sigmoid(i) * g
      new_h = tf.tanh(new_c) * tf.sigmoid(o)

      return new_h, tf.concat([new_c, new_h], 1)  # fuk tuples. 
Example #19
Source File: modeling.py    From albert with Apache License 2.0
def get_activation(activation_string):
  """Maps a string to a Python function, e.g., "relu" => `tf.nn.relu`.

  Args:
    activation_string: String name of the activation function.

  Returns:
    A Python function corresponding to the activation function. If
    `activation_string` is None, empty, or "linear", this will return None.
    If `activation_string` is not a string, it will return `activation_string`.

  Raises:
    ValueError: The `activation_string` does not correspond to a known
      activation.
  """

  # We assume that anything that's not a string is already an activation
  # function, so we just return it.
  if not isinstance(activation_string, six.string_types):
    return activation_string

  if not activation_string:
    return None

  act = activation_string.lower()
  if act == "linear":
    return None
  elif act == "relu":
    return tf.nn.relu
  elif act == "gelu":
    return gelu
  elif act == "tanh":
    return tf.tanh
  else:
    raise ValueError("Unsupported activation: %s" % act) 
Example #20
Source File: common_layers.py    From tensor2tensor with Apache License 2.0
def gru_feedfwd(a_t, h_prev, filters, name=None):
  """position-wise Feed-fwd GRU gates following the MPNN.

  Args:
    a_t: Tensor of shape [batch, length, depth] of current input
    h_prev: Tensor of shape [batch, length, depth] of prev input
    filters: an integer specifying number of dimensions of the filters
    name: A string
  Returns:
    h_t: [batch, length, filters] hidden state
  """

  with tf.variable_scope(name, default_name="GRU", values=[a_t, h_prev]):
    # we use right matrix multiplication to handle batches
    # W_z and W_r have shape 2d, d. U_z U_r have shape d,d
    z_t = (
        tf.sigmoid(
            tpu_conv1d(a_t, filters, 1, padding="SAME", name="W_z") +
            tpu_conv1d(h_prev, filters, 1, padding="SAME", name="U_z")))
    r_t = (
        tf.sigmoid(
            tpu_conv1d(a_t, filters, 1, padding="SAME", name="W_r") +
            tpu_conv1d(h_prev, filters, 1, padding="SAME", name="U_r")))
    h_tilde = (
        tf.tanh(
            tpu_conv1d(a_t, filters, 1, padding="SAME", name="W") +
            tpu_conv1d(r_t * h_prev, filters, 1, padding="SAME", name="U")))
    h_t = (1. - z_t) * h_prev + z_t * h_tilde

  return h_t 
Example #21
Source File: rl.py    From tensor2tensor with Apache License 2.0
def feed_forward_gaussian_fun(action_space, config, observations):
  """Feed-forward Gaussian."""
  if not isinstance(action_space, gym.spaces.box.Box):
    raise ValueError("Expecting continuous action space.")

  mean_weights_initializer = tf.initializers.variance_scaling(
      scale=config.init_mean_factor)
  logstd_initializer = tf.random_normal_initializer(config.init_logstd, 1e-10)

  flat_observations = tf.reshape(observations, [
      tf.shape(observations)[0], tf.shape(observations)[1],
      functools.reduce(operator.mul, observations.shape.as_list()[2:], 1)])

  with tf.variable_scope("network_parameters"):
    with tf.variable_scope("policy"):
      x = flat_observations
      for size in config.policy_layers:
        x = tf.layers.dense(x, size, activation=tf.nn.relu)
      mean = tf.layers.dense(
          x, action_space.shape[0], activation=tf.tanh,
          kernel_initializer=mean_weights_initializer)
      logstd = tf.get_variable(
          "logstd", mean.shape[2:], tf.float32, logstd_initializer)
      logstd = tf.tile(
          logstd[None, None],
          [tf.shape(mean)[0], tf.shape(mean)[1]] + [1] * (mean.shape.ndims - 2))
    with tf.variable_scope("value"):
      x = flat_observations
      for size in config.value_layers:
        x = tf.layers.dense(x, size, activation=tf.nn.relu)
      value = tf.layers.dense(x, 1)[..., 0]
  mean = tf.check_numerics(mean, "mean")
  logstd = tf.check_numerics(logstd, "logstd")
  value = tf.check_numerics(value, "value")

  policy = tfp.distributions.MultivariateNormalDiag(mean, tf.exp(logstd))

  return NetworkOutput(policy, value, lambda a: tf.clip_by_value(a, -2., 2)) 
Example #22
Source File: shuffle_network.py    From tensor2tensor with Apache License 2.0
def gated_linear_map(self, inputs, suffix, bias_start_reset, in_units,
                       out_units):
    """Linear mapping with two reset gates.

    Args:
      inputs: Input tensor
      suffix: Linear map name suffix
      bias_start_reset: Bias start value for reset gate
      in_units: Size of input tensor feature map count
      out_units: Size of output tensor feature map count
    Returns:
      tf.Tensor: Convolution applied to the input tensor
    """

    def reset_gate(name):
      prefix = self.prefix + name + suffix
      reset = conv_linear_map(inputs, in_units * 2, in_units * 2,
                              bias_start_reset, prefix)
      return tf.nn.sigmoid(reset)

    in_shape = [self.batch_size, self.length // 2, in_units * 2]
    inputs = tf.reshape(inputs, in_shape)

    reset1 = reset_gate("/reset1/")
    reset2 = reset_gate("/reset2/")
    res1 = conv_linear_map(inputs * reset1, in_units * 2, out_units, 0.0,
                           self.prefix + "/cand1/" + suffix)
    res2 = conv_linear_map(inputs * reset2, in_units * 2, out_units, 0.0,
                           self.prefix + "/cand2/" + suffix)

    res = tf.concat([res1, res2], axis=2)
    res = tf.reshape(res, [self.batch_size, self.length, out_units])
    return tf.nn.tanh(res) 
Example #23
Source File: autoencoders.py    From tensor2tensor with Apache License 2.0
def bottleneck(self, x):
    with tf.variable_scope("bottleneck"):
      hparams = self.hparams
      x = tf.layers.dense(x, hparams.bottleneck_bits, name="bottleneck")
      if hparams.mode == tf.estimator.ModeKeys.TRAIN:
        noise = 2.0 * tf.random_uniform(common_layers.shape_list(x)) - 1.0
        return tf.tanh(x) + noise * hparams.bottleneck_noise, 0.0
      return tf.tanh(x), 0.0 
Example #24
Source File: autoencoders.py    From tensor2tensor with Apache License 2.0
def bottleneck(self, x):
    hparams = self.hparams
    x = tf.tanh(tf.layers.dense(x, hparams.bottleneck_bits, name="bottleneck"))
    d = x + tf.stop_gradient(2.0 * tf.to_float(tf.less(0.0, x)) - 1.0 - x)
    if hparams.mode == tf.estimator.ModeKeys.TRAIN:
      noise = tf.random_uniform(common_layers.shape_list(x))
      noise = 2.0 * tf.to_float(tf.less(hparams.bottleneck_noise, noise)) - 1.0
      d *= noise
    x = common_layers.mix(d, x, hparams.discretize_warmup_steps,
                          hparams.mode == tf.estimator.ModeKeys.TRAIN)
    return x, 0.0 
Example #25
Source File: autoencoders.py    From tensor2tensor with Apache License 2.0
def unstack(self, b, size, bottleneck_bits, name):
    with tf.variable_scope(name + "_unstack"):
      unb = self.unbottleneck(b, size)
      dec = self.decoder(unb)
      pred = tf.layers.dense(dec, bottleneck_bits, name="pred")
      pred_shape = common_layers.shape_list(pred)
      pred1 = tf.reshape(pred, pred_shape[:-1] + [-1, 2])
      x, y = tf.split(pred1, 2, axis=-1)
      x = tf.squeeze(x, axis=[-1])
      y = tf.squeeze(y, axis=[-1])
      gt = 2.0 * tf.to_float(tf.less(x, y)) - 1.0
      gtc = tf.tanh(y - x)
      gt += gtc - tf.stop_gradient(gtc)
      return gt, pred1 
Example #26
Source File: modeling.py    From albert with Apache License 2.0
def gelu(x):
  """Gaussian Error Linear Unit.

  This is a smoother version of the ReLU.
  Original paper: https://arxiv.org/abs/1606.08415
  Args:
    x: float Tensor to perform activation.

  Returns:
    `x` with the GELU activation applied.
  """
  cdf = 0.5 * (1.0 + tf.tanh(
      (np.sqrt(2 / np.pi) * (x + 0.044715 * tf.pow(x, 3)))))
  return x * cdf