Python tensorflow.contrib.layers.layer_norm() Examples

The following are 30 code examples of tensorflow.contrib.layers.layer_norm(), collected from open-source projects. Each example lists the source file and the project it comes from, along with its license. You may also want to check out all available functions and classes of the tensorflow.contrib.layers module.
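Before the project examples, here is a minimal, self-contained sketch of the basic call (TF 1.x; the placeholder shape is illustrative): layer_norm normalizes activations over the trailing feature dimensions and, with center and scale enabled, adds trainable beta and gamma parameters.

import tensorflow as tf
from tensorflow.contrib import layers

x = tf.placeholder(tf.float32, [None, 128])
# Normalizes over all axes from begin_norm_axis (default 1) onward; beta
# (center) and gamma (scale) are trainable parameters on the last axes.
y = layers.layer_norm(x, center=True, scale=True)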
Example #1
Source File: models.py    From ICML2019-TREX with MIT License
def cnn_to_mlp(convs, hiddens, dueling=False, layer_norm=False):
    """This model takes as input an observation and returns values of all actions.

    Parameters
    ----------
    convs: [(int, int, int)]
        list of convolutional layers in form of
        (num_outputs, kernel_size, stride)
    hiddens: [int]
        list of sizes of hidden layers
    dueling: bool
        if true, double the output MLP to compute a baseline
        for action scores
    layer_norm: bool
        if true, apply layer normalization after each hidden layer

    Returns
    -------
    q_func: function
        q_function for DQN algorithm.
    """

    return lambda *args, **kwargs: _cnn_to_mlp(convs, hiddens, dueling, layer_norm=layer_norm, *args, **kwargs) 
Example #2
Source File: models.py    From sonic_contest with MIT License
def cnn_to_mlp(convs, hiddens, dueling=False, layer_norm=False):
    """This model takes as input an observation and returns values of all actions.

    Parameters
    ----------
    convs: [(int, int, int)]
        list of convolutional layers in form of
        (num_outputs, kernel_size, stride)
    hiddens: [int]
        list of sizes of hidden layers
    dueling: bool
        if true, double the output MLP to compute a baseline
        for action scores
    layer_norm: bool
        if true, apply layer normalization after each hidden layer

    Returns
    -------
    q_func: function
        q_function for DQN algorithm.
    """

    return lambda *args, **kwargs: _cnn_to_mlp(convs, hiddens, dueling, layer_norm=layer_norm, *args, **kwargs) 
Example #3
Source File: models.py    From learning2run with MIT License
def cnn_to_mlp(convs, hiddens, dueling=False, layer_norm=False):
    """This model takes as input an observation and returns values of all actions.

    Parameters
    ----------
    convs: [(int, int, int)]
        list of convolutional layers in form of
        (num_outputs, kernel_size, stride)
    hiddens: [int]
        list of sizes of hidden layers
    dueling: bool
        if true, double the output MLP to compute a baseline
        for action scores
    layer_norm: bool
        if true, apply layer normalization after each hidden layer

    Returns
    -------
    q_func: function
        q_function for DQN algorithm.
    """

    return lambda *args, **kwargs: _cnn_to_mlp(convs, hiddens, dueling, layer_norm=layer_norm, *args, **kwargs) 
Example #4
Source File: decoder_conv.py    From conv-ensemble-str with Apache License 2.0
def create_logit(self, next_layer, att_scores, output_collection, scope):
    # output
    with tf.variable_scope(scope):
      if not self.is_training:
        # only keep the last time step
        # [N/B, M, C] --> [N/B, 1, C]
        next_layer = next_layer[:, -1:, :]
        # [N/B, L, M, H, W] --> [N/B, L, H, W]
        att_scores = att_scores[:, :, -1, :, :]

      next_layer = self.linear_mapping_weightnorm(
          next_layer,
          out_dim=self.params["nout_embed"],
          output_collection=output_collection)
      next_layer = layer_norm(next_layer, begin_norm_axis=2)
      next_layer = self.linear_mapping_weightnorm(
          next_layer,
          out_dim=self.num_charset,
          var_scope_name="liear_logits",
          output_collection=output_collection)

    return next_layer, att_scores 
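The begin_norm_axis=2 argument above restricts normalization to the channel axis. A small sketch of that behavior (the shapes here are illustrative, not from the project):

import tensorflow as tf
from tensorflow.contrib.layers import layer_norm

x = tf.placeholder(tf.float32, [None, 10, 256])  # [batch, time, channels]
# Statistics are computed over axes >= begin_norm_axis, so each time step
# is normalized independently over its 256 channels.
y = layer_norm(x, begin_norm_axis=2)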
Example #5
Source File: models.py    From ape-x with Apache License 2.0
def cnn_to_mlp(convs, hiddens, dueling=False, layer_norm=False):
    """This model takes as input an observation and returns values of all actions.

    Parameters
    ----------
    convs: [(int, int, int)]
        list of convolutional layers in form of
        (num_outputs, kernel_size, stride)
    hiddens: [int]
        list of sizes of hidden layers
    dueling: bool
        if true, double the output MLP to compute a baseline
        for action scores
    layer_norm: bool
        if true, apply layer normalization after each hidden layer

    Returns
    -------
    q_func: function
        q_function for DQN algorithm.
    """

    return lambda *args, **kwargs: _cnn_to_mlp(convs, hiddens, dueling, layer_norm=layer_norm, *args, **kwargs) 
Example #6
Source File: models.py    From rl-attack with MIT License
def cnn_to_mlp(convs, hiddens, dueling=False, layer_norm=False):
    """This model takes as input an observation and returns values of all actions.

    Parameters
    ----------
    convs: [(int, int, int)]
        list of convolutional layers in form of
        (num_outputs, kernel_size, stride)
    hiddens: [int]
        list of sizes of hidden layers
    dueling: bool
        if true, double the output MLP to compute a baseline
        for action scores
    layer_norm: bool
        if true, apply layer normalization after each hidden layer

    Returns
    -------
    q_func: function
        q_function for DQN algorithm.
    """

    return lambda *args, **kwargs: _cnn_to_mlp(convs, hiddens, dueling, layer_norm=layer_norm, *args, **kwargs) 
Example #7
Source File: models.py    From rl_graph_generation with BSD 3-Clause "New" or "Revised" License
def cnn_to_mlp(convs, hiddens, dueling=False, layer_norm=False):
    """This model takes as input an observation and returns values of all actions.

    Parameters
    ----------
    convs: [(int, int, int)]
        list of convolutional layers in form of
        (num_outputs, kernel_size, stride)
    hiddens: [int]
        list of sizes of hidden layers
    dueling: bool
        if true, double the output MLP to compute a baseline
        for action scores
    layer_norm: bool
        if true, apply layer normalization after each hidden layer

    Returns
    -------
    q_func: function
        q_function for DQN algorithm.
    """

    return lambda *args, **kwargs: _cnn_to_mlp(convs, hiddens, dueling, layer_norm=layer_norm, *args, **kwargs) 
Example #8
Source File: models.py    From DRL_DeliveryDuel with MIT License
def cnn_to_mlp(convs, hiddens, dueling=False, layer_norm=False):
    """This model takes as input an observation and returns values of all actions.

    Parameters
    ----------
    convs: [(int, int, int)]
        list of convolutional layers in form of
        (num_outputs, kernel_size, stride)
    hiddens: [int]
        list of sizes of hidden layers
    dueling: bool
        if true, double the output MLP to compute a baseline
        for action scores
    layer_norm: bool
        if true, apply layer normalization after each hidden layer

    Returns
    -------
    q_func: function
        q_function for DQN algorithm.
    """

    return lambda *args, **kwargs: _cnn_to_mlp(convs, hiddens, dueling, layer_norm=layer_norm, *args, **kwargs) 
Example #9
Source File: models.py    From HardRLWithYoutube with MIT License
def cnn_to_mlp(convs, hiddens, dueling=False, layer_norm=False):
    """This model takes as input an observation and returns values of all actions.

    Parameters
    ----------
    convs: [(int, int, int)]
        list of convolutional layers in form of
        (num_outputs, kernel_size, stride)
    hiddens: [int]
        list of sizes of hidden layers
    dueling: bool
        if true, double the output MLP to compute a baseline
        for action scores
    layer_norm: bool
        if true, apply layer normalization after each hidden layer

    Returns
    -------
    q_func: function
        q_function for DQN algorithm.
    """

    return lambda *args, **kwargs: _cnn_to_mlp(convs, hiddens, dueling, layer_norm=layer_norm, *args, **kwargs) 
Example #10
Source File: models.py    From ICML2019-TREX with MIT License
def cnn_to_mlp(convs, hiddens, dueling=False, layer_norm=False):
    """This model takes as input an observation and returns values of all actions.

    Parameters
    ----------
    convs: [(int, int, int)]
        list of convolutional layers in form of
        (num_outputs, kernel_size, stride)
    hiddens: [int]
        list of sizes of hidden layers
    dueling: bool
        if true, double the output MLP to compute a baseline
        for action scores
    layer_norm: bool
        if true, apply layer normalization after each hidden layer

    Returns
    -------
    q_func: function
        q_function for DQN algorithm.
    """

    return lambda *args, **kwargs: _cnn_to_mlp(convs, hiddens, dueling, layer_norm=layer_norm, *args, **kwargs) 
Example #11
Source File: models.py    From self-imitation-learning with MIT License
def cnn_to_mlp(convs, hiddens, dueling=False, layer_norm=False):
    """This model takes as input an observation and returns values of all actions.

    Parameters
    ----------
    convs: [(int, int, int)]
        list of convolutional layers in form of
        (num_outputs, kernel_size, stride)
    hiddens: [int]
        list of sizes of hidden layers
    dueling: bool
        if true, double the output MLP to compute a baseline
        for action scores
    layer_norm: bool
        if true, apply layer normalization after each hidden layer

    Returns
    -------
    q_func: function
        q_function for DQN algorithm.
    """

    return lambda *args, **kwargs: _cnn_to_mlp(convs, hiddens, dueling, layer_norm=layer_norm, *args, **kwargs) 
Example #12
Source File: common_layers.py    From language with Apache License 2.0
def stacked_highway(input_emb, hidden_sizes, dropout_ratio, mode,
                    layer_norm=True):
  """Construct multiple `highway` layers stacked on top of one another.

  Args:
    input_emb: tensor<float> [..., embedding_size]
    hidden_sizes: list<int> [hidden_size_1, hidden_size_2, ...]
    dropout_ratio: The probability of dropping out each unit in the activation.
        This can be None, and is only applied during training.
    mode: One of the keys from tf.estimator.ModeKeys.
    layer_norm: Boolean indicating whether we should apply layer normalization.

  Returns:
    output_emb: A Tensor with the same shape as `input_emb`, except for the last
        dimension which will have size `hidden_sizes[-1]` instead.
  """
  for i, h in enumerate(hidden_sizes):
    with tf.variable_scope("highway_{}".format(i)):
      input_emb = highway(input_emb, h, dropout_ratio, mode, layer_norm)
  return input_emb 
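The highway helper itself is defined elsewhere in common_layers.py and is not shown on this page. For orientation, here is a minimal sketch of a standard highway layer (Srivastava et al., 2015) under assumed semantics, not the project's exact implementation:

import tensorflow as tf

def highway_sketch(x, hidden_size):
    # Gated mix of a transformed path and a (possibly projected) carry path.
    transform = tf.layers.dense(x, hidden_size, activation=tf.nn.relu)
    gate = tf.layers.dense(x, hidden_size, activation=tf.sigmoid)
    carry = x if x.get_shape().as_list()[-1] == hidden_size else tf.layers.dense(x, hidden_size)
    return gate * transform + (1.0 - gate) * carry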
Example #13
Source File: models.py    From lirpg with MIT License
def cnn_to_mlp(convs, hiddens, dueling=False, layer_norm=False):
    """This model takes as input an observation and returns values of all actions.

    Parameters
    ----------
    convs: [(int, int, int)]
        list of convolutional layers in form of
        (num_outputs, kernel_size, stride)
    hiddens: [int]
        list of sizes of hidden layers
    dueling: bool
        if true, double the output MLP to compute a baseline
        for action scores
    layer_norm: bool
        if true, apply layer normalization after each hidden layer

    Returns
    -------
    q_func: function
        q_function for DQN algorithm.
    """

    return lambda *args, **kwargs: _cnn_to_mlp(convs, hiddens, dueling, layer_norm=layer_norm, *args, **kwargs) 
Example #14
Source File: modules.py    From squad-transformer with Apache License 2.0
def _build_conv_sublayer(self, inputs, sublayer_id, scope=None, reuse=None):
        """Compute layer_norm(x + conv(x)), where conv is depthwise-separable convolution

        Inputs:
          inputs: tensor. The input sequence to this sublayer. Shape (batch_size, seq_len, num_filters).
          sublayer_id: int. ID for this sublayer, used for stochastic depth dropout. Bounds: [1, self.num_sublayers].
        Returns:
          Tensor shape (batch_size, seq_len, num_filters). Result of applying the sublayer operations.
        """
        scope = scope or "ConvSublayer{}".format(sublayer_id)
        with tf.variable_scope(scope, reuse=reuse):
            outputs = self._sublayer_pre_process(inputs, reuse=reuse)
            outputs = self._ds_conv(outputs, self.d_model, self.kernel_size, self.l2_lambda, reuse=reuse)

        return self._sublayer_post_process(inputs, outputs, sublayer_id) 
Example #15
Source File: modules.py    From squad-transformer with Apache License 2.0
def _sublayer_pre_process(layer_inputs, reuse=None):
        """Perform sublayer pre-processing steps. We only apply layer_norm.

        A note from Google's tensor2tensor repo:
        "The current settings ("", "dan") are the published version
        of the transformer.  ("n", "da") seems better for harder-to-learn
        models, so it should probably be the default."
        """
        return tf_layers.layer_norm(layer_inputs, scope="LayerNorm", reuse=reuse) 
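For context, here is a minimal sketch of how such a pre-processing step is typically combined with a sublayer in the "n" / "da" scheme the quote refers to (assumed semantics, with dropout omitted; this is not the project's exact code):

import tensorflow as tf
import tensorflow.contrib.layers as tf_layers

def sublayer_with_pre_norm(x, sublayer_fn, scope):
    # Pre-process with layer_norm ("n"), apply the sublayer, then the
    # post-process step adds the residual ("a"; dropout "d" omitted here).
    with tf.variable_scope(scope):
        return x + sublayer_fn(tf_layers.layer_norm(x, scope="LayerNorm"))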
Example #16
Source File: models.py    From self-imitation-learning with MIT License
def mlp(hiddens=[], layer_norm=False):
    """This model takes as input an observation and returns values of all actions.

    Parameters
    ----------
    hiddens: [int]
        list of sizes of hidden layers
    layer_norm: bool
        if true, apply layer normalization after each hidden layer

    Returns
    -------
    q_func: function
        q_function for DQN algorithm.
    """
    return lambda *args, **kwargs: _mlp(hiddens, layer_norm=layer_norm, *args, **kwargs) 
Example #17
Source File: models.py    From self-imitation-learning with MIT License
def _mlp(hiddens, inpt, num_actions, scope, reuse=False, layer_norm=False):
    with tf.variable_scope(scope, reuse=reuse):
        out = inpt
        for hidden in hiddens:
            out = layers.fully_connected(out, num_outputs=hidden, activation_fn=None)
            if layer_norm:
                out = layers.layer_norm(out, center=True, scale=True)
            out = tf.nn.relu(out)
        q_out = layers.fully_connected(out, num_outputs=num_actions, activation_fn=None)
        return q_out 
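A hypothetical usage sketch for the two functions above (the observation shape and the "deepq" scope name are assumptions):

import tensorflow as tf

q_func = mlp(hiddens=[64, 64], layer_norm=True)
obs_ph = tf.placeholder(tf.float32, [None, 4])
# The builder forwards to _mlp(hiddens, inpt, num_actions, scope, ...).
q_values = q_func(obs_ph, num_actions=2, scope="deepq")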
Example #18
Source File: vrgripper_env_models.py    From tensor2robot with Apache License 2.0
def model_train_fn(self,
                   features,
                   labels,
                   inference_outputs,
                   mode,
                   config=None,
                   params=None):
    """Output learned loss if inner loop, or behavior clone if outer loop."""
    if params and params.get('is_outer_loss', False):
      # Outer loss case: use standard RegressionModel loss.
      return self.loss_fn(labels, inference_outputs, mode, params)
    # Inner loss case: compute learned loss function.
    with tf.variable_scope(
        'learned_loss', reuse=tf.AUTO_REUSE, use_resource=True):
      predicted_action, _ = meta_tfdata.multi_batch_apply(
          vision_layers.BuildImageFeaturesToPoseModel,
          2,
          inference_outputs['feature_points'],
          num_outputs=self._action_size)
      if self._learned_loss_conv1d_layers is None:
        return tf.losses.mean_squared_error(predicted_action,
                                            inference_outputs['action'])
      ll_input = tf.concat([
          predicted_action, inference_outputs['feature_points'],
          inference_outputs['inference_output']
      ], -1)
      net = ll_input
      for num_filters in self._learned_loss_conv1d_layers[:-1]:
        net = tf.layers.conv1d(
            net, num_filters, 10, activation=tf.nn.relu, use_bias=False)
        net = contrib_layers.layer_norm(net)
      net = tf.layers.conv1d(net, self._learned_loss_conv1d_layers[-1],
                             1)  # 1x1 convolution.
      return tf.reduce_mean(tf.reduce_sum(tf.square(net), axis=(1, 2))) 
Example #19
Source File: models.py    From self-imitation-learning with MIT License
def _cnn_to_mlp(convs, hiddens, dueling, inpt, num_actions, scope, reuse=False, layer_norm=False):
    with tf.variable_scope(scope, reuse=reuse):
        out = inpt
        with tf.variable_scope("convnet"):
            for num_outputs, kernel_size, stride in convs:
                out = layers.convolution2d(out,
                                           num_outputs=num_outputs,
                                           kernel_size=kernel_size,
                                           stride=stride,
                                           activation_fn=tf.nn.relu)
        conv_out = layers.flatten(out)
        with tf.variable_scope("action_value"):
            action_out = conv_out
            for hidden in hiddens:
                action_out = layers.fully_connected(action_out, num_outputs=hidden, activation_fn=None)
                if layer_norm:
                    action_out = layers.layer_norm(action_out, center=True, scale=True)
                action_out = tf.nn.relu(action_out)
            action_scores = layers.fully_connected(action_out, num_outputs=num_actions, activation_fn=None)

        if dueling:
            with tf.variable_scope("state_value"):
                state_out = conv_out
                for hidden in hiddens:
                    state_out = layers.fully_connected(state_out, num_outputs=hidden, activation_fn=None)
                    if layer_norm:
                        state_out = layers.layer_norm(state_out, center=True, scale=True)
                    state_out = tf.nn.relu(state_out)
                state_score = layers.fully_connected(state_out, num_outputs=1, activation_fn=None)
            action_scores_mean = tf.reduce_mean(action_scores, 1)
            action_scores_centered = action_scores - tf.expand_dims(action_scores_mean, 1)
            q_out = state_score + action_scores_centered
        else:
            q_out = action_scores
        return q_out 
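The dueling branch above implements Q(s, a) = V(s) + A(s, a) - mean_a' A(s, a'). A toy NumPy check of that arithmetic, with illustrative values:

import numpy as np

state_score = np.array([[1.0]])               # V(s), shape [batch, 1]
action_scores = np.array([[2.0, 4.0, 6.0]])   # A(s, a), shape [batch, num_actions]
q_out = state_score + (action_scores - action_scores.mean(axis=1, keepdims=True))
# q_out == [[-1., 1., 3.]]; mean-centering keeps V and A identifiable.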
Example #20
Source File: models_collection.py    From SketchySceneColorization with MIT License
def image_encoder_residual(x, num_residual_units, num_classes, reuse=False, data_format='NCHW', labels=None, scope_name=None):
    """
    :param x: [batch_size, 3, H, W]
    :return: output_list, the feature maps produced by each encoder stage
    """
    assert data_format == 'NCHW'
    size = SIZE

    if normalizer_params_e is not None and normalizer_fn_e != ly.batch_norm and normalizer_fn_e != ly.layer_norm:
        normalizer_params_e['labels'] = labels
        normalizer_params_e['n_labels'] = num_classes

    output_list = []

    # encoder_1: [batch, 3, 192, 192] => [batch, 64, 96, 96]
    with tf.variable_scope("encoder_1"):
        output = nchw_conv_ex(x, size, stride=2, filter_size=7)
        output = batchnorm(output, data_format=data_format)
        output = lrelu(output, 0.2)
        output_list.append(output)

    layer_specs = [
        size * 2,  # encoder_2: [batch, 64, 96, 96] => [batch, 128, 48, 48]
        size * 4,  # encoder_3: [batch, 128, 48, 48] => [batch, 256, 24, 24]
        size * 8,  # encoder_4: [batch, 256, 24, 24] => [batch, 512, 12, 12]
        size * 8,  # encoder_5: [batch, 512, 12, 12] => [batch, 512, 6, 6]
    ]
    for encoder_layer, (out_channels) in enumerate(layer_specs):
        with tf.variable_scope("encoder_%d_0" % (len(output_list) + 1)):
            output = bottleneck_residual_en(output_list[-1], out_channels, stride=2)
        for uId in range(1, num_residual_units[encoder_layer]):
            with tf.variable_scope("encoder_%d_%d" % (len(output_list) + 1, uId)):
                output = bottleneck_residual_pu(output, out_channels, True)
        output_list.append(output)

    return output_list 
Example #21
Source File: models.py    From lirpg with MIT License
def _mlp(hiddens, inpt, num_actions, scope, reuse=False, layer_norm=False):
    with tf.variable_scope(scope, reuse=reuse):
        out = inpt
        for hidden in hiddens:
            out = layers.fully_connected(out, num_outputs=hidden, activation_fn=None)
            if layer_norm:
                out = layers.layer_norm(out, center=True, scale=True)
            out = tf.nn.relu(out)
        q_out = layers.fully_connected(out, num_outputs=num_actions, activation_fn=None)
        return q_out 
Example #22
Source File: models.py    From sonic_contest with MIT License
def _cnn_to_mlp(convs, hiddens, dueling, inpt, num_actions, scope, reuse=False, layer_norm=False):
    with tf.variable_scope(scope, reuse=reuse):
        out = inpt
        with tf.variable_scope("convnet"):
            for num_outputs, kernel_size, stride in convs:
                out = layers.convolution2d(out,
                                           num_outputs=num_outputs,
                                           kernel_size=kernel_size,
                                           stride=stride,
                                           activation_fn=tf.nn.relu)
        conv_out = layers.flatten(out)
        with tf.variable_scope("action_value"):
            action_out = conv_out
            for hidden in hiddens:
                action_out = layers.fully_connected(action_out, num_outputs=hidden, activation_fn=None)
                if layer_norm:
                    action_out = layers.layer_norm(action_out, center=True, scale=True)
                action_out = tf.nn.relu(action_out)
            action_scores = layers.fully_connected(action_out, num_outputs=num_actions, activation_fn=None)

        if dueling:
            with tf.variable_scope("state_value"):
                state_out = conv_out
                for hidden in hiddens:
                    state_out = layers.fully_connected(state_out, num_outputs=hidden, activation_fn=None)
                    if layer_norm:
                        state_out = layers.layer_norm(state_out, center=True, scale=True)
                    state_out = tf.nn.relu(state_out)
                state_score = layers.fully_connected(state_out, num_outputs=1, activation_fn=None)
            action_scores_mean = tf.reduce_mean(action_scores, 1)
            action_scores_centered = action_scores - tf.expand_dims(action_scores_mean, 1)
            q_out = state_score + action_scores_centered
        else:
            q_out = action_scores
        return q_out 
Example #23
Source File: models.py    From sonic_contest with MIT License
def mlp(hiddens=[], layer_norm=False):
    """This model takes as input an observation and returns values of all actions.

    Parameters
    ----------
    hiddens: [int]
        list of sizes of hidden layers
    layer_norm: bool
        if true, apply layer normalization after each hidden layer

    Returns
    -------
    q_func: function
        q_function for DQN algorithm.
    """
    return lambda *args, **kwargs: _mlp(hiddens, layer_norm=layer_norm, *args, **kwargs) 
Example #24
Source File: models.py    From sonic_contest with MIT License
def _mlp(hiddens, inpt, num_actions, scope, reuse=False, layer_norm=False):
    with tf.variable_scope(scope, reuse=reuse):
        out = inpt
        for hidden in hiddens:
            out = layers.fully_connected(out, num_outputs=hidden, activation_fn=None)
            if layer_norm:
                out = layers.layer_norm(out, center=True, scale=True)
            out = tf.nn.relu(out)
        q_out = layers.fully_connected(out, num_outputs=num_actions, activation_fn=None)
        return q_out 
Example #25
Source File: models.py    From qmap with MIT License
def ConvMlp(convs, hiddens, dueling=False, layer_norm=False):
    return lambda *args, **kwargs: _cnn_to_mlp(convs, hiddens, dueling, layer_norm=layer_norm, *args, **kwargs) 
Example #26
Source File: models.py    From ICML2019-TREX with MIT License
def _cnn_to_mlp(convs, hiddens, dueling, input_, num_actions, scope, reuse=False, layer_norm=False):
    with tf.variable_scope(scope, reuse=reuse):
        out = input_
        with tf.variable_scope("convnet"):
            for num_outputs, kernel_size, stride in convs:
                out = layers.convolution2d(out,
                                           num_outputs=num_outputs,
                                           kernel_size=kernel_size,
                                           stride=stride,
                                           activation_fn=tf.nn.relu)
        conv_out = layers.flatten(out)
        with tf.variable_scope("action_value"):
            action_out = conv_out
            for hidden in hiddens:
                action_out = layers.fully_connected(action_out, num_outputs=hidden, activation_fn=None)
                if layer_norm:
                    action_out = layers.layer_norm(action_out, center=True, scale=True)
                action_out = tf.nn.relu(action_out)
            action_scores = layers.fully_connected(action_out, num_outputs=num_actions, activation_fn=None)

        if dueling:
            with tf.variable_scope("state_value"):
                state_out = conv_out
                for hidden in hiddens:
                    state_out = layers.fully_connected(state_out, num_outputs=hidden, activation_fn=None)
                    if layer_norm:
                        state_out = layers.layer_norm(state_out, center=True, scale=True)
                    state_out = tf.nn.relu(state_out)
                state_score = layers.fully_connected(state_out, num_outputs=1, activation_fn=None)
            action_scores_mean = tf.reduce_mean(action_scores, 1)
            action_scores_centered = action_scores - tf.expand_dims(action_scores_mean, 1)
            q_out = state_score + action_scores_centered
        else:
            q_out = action_scores
        return q_out 
Example #27
Source File: models.py    From qmap with MIT License
def _cnn_to_mlp(convs, hiddens, dueling, inpt, num_actions, scope, reuse=False, layer_norm=False):
    with tf.variable_scope(scope, reuse=reuse):
        inpt = tf.cast(inpt, tf.float32)
        inpt = tf.div(inpt, 255.)
        out = inpt
        with tf.variable_scope("convnet"):
            for num_outputs, kernel_size, stride in convs:
                out = layers.convolution2d(out,
                                           num_outputs=num_outputs,
                                           kernel_size=kernel_size,
                                           stride=stride,
                                           activation_fn=tf.nn.relu)
        conv_out = layers.flatten(out)
        with tf.variable_scope("action_value"):
            action_out = conv_out
            for hidden in hiddens:
                action_out = layers.fully_connected(action_out, num_outputs=hidden, activation_fn=None)
                if layer_norm:
                    action_out = layers.layer_norm(action_out, center=True, scale=True)
                action_out = tf.nn.relu(action_out)
            action_scores = layers.fully_connected(action_out, num_outputs=num_actions, activation_fn=None)

        if dueling:
            with tf.variable_scope("state_value"):
                state_out = conv_out
                for hidden in hiddens:
                    state_out = layers.fully_connected(state_out, num_outputs=hidden, activation_fn=None)
                    if layer_norm:
                        state_out = layers.layer_norm(state_out, center=True, scale=True)
                    state_out = tf.nn.relu(state_out)
                state_score = layers.fully_connected(state_out, num_outputs=1, activation_fn=None)
            action_scores_mean = tf.reduce_mean(action_scores, 1)
            action_scores_centered = action_scores - tf.expand_dims(action_scores_mean, 1)
            q_out = state_score + action_scores_centered
        else:
            q_out = action_scores
        return q_out 
Example #28
Source File: modeling.py    From albert with Apache License 2.0
def layer_norm_and_dropout(input_tensor, dropout_prob, name=None):
  """Runs layer normalization followed by dropout."""
  output_tensor = layer_norm(input_tensor, name)
  output_tensor = dropout(output_tensor, dropout_prob)
  return output_tensor 
Example #29
Source File: modeling.py    From albert with Apache License 2.0
def layer_norm(input_tensor, name=None):
  """Run layer normalization on the last dimension of the tensor."""
  return contrib_layers.layer_norm(
      inputs=input_tensor, begin_norm_axis=-1, begin_params_axis=-1, scope=name) 
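A small usage sketch for this wrapper (the tensor shape and scope name are illustrative):

import tensorflow as tf

hidden = tf.placeholder(tf.float32, [None, 128, 768])  # [batch, seq_len, width]
# begin_norm_axis=-1 normalizes each position over the hidden width alone.
normalized = layer_norm(hidden, name="LayerNorm")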
Example #30
Source File: models.py    From ICML2019-TREX with MIT License
def build_q_func(network, hiddens=[256], dueling=True, layer_norm=False, **network_kwargs):
    if isinstance(network, str):
        from baselines.common.models import get_network_builder
        network = get_network_builder(network)(**network_kwargs)

    def q_func_builder(input_placeholder, num_actions, scope, reuse=False):
        with tf.variable_scope(scope, reuse=reuse):
            latent = network(input_placeholder)
            if isinstance(latent, tuple):
                if latent[1] is not None:
                    raise NotImplementedError("DQN is not compatible with recurrent policies yet")
                latent = latent[0]

            latent = layers.flatten(latent)

            with tf.variable_scope("action_value"):
                action_out = latent
                for hidden in hiddens:
                    action_out = layers.fully_connected(action_out, num_outputs=hidden, activation_fn=None)
                    if layer_norm:
                        action_out = layers.layer_norm(action_out, center=True, scale=True)
                    action_out = tf.nn.relu(action_out)
                action_scores = layers.fully_connected(action_out, num_outputs=num_actions, activation_fn=None)

            if dueling:
                with tf.variable_scope("state_value"):
                    state_out = latent
                    for hidden in hiddens:
                        state_out = layers.fully_connected(state_out, num_outputs=hidden, activation_fn=None)
                        if layer_norm:
                            state_out = layers.layer_norm(state_out, center=True, scale=True)
                        state_out = tf.nn.relu(state_out)
                    state_score = layers.fully_connected(state_out, num_outputs=1, activation_fn=None)
                action_scores_mean = tf.reduce_mean(action_scores, 1)
                action_scores_centered = action_scores - tf.expand_dims(action_scores_mean, 1)
                q_out = state_score + action_scores_centered
            else:
                q_out = action_scores
            return q_out

    return q_func_builder
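A hypothetical usage sketch (the 'mlp' network name is one of the builders registered in baselines.common.models; the placeholder shape is an assumption):

import tensorflow as tf

q_func = build_q_func('mlp', hiddens=[256], dueling=True, layer_norm=True)
obs_ph = tf.placeholder(tf.float32, [None, 4])
q_values = q_func(obs_ph, num_actions=2, scope='deepq')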