Python keras.layers.Softmax() Examples

The following are 8 code examples of keras.layers.Softmax(), drawn from open-source projects. You can go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module keras.layers, or try the search function.
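Before the project examples, a minimal sketch (not taken from any of the projects below) of what keras.layers.Softmax does: it applies the softmax function along a configurable axis, which is useful when you want the normalization step to be an explicit node in the model graph rather than a Dense activation.

import numpy as np
from keras.layers import Input, Dense, Softmax
from keras.models import Model

# A tiny classifier whose probabilities come from an explicit Softmax
# layer applied to raw logits.
inputs = Input(shape=(4,))
logits = Dense(3)(inputs)           # unnormalized scores
probs = Softmax(axis=-1)(logits)    # normalize over the last axis
model = Model(inputs, probs)

x = np.random.rand(2, 4).astype('float32')
print(model.predict(x).sum(axis=-1))  # each row sums to ~1.0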
Example #1
Source File: __init__.py    From transformer-word-segmenter with Apache License 2.0
def __output(self, dec_output):
    output_dropout_layer = Dropout(self.output_dropout)

    output_layer = Conv1D(self.tgt_vocab_size + 1,
                          kernel_size=1,
                          activation=gelu,
                          kernel_regularizer=regularizers.l2(self.l2_reg_penalty),
                          name='output_layer')

    output_softmax_layer = Softmax(name="word_predictions")

    if self.use_crf:
        return output_layer(output_dropout_layer(dec_output))
    else:
        return output_softmax_layer(output_layer(output_dropout_layer(dec_output)))
Example #2
Source File: models.py    From Federated-Learning-Mini-Framework with MIT License
def create_model(input_shape: tuple, nb_classes: int, init_with_imagenet: bool = False, learning_rate: float = 0.01):
    weights = None
    if init_with_imagenet:
        weights = "imagenet"

    model = VGG16(input_shape=input_shape,
                  classes=nb_classes,
                  weights=weights,
                  include_top=False)
    # "Shallow" VGG for Cifar10
    x = model.get_layer('block3_pool').output
    x = layers.Flatten(name='Flatten')(x)
    x = layers.Dense(512, activation='relu')(x)
    x = layers.Dense(nb_classes)(x)
    x = layers.Softmax()(x)
    model = models.Model(model.input, x)

    loss = losses.categorical_crossentropy
    optimizer = optimizers.SGD(lr=learning_rate, decay=0.99)

    model.compile(optimizer, loss, metrics=["accuracy"])
    return model 
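A detail worth noting in this example: the final Dense layer emits raw logits and a separate Softmax layer normalizes them. This is equivalent to Dense(nb_classes, activation='softmax') but keeps the logits available as their own graph node. A quick sketch of the equivalence (hypothetical shapes, not project code):

import numpy as np
from keras import layers, models

inp = layers.Input(shape=(8,))

# Variant A: fused softmax activation.
fused = layers.Dense(3, activation='softmax')
a = models.Model(inp, fused(inp))

# Variant B: raw logits followed by an explicit Softmax layer.
dense = layers.Dense(3)
b = models.Model(inp, layers.Softmax()(dense(inp)))
dense.set_weights(fused.get_weights())  # share the same weights

x = np.random.rand(4, 8).astype('float32')
assert np.allclose(a.predict(x), b.predict(x), atol=1e-6)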
Example #3
Source File: models.py    From Hands-On-Generative-Adversarial-Networks-with-Keras with MIT License
def build_resnet_generator(input_shape, n_filters, n_residual_blocks,
                           seq_len, vocabulary_size):
    inputs = Input(shape=input_shape)

    # Dense 1: 1 x seq_len x n_filters
    x = Dense(1 * seq_len * n_filters, input_shape=input_shape)(inputs)
    x = Reshape((1, seq_len, n_filters))(x)

    # ResNet blocks
    x = resnet_block(x, n_residual_blocks, n_filters)

    # Output layer
    x = Conv2D(filters=vocabulary_size, kernel_size=1, padding='same')(x)
    x = Softmax(axis=3)(x)

    # create model graph
    model = Model(inputs=inputs, outputs=x, name='Generator')

    print("\nGenerator ResNet")
    model.summary()
    return model 
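Here Softmax(axis=3) normalizes over the vocabulary dimension of the (batch, 1, seq_len, vocabulary_size) output, so each sequence position gets its own probability distribution over tokens. A standalone numpy check of what that axis choice means (illustrative shapes only):

import numpy as np

def softmax(x, axis):
    # Numerically stable softmax along the given axis.
    e = np.exp(x - x.max(axis=axis, keepdims=True))
    return e / e.sum(axis=axis, keepdims=True)

logits = np.random.randn(2, 1, 5, 10)  # (batch, 1, seq_len, vocab)
probs = softmax(logits, axis=3)
print(probs.sum(axis=3))  # all ones: one distribution per position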
Example #4
Source File: core.py    From transformer-keras with Apache License 2.0
def __call__(self, q, k, v, attn_mask=None, scale=1.0):
    """
    :param q: Queries tensor, shape [N, T_q, D_q]
    :param k: Keys tensor, shape [N, T_k, D_k]
    :param v: Values tensor, shape [N, T_v, D_v]
    :param attn_mask: attention mask, shape [N, T_q, T_k]
    :param scale: scaling factor, a float scalar
    :return: the context tensor and the attention tensor
    """
    attention = Lambda(lambda x: K.batch_dot(x[0], x[1], axes=(2, 2)) * scale)([q, k])  # [N, T_q, T_k]
    if attn_mask is not None:
        # Put a large negative value where the mask is 0, so those
        # positions go to ~0 after the softmax.
        attention = Lambda(lambda x: (-1e+10) * (1 - x[0]) + x[1])([attn_mask, attention])
    attention = Softmax(axis=-1)(attention)
    attention = Dropout(self.attention_dropout)(attention)  # [N, T_q, T_k]
    context = Lambda(lambda x: K.batch_dot(x[0], x[1], axes=(2, 1)))([attention, v])  # [N, T_q, D_q]
    return context, attention
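The masking line deserves a closer look: with attn_mask holding 1 for positions to keep and 0 for positions to hide, (-1e10) * (1 - mask) + scores leaves valid scores unchanged and pushes masked scores toward negative infinity, so their softmax weight is effectively zero. A standalone numpy illustration (hypothetical values, assuming the 1-means-keep convention):

import numpy as np

scores = np.array([[2.0, 1.0, 0.5]])    # raw attention scores
mask = np.array([[1.0, 1.0, 0.0]])      # 1 = attend, 0 = mask out

masked = (-1e10) * (1 - mask) + scores  # masked slot becomes ~-1e10
e = np.exp(masked - masked.max(axis=-1, keepdims=True))
weights = e / e.sum(axis=-1, keepdims=True)
print(weights)  # third weight is ~0; the first two sum to ~1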
Example #5
Source File: advanced_activations_test.py    From DeepLearning_Wavelet-LSTM with MIT License
def test_softmax():
    for axis in [1, -1]:
        layer_test(layers.Softmax, kwargs={'axis': axis},
                   input_shape=(2, 3, 4)) 
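For context, layer_test is Keras's internal test helper (keras.utils.test_utils.layer_test in Keras 2.x, an assumption about this project's version); it builds the layer with the given kwargs, runs random data of input_shape through it, and checks shape inference and config round-tripping. A self-contained approximation of what it verifies for Softmax:

import numpy as np
from keras.layers import Input, Softmax
from keras.models import Model

for axis in [1, -1]:
    inp = Input(shape=(3, 4))
    layer = Softmax(axis=axis)
    out = Model(inp, layer(inp)).predict(np.random.rand(2, 3, 4))
    assert out.shape == (2, 3, 4)              # shape is preserved
    assert np.allclose(out.sum(axis=axis), 1)  # normalized along `axis`
    # The layer also survives a config round-trip, as layer_test checks.
    assert Softmax.from_config(layer.get_config()).axis == layer.axis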
Example #6
Source File: core.py    From transformer-keras with Apache License 2.0
def __init__(self,
             src_vocab_size,
             src_max_len,
             tgt_vocab_size,
             tgt_max_len,
             optimizer=Adam(lr=1e-3),
             num_layers=6,
             model_dim=512,
             num_heads=8,
             ffn_dim=2048,
             dropout=0.2,
             src_tokenizer=None,
             tgt_tokenizer=None,
             weights_path=None):
    self.optimizer = optimizer
    self.src_max_len = src_max_len
    self.tgt_max_len = tgt_max_len
    self.src_vocab_size = src_vocab_size
    self.tgt_vocab_size = tgt_vocab_size
    self.model_dim = model_dim
    self.num_layers = num_layers
    self.num_heads = num_heads
    self.ffn_dim = ffn_dim
    self.dropout = dropout

    self.decode_model = None  # used in beam_search
    self.encode_model = None  # used in beam_search
    self.src_tokenizer = src_tokenizer
    self.tgt_tokenizer = tgt_tokenizer

    self.encoder = Encoder(src_vocab_size, src_max_len, num_layers, model_dim,
                           num_heads, ffn_dim, dropout)
    self.decoder = Decoder(tgt_vocab_size, tgt_max_len, num_layers, model_dim,
                           num_heads, ffn_dim, dropout)
    self.linear = Dense(tgt_vocab_size + 1, use_bias=False)
    self.softmax = Softmax(axis=2)

    self.pred_model, self.model = self.__build_model()
    if weights_path is not None:
        self.model.load_weights(weights_path)
Example #7
Source File: models.py    From keras-transformer with MIT License
def universal_transformer_gpt_model(
        max_seq_length: int, vocabulary_size: int,
        word_embedding_size: int, transformer_depth: int,
        num_heads: int, transformer_dropout: float = 0.1,
        embedding_dropout: float = 0.6,
        l2_reg_penalty: float = 1e-6,
        confidence_penalty_weight: float = 0.1):
    """
    A model which is similar to the one described by OpenAI in paper
    "Improving Language Understanding by Generative Pre-Training", except
    that it relies on L2 regularization of the word embedding matrix
    (instead of dropout), and uses the Universal Transformer architecture.
    """
    word_ids = Input(shape=(max_seq_length,), dtype='int32', name='word_ids')
    l2_regularizer = (regularizers.l2(l2_reg_penalty) if l2_reg_penalty
                      else None)
    embedding_layer = ReusableEmbedding(
        vocabulary_size, word_embedding_size,
        input_length=max_seq_length,
        name='bpe_embeddings',
        # Regularization is based on paper "A Comparative Study on
        # Regularization Strategies for Embedding-based Neural Networks"
        # https://arxiv.org/pdf/1508.03721.pdf
        embeddings_regularizer=l2_regularizer)
    output_layer = TiedOutputEmbedding(
        projection_regularizer=l2_regularizer,
        projection_dropout=embedding_dropout,
        name='word_prediction_logits')
    coordinate_embedding_layer = TransformerCoordinateEmbedding(
        transformer_depth,
        name='coordinate_embedding')
    transformer_act_layer = TransformerACT(name='adaptive_computation_time')
    transformer_block = TransformerBlock(
        name='transformer', num_heads=num_heads,
        residual_dropout=transformer_dropout,
        attention_dropout=transformer_dropout,
        use_masking=True, vanilla_wiring=False)
    output_softmax_layer = Softmax(name='word_predictions')

    next_step_input, embedding_matrix = embedding_layer(word_ids)
    act_output = next_step_input

    for i in range(transformer_depth):
        next_step_input = coordinate_embedding_layer(next_step_input, step=i)
        next_step_input = transformer_block(next_step_input)
        next_step_input, act_output = transformer_act_layer(next_step_input)

    transformer_act_layer.finalize()
    next_step_input = act_output
    word_predictions = output_softmax_layer(
        output_layer([next_step_input, embedding_matrix]))
    model = Model(inputs=[word_ids], outputs=[word_predictions])
    # Penalty for confidence of the output distribution, as described in
    # "Regularizing Neural Networks by Penalizing Confident
    # Output Distributions" (https://arxiv.org/abs/1701.06548)
    confidence_penalty = K.mean(
        confidence_penalty_weight *
        K.sum(word_predictions * K.log(word_predictions), axis=-1))
    model.add_loss(confidence_penalty)
    return model 
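The confidence penalty added at the end is the negative entropy of the predicted distribution scaled by confidence_penalty_weight: since sum(p * log p) = -H(p), confident (low-entropy) predictions contribute a larger loss term, nudging training toward smoother output distributions. A small numpy check of the quantity being added (illustrative values only):

import numpy as np

confidence_penalty_weight = 0.1
p = np.array([[0.80, 0.15, 0.05],    # confident prediction
              [0.34, 0.33, 0.33]])   # near-uniform prediction

# Mirrors K.mean(weight * K.sum(p * log(p), axis=-1)) from the model.
per_row = np.sum(p * np.log(p), axis=-1)  # -entropy: [-0.61, -1.10]
print(confidence_penalty_weight * per_row)
print(np.mean(confidence_penalty_weight * per_row))  # added to the loss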
Example #8
Source File: models.py    From keras-transformer with MIT License
def vanilla_transformer_gpt_model(
        max_seq_length: int, vocabulary_size: int,
        word_embedding_size: int, transformer_depth: int,
        num_heads: int, transformer_dropout: float = 0.1,
        embedding_dropout: float = 0.6,
        l2_reg_penalty: float = 1e-6,
        confidence_penalty_weight: float = 0.1):
    """
    A model which is almost identical to the one described by OpenAI in paper
    "Improving Language Understanding by Generative Pre-Training", except
    that it uses L2 regularization of the word embedding matrix,
    instead of dropout.
    """
    word_ids = Input(shape=(max_seq_length,), dtype='int32', name='word_ids')
    l2_regularizer = (regularizers.l2(l2_reg_penalty) if l2_reg_penalty
                      else None)
    embedding_layer = ReusableEmbedding(
        vocabulary_size, word_embedding_size,
        input_length=max_seq_length,
        name='bpe_embeddings',
        # Regularization is based on paper "A Comparative Study on
        # Regularization Strategies for Embedding-based Neural Networks"
        # https://arxiv.org/pdf/1508.03721.pdf
        embeddings_regularizer=l2_regularizer)
    output_layer = TiedOutputEmbedding(
        projection_regularizer=l2_regularizer,
        projection_dropout=embedding_dropout,
        name='word_prediction_logits')
    coordinate_embedding_layer = TransformerCoordinateEmbedding(
        1,
        name='coordinate_embedding')
    output_softmax_layer = Softmax(name='word_predictions')

    next_step_input, embedding_matrix = embedding_layer(word_ids)

    next_step_input = coordinate_embedding_layer(next_step_input, step=0)
    for i in range(transformer_depth):
        next_step_input = (
            TransformerBlock(
                name='transformer' + str(i), num_heads=num_heads,
                residual_dropout=transformer_dropout,
                attention_dropout=transformer_dropout,
                use_masking=True,
                vanilla_wiring=True)
            (next_step_input))

    word_predictions = output_softmax_layer(
        output_layer([next_step_input, embedding_matrix]))
    model = Model(inputs=[word_ids], outputs=[word_predictions])
    # Penalty for confidence of the output distribution, as described in
    # "Regularizing Neural Networks by Penalizing Confident
    # Output Distributions" (https://arxiv.org/abs/1701.06548)
    confidence_penalty = K.mean(
        confidence_penalty_weight *
        K.sum(word_predictions * K.log(word_predictions), axis=-1))
    model.add_loss(confidence_penalty)
    return model