Python keras.layers.Softmax() Examples
The following are code examples of keras.layers.Softmax() drawn from open-source projects; the source file, project, and license for each one are noted above the example. You may also want to check out the other available functions and classes of the keras.layers module.
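Before the project examples, here is a minimal, self-contained sketch (not taken from any of the projects below) of what the layer does: keras.layers.Softmax normalizes raw logits into probabilities along a chosen axis (the last axis by default).

import numpy as np
from keras import layers, models

# Wrap the Softmax layer in a tiny model so it can be run on a NumPy array.
inputs = layers.Input(shape=(3,))
outputs = layers.Softmax(axis=-1)(inputs)
model = models.Model(inputs, outputs)

logits = np.array([[1.0, 2.0, 3.0]], dtype='float32')
print(model.predict(logits))  # each row sums to 1.0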
Example #1
Source File: __init__.py From transformer-word-segmenter with Apache License 2.0
def __output(self, dec_output):
    output_dropout_layer = Dropout(self.output_dropout)
    output_layer = Conv1D(self.tgt_vocab_size + 1, kernel_size=1,
                          activation=gelu,
                          kernel_regularizer=regularizers.l2(self.l2_reg_penalty),
                          name='output_layer')
    output_softmax_layer = Softmax(name="word_predictions")
    if self.use_crf:
        return output_layer(output_dropout_layer(dec_output))
    else:
        return output_softmax_layer(output_layer(output_dropout_layer(dec_output)))
Example #2
Source File: models.py From Federated-Learning-Mini-Framework with MIT License
def create_model(input_shape: tuple, nb_classes: int,
                 init_with_imagenet: bool = False, learning_rate: float = 0.01):
    weights = None
    if init_with_imagenet:
        weights = "imagenet"

    model = VGG16(input_shape=input_shape,
                  classes=nb_classes,
                  weights=weights,
                  include_top=False)
    # "Shallow" VGG for Cifar10
    x = model.get_layer('block3_pool').output
    x = layers.Flatten(name='Flatten')(x)
    x = layers.Dense(512, activation='relu')(x)
    x = layers.Dense(nb_classes)(x)
    x = layers.Softmax()(x)
    model = models.Model(model.input, x)

    loss = losses.categorical_crossentropy
    optimizer = optimizers.SGD(lr=learning_rate, decay=0.99)

    model.compile(optimizer, loss, metrics=["accuracy"])
    return model
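A hypothetical call to create_model above (the argument values are illustrative and not part of the Federated-Learning-Mini-Framework snippet) might look like this for CIFAR-10-sized images:

model = create_model(input_shape=(32, 32, 3), nb_classes=10,
                     init_with_imagenet=False, learning_rate=0.01)
model.summary()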
Example #3
Source File: models.py From Hands-On-Generative-Adversarial-Networks-with-Keras with MIT License
def build_resnet_generator(input_shape, n_filters, n_residual_blocks,
                           seq_len, vocabulary_size):
    inputs = Input(shape=input_shape)

    # Dense 1: 1 x seq_len x n_filters
    x = Dense(1 * seq_len * n_filters, input_shape=input_shape)(inputs)
    x = Reshape((1, seq_len, n_filters))(x)

    # ResNet blocks
    x = resnet_block(x, n_residual_blocks, n_filters)

    # Output layer
    x = Conv2D(filters=vocabulary_size, kernel_size=1, padding='same')(x)
    x = Softmax(axis=3)(x)

    # create model graph
    model = Model(inputs=inputs, outputs=x, name='Generator')

    print("\nGenerator ResNet")
    model.summary()
    return model
Example #4
Source File: core.py From transformer-keras with Apache License 2.0
def __call__(self, q, k, v, attn_mask=None, scale=1.0):
    """
    :param q: Queries tensor of shape [N, T_q, D_q]
    :param k: Keys tensor of shape [N, T_k, D_k]
    :param v: Values tensor of shape [N, T_v, D_v]
    :param attn_mask: attention mask of shape [N, T_q, T_k]
    :param scale: scaling factor, a float scalar
    :return: the context tensor and the attention tensor
    """
    attention = Lambda(lambda x: K.batch_dot(x[0], x[1], axes=(2, 2)) * scale)([q, k])  # [N, T_q, T_k]
    if attn_mask is not None:
        # Put a large negative value at the masked positions so that
        # their weights go to ~0 after the softmax.
        attention = Lambda(lambda x: (-1e+10) * (1 - x[0]) + x[1])([attn_mask, attention])
    attention = Softmax(axis=-1)(attention)
    attention = Dropout(self.attention_dropout)(attention)  # [N, T_q, T_k]
    context = Lambda(lambda x: K.batch_dot(x[0], x[1], axes=(2, 1)))([attention, v])  # [N, T_q, D_q]
    return context, attention
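The masking step above adds a large negative number to every position that should be ignored, so the subsequent Softmax drives those attention weights to (numerically) zero. A minimal NumPy sketch of that effect, with made-up scores and mask values:

import numpy as np

scores = np.array([2.0, 1.0, 0.5, 0.1])   # raw attention scores
mask = np.array([1, 1, 0, 0])             # 1 = attend, 0 = mask out
masked = scores + (-1e10) * (1 - mask)    # same trick as in the Lambda above

weights = np.exp(masked - masked.max())
weights /= weights.sum()
print(weights)  # the masked positions receive ~0 weight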
Example #5
Source File: advanced_activations_test.py From DeepLearning_Wavelet-LSTM with MIT License
def test_softmax():
    for axis in [1, -1]:
        layer_test(layers.Softmax,
                   kwargs={'axis': axis},
                   input_shape=(2, 3, 4))
Example #6
Source File: core.py From transformer-keras with Apache License 2.0
def __init__(self, src_vocab_size, src_max_len, tgt_vocab_size, tgt_max_len,
             optimizer=Adam(lr=1e-3), num_layers=6, model_dim=512,
             num_heads=8, ffn_dim=2048, dropout=0.2,
             src_tokenizer=None, tgt_tokenizer=None, weights_path=None):
    self.optimizer = optimizer
    self.src_max_len = src_max_len
    self.tgt_max_len = tgt_max_len
    self.src_vocab_size = src_vocab_size
    self.tgt_vocab_size = tgt_vocab_size
    self.model_dim = model_dim
    self.num_layers = num_layers
    self.num_heads = num_heads
    self.ffn_dim = ffn_dim
    self.dropout = dropout
    self.decode_model = None  # used in beam_search
    self.encode_model = None  # used in beam_search
    self.src_tokenizer = src_tokenizer
    self.tgt_tokenizer = tgt_tokenizer

    self.encoder = Encoder(src_vocab_size, src_max_len, num_layers,
                           model_dim, num_heads, ffn_dim, dropout)
    self.decoder = Decoder(tgt_vocab_size, tgt_max_len, num_layers,
                           model_dim, num_heads, ffn_dim, dropout)
    self.linear = Dense(tgt_vocab_size + 1, use_bias=False)
    self.softmax = Softmax(axis=2)

    self.pred_model, self.model = self.__build_model()

    if weights_path is not None:
        self.model.load_weights(weights_path)
Example #7
Source File: models.py From keras-transformer with MIT License
def universal_transformer_gpt_model(
        max_seq_length: int, vocabulary_size: int,
        word_embedding_size: int, transformer_depth: int,
        num_heads: int, transformer_dropout: float = 0.1,
        embedding_dropout: float = 0.6,
        l2_reg_penalty: float = 1e-6,
        confidence_penalty_weight: float = 0.1):
    """
    A model which is similar to the one described by OpenAI in the paper
    "Improving Language Understanding by Generative Pre-Training", except
    that it relies on L2 regularization of the word embedding matrix
    (instead of the dropout), and uses the Universal Transformer
    architecture.
    """
    word_ids = Input(shape=(max_seq_length,), dtype='int32', name='word_ids')
    l2_regularizer = (regularizers.l2(l2_reg_penalty) if l2_reg_penalty
                      else None)
    embedding_layer = ReusableEmbedding(
        vocabulary_size, word_embedding_size,
        input_length=max_seq_length,
        name='bpe_embeddings',
        # Regularization is based on paper "A Comparative Study on
        # Regularization Strategies for Embedding-based Neural Networks"
        # https://arxiv.org/pdf/1508.03721.pdf
        embeddings_regularizer=l2_regularizer)
    output_layer = TiedOutputEmbedding(
        projection_regularizer=l2_regularizer,
        projection_dropout=embedding_dropout,
        name='word_prediction_logits')
    coordinate_embedding_layer = TransformerCoordinateEmbedding(
        transformer_depth,
        name='coordinate_embedding')
    transformer_act_layer = TransformerACT(name='adaptive_computation_time')
    transformer_block = TransformerBlock(
        name='transformer', num_heads=num_heads,
        residual_dropout=transformer_dropout,
        attention_dropout=transformer_dropout,
        use_masking=True, vanilla_wiring=False)
    output_softmax_layer = Softmax(name='word_predictions')

    next_step_input, embedding_matrix = embedding_layer(word_ids)
    act_output = next_step_input

    for i in range(transformer_depth):
        next_step_input = coordinate_embedding_layer(next_step_input, step=i)
        next_step_input = transformer_block(next_step_input)
        next_step_input, act_output = transformer_act_layer(next_step_input)

    transformer_act_layer.finalize()
    next_step_input = act_output
    word_predictions = output_softmax_layer(
        output_layer([next_step_input, embedding_matrix]))
    model = Model(inputs=[word_ids], outputs=[word_predictions])
    # Penalty for confidence of the output distribution, as described in
    # "Regularizing Neural Networks by Penalizing Confident
    # Output Distributions" (https://arxiv.org/abs/1701.06548)
    confidence_penalty = K.mean(
        confidence_penalty_weight *
        K.sum(word_predictions * K.log(word_predictions), axis=-1))
    model.add_loss(confidence_penalty)
    return model
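A hypothetical way to instantiate and compile this model (the parameter values and loss choice are illustrative, not taken from the keras-transformer project); since the softmax output is a probability distribution over the vocabulary, a sparse categorical cross-entropy on next-token ids is a natural fit:

model = universal_transformer_gpt_model(
    max_seq_length=128, vocabulary_size=16000,
    word_embedding_size=256, transformer_depth=5, num_heads=8)
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy')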
Example #8
Source File: models.py From keras-transformer with MIT License
def vanilla_transformer_gpt_model(
        max_seq_length: int, vocabulary_size: int,
        word_embedding_size: int, transformer_depth: int,
        num_heads: int, transformer_dropout: float = 0.1,
        embedding_dropout: float = 0.6,
        l2_reg_penalty: float = 1e-6,
        confidence_penalty_weight: float = 0.1):
    """
    A model which is almost identical to the one described by OpenAI in the
    paper "Improving Language Understanding by Generative Pre-Training",
    except that it uses L2 regularization of the word embedding matrix
    instead of the dropout.
    """
    word_ids = Input(shape=(max_seq_length,), dtype='int32', name='word_ids')
    l2_regularizer = (regularizers.l2(l2_reg_penalty) if l2_reg_penalty
                      else None)
    embedding_layer = ReusableEmbedding(
        vocabulary_size, word_embedding_size,
        input_length=max_seq_length,
        name='bpe_embeddings',
        # Regularization is based on paper "A Comparative Study on
        # Regularization Strategies for Embedding-based Neural Networks"
        # https://arxiv.org/pdf/1508.03721.pdf
        embeddings_regularizer=l2_regularizer)
    output_layer = TiedOutputEmbedding(
        projection_regularizer=l2_regularizer,
        projection_dropout=embedding_dropout,
        name='word_prediction_logits')
    coordinate_embedding_layer = TransformerCoordinateEmbedding(
        1, name='coordinate_embedding')
    output_softmax_layer = Softmax(name='word_predictions')

    next_step_input, embedding_matrix = embedding_layer(word_ids)
    next_step_input = coordinate_embedding_layer(next_step_input, step=0)

    for i in range(transformer_depth):
        next_step_input = (
            TransformerBlock(
                name='transformer' + str(i), num_heads=num_heads,
                residual_dropout=transformer_dropout,
                attention_dropout=transformer_dropout,
                use_masking=True,
                vanilla_wiring=True)
            (next_step_input))

    word_predictions = output_softmax_layer(
        output_layer([next_step_input, embedding_matrix]))
    model = Model(inputs=[word_ids], outputs=[word_predictions])
    # Penalty for confidence of the output distribution, as described in
    # "Regularizing Neural Networks by Penalizing Confident
    # Output Distributions" (https://arxiv.org/abs/1701.06548)
    confidence_penalty = K.mean(
        confidence_penalty_weight *
        K.sum(word_predictions * K.log(word_predictions), axis=-1))
    model.add_loss(confidence_penalty)
    return model