Python tensorflow.keras.layers.Embedding() Examples

The following are 18 code examples of tensorflow.keras.layers.Embedding(). You can go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module tensorflow.keras.layers, or try the search function.
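Before the project examples, here is a minimal, self-contained sketch (arbitrary sizes, not taken from any project below) of the layer's basic contract: an Embedding maps a batch of integer token ids of shape (batch, sequence_length) to float vectors of shape (batch, sequence_length, output_dim).

import numpy as np
import tensorflow as tf

# Vocabulary of 1000 ids, each mapped to a trainable 64-dimensional vector.
embedding = tf.keras.layers.Embedding(input_dim=1000, output_dim=64)
token_ids = np.array([[4, 25, 7], [1, 2, 0]])   # batch of 2 sequences of length 3
vectors = embedding(token_ids)
print(vectors.shape)                             # (2, 3, 64)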
Example #1
Source File: keras_model.py    From code2vec with MIT License
def _get_vocab_embedding_as_np_array(self, vocab_type: VocabType) -> np.ndarray:
        assert vocab_type in VocabType

        vocab_type_to_embedding_layer_mapping = {
            VocabType.Target: 'target_index',
            VocabType.Token: 'token_embedding',
            VocabType.Path: 'path_embedding'
        }
        embedding_layer_name = vocab_type_to_embedding_layer_mapping[vocab_type]
        weight = np.array(self.keras_train_model.get_layer(embedding_layer_name).get_weights()[0])
        assert len(weight.shape) == 2

        # Token and path have actual `Embedding` layers, but target has just a `Dense` layer;
        # hence, transpose the weights when necessary.
        assert self.vocabs.get(vocab_type).size in weight.shape
        if self.vocabs.get(vocab_type).size != weight.shape[0]:
            weight = np.transpose(weight)

        return weight 
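The transpose above is needed because the two layer types store their weights with the vocabulary on different axes. A minimal sketch of that difference (arbitrary sizes, independent of the code2vec model):

import numpy as np
import tensorflow as tf

vocab_size, dim = 100, 8

emb = tf.keras.layers.Embedding(vocab_size, dim)
_ = emb(np.zeros((1, 1), dtype='int32'))          # call once so the layer builds its weights
print(emb.get_weights()[0].shape)                 # (100, 8): vocabulary on axis 0

dense = tf.keras.layers.Dense(vocab_size, use_bias=False)
_ = dense(np.zeros((1, dim), dtype='float32'))    # call once so the layer builds its kernel
kernel = dense.get_weights()[0]
print(kernel.shape)                               # (8, 100): vocabulary on axis 1
print(np.transpose(kernel).shape)                 # (100, 8): same orientation as the Embedding weights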
Example #2
Source File: model.py    From cloudml-samples with Apache License 2.0
def keras_estimator(model_dir, config, learning_rate, vocab_size):
  """Creates a Keras Sequential model with layers.

  Args:
    model_dir: (str) file path where training files will be written.
    config: (tf.estimator.RunConfig) Configuration options to save model.
    learning_rate: (float) Learning rate.
    vocab_size: (int) Size of the vocabulary in number of words.

  Returns:
      A tf.estimator.Estimator built from the compiled Keras model.
  """
  model = models.Sequential()
  model.add(Embedding(vocab_size, 16))
  model.add(GlobalAveragePooling1D())
  model.add(Dense(16, activation=tf.nn.relu))
  model.add(Dense(1, activation=tf.nn.sigmoid))

  # Compile model with learning parameters.
  optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
  model.compile(
      optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])
  estimator = tf.keras.estimator.model_to_estimator(
      keras_model=model, model_dir=model_dir, config=config)
  return estimator 
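A quick sketch (hypothetical data and vocabulary size) of what the Embedding + GlobalAveragePooling1D stack in this model computes: the embedding yields a (batch, length, 16) tensor and the pooling layer averages over the length axis, so padded reviews of any length collapse to a single 16-dimensional vector before the Dense head.

import numpy as np
import tensorflow as tf

encoder = tf.keras.Sequential([
    tf.keras.layers.Embedding(input_dim=5000, output_dim=16),
    tf.keras.layers.GlobalAveragePooling1D(),
])
batch = np.random.randint(0, 5000, size=(4, 120))   # 4 reviews padded to length 120
print(encoder(batch).shape)                          # (4, 16)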
Example #3
Source File: BidirectionalLSTM.py    From tape-neurips2019 with MIT License
def __init__(self,
                 n_symbols: int,
                 n_units: int = 1024,
                 n_layers: int = 3,
                 dropout: Optional[float] = 0.1) -> None:
        super().__init__(n_symbols)

        if dropout is None:
            dropout = 0

        self.embedding = Embedding(n_symbols, 128)

        self.forward_lstm = Stack([
            LSTM(n_units,
                 return_sequences=True) for _ in range(n_layers)],
            name='forward_lstm')

        self.reverse_lstm = Stack([
            LSTM(n_units,
                 return_sequences=True) for _ in range(n_layers)],
            name='reverse_lstm')

        self.dropout = Dropout(dropout) 
Example #4
Source File: BeplerModel.py    From tape-neurips2019 with MIT License
def __init__(self, n_symbols: int, dropout: float = 0, use_pfam_alphabet: bool = True):
        super().__init__()

        self._use_pfam_alphabet = use_pfam_alphabet

        if use_pfam_alphabet:
            self.embed = Embedding(n_symbols, n_symbols)
        else:
            n_symbols = 21
            self.embed = Embedding(n_symbols + 1, n_symbols)

        self.dropout = Dropout(dropout)
        self.rnn = Stack([
            LSTM(1024, return_sequences=True, use_bias=True,
                 implementation=2, recurrent_activation='sigmoid'),
            LSTM(1024, return_sequences=True, use_bias=True,
                 implementation=2, recurrent_activation='sigmoid')])

        self.compute_logits = Dense(n_symbols, use_bias=True, activation='linear') 
Example #5
Source File: basic.py    From autokeras with MIT License
def build(self, hp, inputs=None):
        input_node = nest.flatten(inputs)[0]
        # TODO: support more pretrained embedding layers.
        # glove, fasttext, and word2vec
        pretraining = self.pretraining or hp.Choice(
            'pretraining',
            ['random', 'glove', 'fasttext', 'word2vec', 'none'],
            default='none')
        embedding_dim = self.embedding_dim or hp.Choice(
            'embedding_dim',
            [32, 64, 128, 256, 512],
            default=128)
        if pretraining != 'none':
            # TODO: load from pretrained weights
            layer = layers.Embedding(
                input_dim=self.max_features,
                output_dim=embedding_dim,
                input_length=input_node.shape[1])
            # trainable=False,
            # weights=[embedding_matrix])
        else:
            layer = layers.Embedding(
                input_dim=self.max_features,
                output_dim=embedding_dim)
            # input_length=input_node.shape[1],
            # trainable=True)
        output_node = layer(input_node)
        if self.dropout_rate is not None:
            dropout_rate = self.dropout_rate
        else:
            dropout_rate = hp.Choice('dropout_rate', [0.0, 0.25, 0.5], default=0.25)
        if dropout_rate > 0:
            output_node = layers.Dropout(dropout_rate)(output_node)
        return output_node 
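For the pretrained branch that is still a TODO above, a common pattern is to seed the Embedding with a precomputed matrix and freeze it, as the commented-out weights/trainable lines hint. A minimal sketch, assuming a hypothetical embedding_matrix of shape (max_features, embedding_dim) loaded from GloVe/fastText/word2vec (here replaced by random numbers):

import numpy as np
from tensorflow.keras import initializers, layers

max_features, embedding_dim = 20000, 128
embedding_matrix = np.random.normal(size=(max_features, embedding_dim)).astype('float32')  # stand-in for real pretrained vectors

pretrained_layer = layers.Embedding(
    input_dim=max_features,
    output_dim=embedding_dim,
    embeddings_initializer=initializers.Constant(embedding_matrix),
    trainable=False)   # freeze the pretrained vectors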
Example #6
Source File: train.py    From stacks-usecase with Apache License 2.0
def _rnn(dim=1000, classes=10, dropout=0.6):
    """recurrent model"""
    _model = Sequential()
    _model.add(Embedding(dim, 64))
    _model.add(GRU(64))
    _model.add(Dense(64, activation="relu"))
    _model.add(Dropout(dropout))
    _model.add(Dense(classes, activation="sigmoid"))
    return _model 
Example #7
Source File: BeplerModel.py    From tape-neurips2019 with MIT License
def __init__(self, n_symbols: int, dropout: float = 0, use_pfam_alphabet: bool = True):
        super().__init__()

        if not use_pfam_alphabet:
            n_symbols = 21

        self.embed = Embedding(n_symbols, 512)
        self.lm = BiLM(n_symbols, dropout)
        self.proj = Dense(512, use_bias=True, activation='linear')
        self.transform = Activation('relu') 
Example #8
Source File: Resnet.py    From tape-neurips2019 with MIT License
def __init__(self,
                 n_symbols: int,
                 n_layers: int = 35,
                 filters: int = 256,
                 kernel_size: int = 9,
                 layer_norm: bool = True,
                 activation: str = 'elu',
                 dilation_rate: int = 2,
                 dropout: Optional[float] = 0.1) -> None:
        super().__init__(n_symbols)
        self.n_symbols = n_symbols
        self.n_layers = n_layers
        self.filters = filters
        self.kernel_size = kernel_size
        self.layer_norm = layer_norm
        self.activation = activation
        self.dilation_rate = dilation_rate
        self.dropout = dropout

        print(self)

        input_embedding = Stack()
        input_embedding.add(Embedding(n_symbols, 128))
        input_embedding.add(Lambda(lambda x: x * np.sqrt(filters)))
        input_embedding.add(PositionEmbedding())

        encoder = Stack()
        encoder.add(input_embedding)
        encoder.add(PaddedConv(1, filters, kernel_size, 1, activation, dropout))
        encoder.add(ResidualBlock(1, filters, kernel_size, activation=activation,
                                  dilation_rate=1, dropout=dropout))
        for layer in range(n_layers - 1):
            encoder.add(ResidualBlock(1, filters, kernel_size, activation=activation,
                                      dilation_rate=dilation_rate, dropout=dropout,
                                      add_checkpoint=layer % 5 == 0))

        self.encoder = encoder 
Example #9
Source File: simple_model.py    From tape-neurips2019 with MIT License
def __init__(self,
                 n_symbols: int,  # This argument is required!
                 filters: int = 32  # There's no way to change this
                                    # from the commandline - see `my_simple_model_with_hparams.py`
                 ) -> None:
        super().__init__(n_symbols)

        self.input_embedding = Embedding(n_symbols, 10)
        self.conv1d = Conv1D(filters=filters, kernel_size=7, strides=1, padding='same') 
Example #10
Source File: embedding.py    From nlp-journey with Apache License 2.0
def _build_model(self):
        model = keras.Sequential([
            layers.Embedding(self.encoder.vocab_size, self.embedding_dim),
            layers.GlobalAveragePooling1D(),
            layers.Dense(16, activation='relu'),
            layers.Dense(1)
        ])
        model.compile(optimizer='adam',
                      loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
                      metrics=['accuracy'])
        model.summary()
        return model 
Example #11
Source File: feature_aggregation_similarity_model.py    From redshells with MIT License
def __init__(self,
                 feature_size: int,
                 embedding_size: int,
                 item_size: int,
                 max_feature_index: int,
                 embeddings_initializer=None,
                 bias_embeddings_initializer=None,
                 embeddings_regularizer=None):
        embeddings_initializer = embeddings_initializer or tf.keras.initializers.RandomNormal(mean=0.0, stddev=0.005)
        embeddings_regularizer = embeddings_regularizer or tf.keras.regularizers.l2(0.0001)
        bias_embeddings_initializer = bias_embeddings_initializer or tf.keras.initializers.RandomNormal(mean=0.0, stddev=0.005)
        self.input_x_index = layers.Input(shape=(1, ), name='input_x_index')
        self.input_y_index = layers.Input(shape=(1, ), name='input_y_index')
        self.input_x_feature = layers.Input(shape=(feature_size, ), name='input_x_feature')
        self.input_y_feature = layers.Input(shape=(feature_size, ), name='input_y_feature')

        self.embedding = layers.Embedding(
            max_feature_index + 1,
            embedding_size,
            mask_zero=True,
            embeddings_initializer=embeddings_initializer,
            embeddings_regularizer=embeddings_regularizer,
        )
        self.bias_embedding = tf.keras.layers.Embedding(
            item_size + 1,
            1,
            mask_zero=True,
            embeddings_initializer=bias_embeddings_initializer,
        )

        self.embedding_x = self.average(self.embedding(self.input_x_feature))
        self.embedding_y = self.average(self.embedding(self.input_y_feature))
        self.bias_x = self.average(self.bias_embedding(self.input_x_index))
        self.bias_y = self.average(self.bias_embedding(self.input_y_index))

        self.inner_prod = tf.keras.layers.dot([self.embedding_x, self.embedding_y], axes=1, normalize=True)
        self.similarity = tf.keras.layers.add([self.inner_prod, self.bias_x, self.bias_y])
        self.similarity = self.clip(self.similarity) 
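The self.average helper is not shown in this excerpt, but with mask_zero=True the Embedding produces a mask that marks index 0 as padding, and a masked average pools only the real positions. A minimal sketch (arbitrary sizes) of that idea:

import numpy as np
import tensorflow as tf

emb = tf.keras.layers.Embedding(input_dim=50, output_dim=4, mask_zero=True)
ids = np.array([[3, 7, 0, 0]])                                     # two real features, two padding zeros
x = emb(ids)                                                       # (1, 4, 4)
mask = tf.cast(emb.compute_mask(ids), x.dtype)[..., tf.newaxis]    # (1, 4, 1), zeros at padding
masked_mean = tf.reduce_sum(x * mask, axis=1) / tf.reduce_sum(mask, axis=1)
print(masked_mean.shape)                                           # (1, 4): averaged over the 2 real positions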
Example #12
Source File: tf2_mthisan.py    From Projects with MIT License
def __init__(self,embedding_matrix,num_classes,attention_size,attention_heads):
        
            super(mthisan.mthisan_model,self).__init__()
            self.attention_size = attention_size
            self.attention_heads = attention_heads
            
            self.embedding = layers.Embedding(embedding_matrix.shape[0],
                             embedding_matrix.shape[1],
                             embeddings_initializer=tf.keras.initializers.Constant(
                             embedding_matrix.astype(np.float32)))
            self.word_drop = layers.Dropout(0.1)
            self.word_Q = layers.Dense(self.attention_size)
            self.word_K = layers.Dense(self.attention_size)
            self.word_V = layers.Dense(self.attention_size)
            self.word_target = tf.Variable(tf.random.uniform(shape=[1,self.attention_heads,1,
                               int(self.attention_size/self.attention_heads)]))
            self.word_self_att = layers.Attention(use_scale=True)
            self.word_targ_att = layers.Attention(use_scale=True)
            
            self.line_drop = layers.Dropout(0.1)
            self.line_Q = layers.Dense(self.attention_size)
            self.line_K = layers.Dense(self.attention_size)
            self.line_V = layers.Dense(self.attention_size)
            self.line_target = tf.Variable(tf.random.uniform(shape=[1,self.attention_heads,1,
                               int(self.attention_size/self.attention_heads)]))
            self.line_self_att = layers.Attention(use_scale=True)
            self.line_targ_att = layers.Attention(use_scale=True)

            self.doc_drop = layers.Dropout(0.1)
            
            self.classify_layers = []
            for c in num_classes:
                self.classify_layers.append(layers.Dense(c)) 
Example #13
Source File: tf2_mthisan_mirrored.py    From Projects with MIT License
def __init__(self,embedding_matrix,num_classes,attention_size,attention_heads):
        
            super(mthisan.mthisan_model,self).__init__()
            self.attention_size = attention_size
            self.attention_heads = attention_heads
            
            self.embedding = layers.Embedding(embedding_matrix.shape[0],
                             embedding_matrix.shape[1],
                             embeddings_initializer=tf.keras.initializers.Constant(
                             embedding_matrix.astype(np.float32)))
            self.word_drop = layers.Dropout(0.1)
            self.word_Q = layers.Dense(self.attention_size)
            self.word_K = layers.Dense(self.attention_size)
            self.word_V = layers.Dense(self.attention_size)
            self.word_target = tf.Variable(tf.random.uniform(shape=[1,self.attention_heads,1,
                               int(self.attention_size/self.attention_heads)]))
            self.word_self_att = layers.Attention(use_scale=True)
            self.word_targ_att = layers.Attention(use_scale=True)
            
            self.line_drop = layers.Dropout(0.1)
            self.line_Q = layers.Dense(self.attention_size)
            self.line_K = layers.Dense(self.attention_size)
            self.line_V = layers.Dense(self.attention_size)
            self.line_target = tf.Variable(tf.random.uniform(shape=[1,self.attention_heads,1,
                               int(self.attention_size/self.attention_heads)]))
            self.line_self_att = layers.Attention(use_scale=True)
            self.line_targ_att = layers.Attention(use_scale=True)

            self.doc_drop = layers.Dropout(0.1)
            
            self.classify_layers = []
            for c in num_classes:
                self.classify_layers.append(layers.Dense(c)) 
Example #14
Source File: tf2_hisan.py    From Projects with MIT License
def __init__(self,embedding_matrix,num_classes,attention_size,attention_heads):
        
            super(hisan.hisan_model,self).__init__()
            self.attention_size = attention_size
            self.attention_heads = attention_heads
            
            self.embedding = layers.Embedding(embedding_matrix.shape[0],
                             embedding_matrix.shape[1],
                             embeddings_initializer=tf.keras.initializers.Constant(
                             embedding_matrix.astype(np.float32)))
            self.word_drop = layers.Dropout(0.1)
            self.word_Q = layers.Dense(self.attention_size)
            self.word_K = layers.Dense(self.attention_size)
            self.word_V = layers.Dense(self.attention_size)
            self.word_target = tf.Variable(tf.random.uniform(shape=[1,self.attention_heads,1,
                               int(self.attention_size/self.attention_heads)]))
            self.word_self_att = layers.Attention(use_scale=True)
            self.word_targ_att = layers.Attention(use_scale=True)
            
            self.line_drop = layers.Dropout(0.1)
            self.line_Q = layers.Dense(self.attention_size)
            self.line_K = layers.Dense(self.attention_size)
            self.line_V = layers.Dense(self.attention_size)
            self.line_target = tf.Variable(tf.random.uniform(shape=[1,self.attention_heads,1,
                               int(self.attention_size/self.attention_heads)]))
            self.line_self_att = layers.Attention(use_scale=True)
            self.line_targ_att = layers.Attention(use_scale=True)

            self.doc_drop = layers.Dropout(0.1)
            self.classify = layers.Dense(num_classes) 
Example #15
Source File: tf2_hisan_mirrored.py    From Projects with MIT License
def __init__(self,embedding_matrix,num_classes,attention_size,attention_heads):
        
            super(hisan.hisan_model,self).__init__()
            self.attention_size = attention_size
            self.attention_heads = attention_heads
            
            self.embedding = layers.Embedding(embedding_matrix.shape[0],
                             embedding_matrix.shape[1],
                             embeddings_initializer=tf.keras.initializers.Constant(
                             embedding_matrix.astype(np.float32)))
            self.word_drop = layers.Dropout(0.1)
            self.word_Q = layers.Dense(self.attention_size)
            self.word_K = layers.Dense(self.attention_size)
            self.word_V = layers.Dense(self.attention_size)
            self.word_target = tf.Variable(tf.random.uniform(shape=[1,self.attention_heads,1,
                               int(self.attention_size/self.attention_heads)]))
            self.word_self_att = layers.Attention(use_scale=True)
            self.word_targ_att = layers.Attention(use_scale=True)
            
            self.line_drop = layers.Dropout(0.1)
            self.line_Q = layers.Dense(self.attention_size)
            self.line_K = layers.Dense(self.attention_size)
            self.line_V = layers.Dense(self.attention_size)
            self.line_target = tf.Variable(tf.random.uniform(shape=[1,self.attention_heads,1,
                               int(self.attention_size/self.attention_heads)]))
            self.line_self_att = layers.Attention(use_scale=True)
            self.line_targ_att = layers.Attention(use_scale=True)

            self.doc_drop = layers.Dropout(0.1)
            self.classify = layers.Dense(num_classes) 
Example #16
Source File: mutator.py    From nni with MIT License
def __init__(self, model,
                 lstm_size=64,
                 lstm_num_layers=1,
                 tanh_constant=1.5,
                 cell_exit_extra_step=False,
                 skip_target=0.4,
                 temperature=None,
                 branch_bias=0.25,
                 entropy_reduction='sum'):
        super().__init__(model)
        self.tanh_constant = tanh_constant
        self.temperature = temperature
        self.cell_exit_extra_step = cell_exit_extra_step

        cells = [LSTMCell(units=lstm_size, use_bias=False) for _ in range(lstm_num_layers)]
        self.lstm = RNN(cells, stateful=True)
        self.g_emb = tf.random.normal((1, 1, lstm_size)) * 0.1
        self.skip_targets = tf.constant([1.0 - skip_target, skip_target])

        self.max_layer_choice = 0
        self.bias_dict = {}
        for mutable in self.mutables:
            if isinstance(mutable, LayerChoice):
                if self.max_layer_choice == 0:
                    self.max_layer_choice = len(mutable)
                assert self.max_layer_choice == len(mutable), \
                        "ENAS mutator requires all layer choice have the same number of candidates."
                if 'reduce' in mutable.key:
                    bias = []
                    for choice in mutable.choices:
                        if 'conv' in str(type(choice)).lower():
                            bias.append(branch_bias)
                        else:
                            bias.append(-branch_bias)
                    self.bias_dict[mutable.key] = tf.constant(bias)

        # exposed for trainer
        self.sample_log_prob = 0
        self.sample_entropy = 0
        self.sample_skip_penalty = 0

        # internal nn layers
        self.embedding = Embedding(self.max_layer_choice + 1, lstm_size)
        self.soft = Dense(self.max_layer_choice, use_bias=False)
        self.attn_anchor = Dense(lstm_size, use_bias=False)
        self.attn_query = Dense(lstm_size, use_bias=False)
        self.v_attn = Dense(1, use_bias=False)
        assert entropy_reduction in ['sum', 'mean'], 'Entropy reduction must be one of sum or mean.'
        self.entropy_reduction = tf.reduce_sum if entropy_reduction == 'sum' else tf.reduce_mean
        self.cross_entropy_loss = SparseCategoricalCrossentropy(from_logits=True, reduction=Reduction.NONE)

        self._first_sample = True 
Example #17
Source File: imdb.py    From keras-attention-mechanism with Apache License 2.0
def train_and_evaluate_model_on_imdb(add_attention=True):
    numpy.random.seed(7)
    # load the dataset but only keep the top n words, zero the rest
    top_words = 5000
    (X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=top_words)
    # truncate and pad input sequences
    max_review_length = 500
    X_train = sequence.pad_sequences(X_train, maxlen=max_review_length)
    X_test = sequence.pad_sequences(X_test, maxlen=max_review_length)
    # create the model
    embedding_vector_length = 32
    i = Input(shape=(max_review_length,))
    x = Embedding(top_words, embedding_vector_length, input_length=max_review_length)(i)
    x = Dropout(0.5)(x)
    if add_attention:
        x = LSTM(100, return_sequences=True)(x)
        x = attention_3d_block(x)
    else:
        x = LSTM(100, return_sequences=False)(x)
        x = Dense(350, activation='relu')(x)  # same number of parameters so fair comparison.
    x = Dropout(0.5)(x)
    x = Dense(1, activation='sigmoid')(x)

    model = Model(inputs=[i], outputs=[x])
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    print(model.summary())

    class RecordBestTestAccuracy(Callback):

        def __init__(self):
            super().__init__()
            self.val_accuracies = []
            self.val_losses = []

        def on_epoch_end(self, epoch, logs=None):
            self.val_accuracies.append(logs['val_accuracy'])
            self.val_losses.append(logs['val_loss'])

    rbta = RecordBestTestAccuracy()
    model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=10, batch_size=64, callbacks=[rbta])

    print(f"Max Test Accuracy: {100 * np.max(rbta.val_accuracies):.2f} %")
    print(f"Mean Test Accuracy: {100 * np.mean(rbta.val_accuracies):.2f} %") 
Example #18
Source File: keras_model.py    From code2vec with MIT License
def _create_keras_model(self):
        # Each input sample consists of a bag of `MAX_CONTEXTS` tuples (source_terminal, path, target_terminal).
        # The valid mask indicates, for each context, whether it actually exists or is just padding.
        path_source_token_input = Input((self.config.MAX_CONTEXTS,), dtype=tf.int32)
        path_input = Input((self.config.MAX_CONTEXTS,), dtype=tf.int32)
        path_target_token_input = Input((self.config.MAX_CONTEXTS,), dtype=tf.int32)
        context_valid_mask = Input((self.config.MAX_CONTEXTS,))

        # Input paths are indices; we embed them here.
        paths_embedded = Embedding(
            self.vocabs.path_vocab.size, self.config.PATH_EMBEDDINGS_SIZE, name='path_embedding')(path_input)

        # Input terminals are indices; we embed them here.
        token_embedding_shared_layer = Embedding(
            self.vocabs.token_vocab.size, self.config.TOKEN_EMBEDDINGS_SIZE, name='token_embedding')
        path_source_token_embedded = token_embedding_shared_layer(path_source_token_input)
        path_target_token_embedded = token_embedding_shared_layer(path_target_token_input)

        # `Context` is a concatenation of the 2 terminals & path embedding.
        # Each context is a vector of size 3 * EMBEDDINGS_SIZE.
        context_embedded = Concatenate()([path_source_token_embedded, paths_embedded, path_target_token_embedded])
        context_embedded = Dropout(1 - self.config.DROPOUT_KEEP_RATE)(context_embedded)

        # Let's get dense: apply a dense layer to each context vector (using the same weights for all contexts).
        context_after_dense = TimeDistributed(
            Dense(self.config.CODE_VECTOR_SIZE, use_bias=False, activation='tanh'))(context_embedded)

        # The final code vectors are obtained by applying attention to the "densed" context vectors.
        code_vectors, attention_weights = AttentionLayer(name='attention')(
            [context_after_dense, context_valid_mask])

        # "Decode": Now we use another dense layer to get the target word embedding from each code vector.
        target_index = Dense(
            self.vocabs.target_vocab.size, use_bias=False, activation='softmax', name='target_index')(code_vectors)

        # Wrap the layers into a Keras model, using our subtoken-metrics and the CE loss.
        inputs = [path_source_token_input, path_input, path_target_token_input, context_valid_mask]
        self.keras_train_model = keras.Model(inputs=inputs, outputs=target_index)

        # Actual target word predictions (as strings). Used as a second output layer.
        # Used for predict() and for the evaluation metrics calculations.
        topk_predicted_words, topk_predicted_words_scores = TopKWordPredictionsLayer(
            self.config.TOP_K_WORDS_CONSIDERED_DURING_PREDICTION,
            self.vocabs.target_vocab.get_index_to_word_lookup_table(),
            name='target_string')(target_index)

        # We use another dedicated Keras model for evaluation.
        # The evaluation model outputs the `topk_predicted_words` as a 2nd output.
        # The separation between train and eval models is for efficiency.
        self.keras_eval_model = keras.Model(
            inputs=inputs, outputs=[target_index, topk_predicted_words], name="code2vec-keras-model")

        # We use another dedicated Keras function to produce predictions.
        # It has additional outputs beyond the original model.
        # It is based on the trained layers of the original model and uses their weights.
        predict_outputs = tuple(KerasPredictionModelOutput(
            target_index=target_index, code_vectors=code_vectors, attention_weights=attention_weights,
            topk_predicted_words=topk_predicted_words, topk_predicted_words_scores=topk_predicted_words_scores))
        self.keras_model_predict_function = K.function(inputs=inputs, outputs=predict_outputs)
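A minimal sketch (hypothetical sizes, not the full code2vec graph) of the weight-sharing pattern used for token_embedding_shared_layer above: a single Embedding instance called on several inputs gives them one shared, trainable lookup table.

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Concatenate, Embedding, Input

source_ids = Input((200,), dtype=tf.int32)
target_ids = Input((200,), dtype=tf.int32)
shared = Embedding(10000, 128, name='token_embedding')             # one layer instance
merged = Concatenate()([shared(source_ids), shared(target_ids)])   # (batch, 200, 256)
model = keras.Model(inputs=[source_ids, target_ids], outputs=merged)
print(len(model.get_layer('token_embedding').get_weights()))       # 1: a single shared weight matrix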