Python keras.layers.wrappers.TimeDistributed() Examples

The following are 22 code examples of keras.layers.wrappers.TimeDistributed(), collected from open-source projects. Each example notes its source file, project, and license.
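Before the project examples, here is a minimal standalone sketch (not taken from any of the projects below) of what TimeDistributed does: it applies one shared wrapped layer independently to every time step of its input. The layer choice and shapes are illustrative only.

from keras.models import Sequential
from keras.layers import Dense
from keras.layers.wrappers import TimeDistributed

# Apply the same Dense(8) to each of 10 time steps of 16-dimensional vectors.
model = Sequential()
model.add(TimeDistributed(Dense(8), input_shape=(10, 16)))  # output shape: (batch, 10, 8)
model.compile(optimizer='rmsprop', loss='mse')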
Example #1
Source File: models.py    From SeqGAN with MIT License
def GeneratorPretraining(V, E, H):
    '''
    Model for Generator pretraining. This model's weights should be shared with
        Generator.
    # Arguments:
        V: int, Vocabulary size
        E: int, Embedding size
        H: int, LSTM hidden size
    # Returns:
        generator_pretraining: keras Model
            input: word ids, shape = (B, T)
            output: word probability, shape = (B, T, V)
    '''
    # In the comments, B means batch size and T means the number of time steps.
    input = Input(shape=(None,), dtype='int32', name='Input') # (B, T)
    out = Embedding(V, E, mask_zero=True, name='Embedding')(input) # (B, T, E)
    out = LSTM(H, return_sequences=True, name='LSTM')(out)  # (B, T, H)
    out = TimeDistributed(
        Dense(V, activation='softmax', name='DenseSoftmax'),
        name='TimeDenseSoftmax')(out)    # (B, T, V)
    generator_pretraining = Model(input, out)
    return generator_pretraining 
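A possible way to instantiate and compile this pretraining model; the vocabulary, embedding, and hidden sizes below are illustrative values, not settings from the SeqGAN project.

generator_pretraining = GeneratorPretraining(V=5000, E=64, H=64)
generator_pretraining.compile(optimizer='adam', loss='categorical_crossentropy')
generator_pretraining.summary()  # takes (B, T) word ids, returns (B, T, V) probabilities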
Example #2
Source File: networks.py    From VizDoom-Keras-RL with MIT License
def drqn(input_shape, action_size, learning_rate):

        model = Sequential()
        model.add(TimeDistributed(Convolution2D(32, 8, 8, subsample=(4,4), activation='relu'), input_shape=(input_shape)))
        model.add(TimeDistributed(Convolution2D(64, 4, 4, subsample=(2,2), activation='relu')))
        model.add(TimeDistributed(Convolution2D(64, 3, 3, activation='relu')))
        model.add(TimeDistributed(Flatten()))

        # Use all traces for training
        #model.add(LSTM(512, return_sequences=True,  activation='tanh'))
        #model.add(TimeDistributed(Dense(output_dim=action_size, activation='linear')))

        # Use last trace for training
        model.add(LSTM(512,  activation='tanh'))
        model.add(Dense(output_dim=action_size, activation='linear'))

        adam = Adam(lr=learning_rate)
        model.compile(loss='mse',optimizer=adam)

        return model 
Example #3
Source File: finetuning.py    From DeepMoji with MIT License
def change_trainable(layer, trainable, verbose=False):
    """ Helper method that fixes some of Keras' issues with wrappers and
        trainability. Freezes or unfreezes a given layer.

    # Arguments:
        layer: Layer to be modified.
        trainable: Whether the layer should be frozen or unfrozen.
        verbose: Verbosity flag.
    """

    layer.trainable = trainable

    if type(layer) == Bidirectional:
        layer.backward_layer.trainable = trainable
        layer.forward_layer.trainable = trainable

    if type(layer) == TimeDistributed:
        # TimeDistributed keeps its wrapped layer in the `layer` attribute.
        layer.layer.trainable = trainable

    if verbose:
        action = 'Unfroze' if trainable else 'Froze'
        print("{} {}".format(action, layer.name)) 
Example #4
Source File: networks.py    From VizDoom-Keras-RL with MIT License
def a2c_lstm(input_shape, action_size, value_size, learning_rate):
        """Actor and Critic Network share convolution layers with LSTM
        """

        state_input = Input(shape=(input_shape)) # 4x64x64x3
        x = TimeDistributed(Convolution2D(32, 8, 8, subsample=(4,4), activation='relu'))(state_input)
        x = TimeDistributed(Convolution2D(64, 4, 4, subsample=(2,2), activation='relu'))(x)
        x = TimeDistributed(Convolution2D(64, 3, 3, activation='relu'))(x)
        x = TimeDistributed(Flatten())(x)

        x = LSTM(512, activation='tanh')(x)

        # Actor Stream
        actor = Dense(action_size, activation='softmax')(x)

        # Critic Stream
        critic = Dense(value_size, activation='linear')(x)

        model = Model(input=state_input, output=[actor, critic])

        adam = Adam(lr=learning_rate, clipnorm=1.0)
        model.compile(loss=['categorical_crossentropy', 'mse'], optimizer=adam, loss_weights=[1., 1.])

        return model 
Example #5
Source File: models.py    From DeepTL-Lane-Change-Classification with MIT License
def build_cnn_to_lstm_model(self, input_shape, optimizer=Adam(lr=1e-6, decay=1e-5)):

        model = Sequential()

        model.add(TimeDistributed(Convolution2D(16, 3, 3), input_shape=input_shape))
        model.add(TimeDistributed(Activation('relu')))
        model.add(TimeDistributed(Convolution2D(16, 3, 3)))
        model.add(TimeDistributed(Activation('relu')))
        model.add(TimeDistributed(MaxPooling2D(pool_size=(2, 2))))
        model.add(TimeDistributed(Dropout(0.2)))
        model.add(TimeDistributed(Flatten()))
        model.add(TimeDistributed(Dense(200)))
        model.add(TimeDistributed(Dense(50, name="first_dense")))
        model.add(LSTM(20, return_sequences=False, name="lstm_layer"))
        model.add(Dense(2, activation='softmax'))

        model.compile(loss='categorical_crossentropy', optimizer=optimizer)

        self.model = model 
Example #6
Source File: test_keras_numeric.py    From coremltools with BSD 3-Clause "New" or "Revised" License
def test_large_batch_gpu(self):
        batch_size = 2049
        num_channels = 4
        kernel_size = 3

        model = Sequential()
        model.add(
            TimeDistributed(Dense(num_channels), input_shape=(batch_size, kernel_size))
        )

        model.set_weights(
            [(np.random.rand(*w.shape) - 0.5) / 5.0 for w in model.get_weights()]
        )

        self._test_keras_model(
            model, input_blob="data", output_blob="output", delta=1e-2
        ) 
Example #7
Source File: test_keras_numeric.py    From coremltools with BSD 3-Clause "New" or "Revised" License
def test_tiny_image_captioning(self):
        # use a conv layer as a image feature branch
        img_input_1 = Input(shape=(16, 16, 3))
        x = Convolution2D(2, 3, 3)(img_input_1)
        x = Flatten()(x)
        img_model = Model([img_input_1], [x])

        img_input = Input(shape=(16, 16, 3))
        x = img_model(img_input)
        x = Dense(8, name="cap_dense")(x)
        x = Reshape((1, 8), name="cap_reshape")(x)

        sentence_input = Input(shape=(5,))  # max_length = 5
        y = Embedding(8, 8, name="cap_embedding")(sentence_input)
        z = merge([x, y], mode="concat", concat_axis=1, name="cap_merge")
        z = LSTM(4, return_sequences=True, name="cap_lstm")(z)
        z = TimeDistributed(Dense(8), name="cap_timedistributed")(z)

        combined_model = Model([img_input, sentence_input], [z])
        self._test_keras_model(combined_model, one_dim_seq_flags=[False, True]) 
Example #8
Source File: test_keras2_numeric.py    From coremltools with BSD 3-Clause "New" or "Revised" License
def test_large_batch_gpu(self):

        batch_size = 2049
        num_channels = 4
        kernel_size = 3

        model = Sequential()
        model.add(
            TimeDistributed(Dense(num_channels), input_shape=(batch_size, kernel_size))
        )

        model.set_weights(
            [(np.random.rand(*w.shape) - 0.5) * 0.2 for w in model.get_weights()]
        )

        self._test_model(model, delta=1e-2) 
Example #9
Source File: test_keras2_numeric.py    From coremltools with BSD 3-Clause "New" or "Revised" License
def test_time_distributed_conv(self):
        model = Sequential()
        model.add(
            TimeDistributed(
                Conv2D(64, (3, 3), activation="relu"), input_shape=(1, 30, 30, 3)
            )
        )
        model.add(TimeDistributed(MaxPooling2D((2, 2), strides=(1, 1))))
        model.add(TimeDistributed(Conv2D(32, (4, 4), activation="relu")))
        model.add(TimeDistributed(MaxPooling2D((2, 2), strides=(2, 2))))
        model.add(TimeDistributed(Conv2D(32, (4, 4), activation="relu")))
        model.add(TimeDistributed(MaxPooling2D((2, 2), strides=(2, 2))))
        model.add(TimeDistributed(Flatten()))
        model.add(Dropout(0.5))
        model.add(LSTM(32, return_sequences=False, dropout=0.5))
        model.add(Dense(10, activation="sigmoid"))
        self._test_model(model) 
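In this model the leading axis of input_shape=(1, 30, 30, 3) is the time dimension, so each sample is a sequence of one 30x30 RGB frame. A quick shape check along those lines (illustrative only, not part of the coremltools test):

import numpy as np

dummy = np.random.rand(4, 1, 30, 30, 3).astype('float32')  # 4 sequences of 1 frame each
preds = model.predict(dummy)
print(preds.shape)  # (4, 10): one 10-way sigmoid output per sequence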
Example #10
Source File: test_keras2_numeric.py    From coremltools with BSD 3-Clause "New" or "Revised" License
def test_tiny_image_captioning(self):
        # use a conv layer as a image feature branch
        img_input_1 = Input(shape=(16, 16, 3))
        x = Conv2D(2, (3, 3))(img_input_1)
        x = Flatten()(x)
        img_model = Model(inputs=[img_input_1], outputs=[x])

        img_input = Input(shape=(16, 16, 3))
        x = img_model(img_input)
        x = Dense(8, name="cap_dense")(x)
        x = Reshape((1, 8), name="cap_reshape")(x)

        sentence_input = Input(shape=(5,))  # max_length = 5
        y = Embedding(8, 8, name="cap_embedding")(sentence_input)
        z = concatenate([x, y], axis=1, name="cap_merge")
        z = LSTM(4, return_sequences=True, name="cap_lstm")(z)
        z = TimeDistributed(Dense(8), name="cap_timedistributed")(z)

        combined_model = Model(inputs=[img_input, sentence_input], outputs=[z])
        self._test_model(combined_model, one_dim_seq_flags=[False, True]) 
Example #11
Source File: wrappers_test.py    From DeepLearning_Wavelet-LSTM with MIT License
def test_regularizers():
    model = Sequential()
    model.add(wrappers.TimeDistributed(
        layers.Dense(2, kernel_regularizer='l1'), input_shape=(3, 4)))
    model.add(layers.Activation('relu'))
    model.compile(optimizer='rmsprop', loss='mse')
    assert len(model.layers[0].layer.losses) == 1
    assert len(model.layers[0].losses) == 1
    assert len(model.layers[0].get_losses_for(None)) == 1
    assert len(model.losses) == 1

    model = Sequential()
    model.add(wrappers.TimeDistributed(
        layers.Dense(2, activity_regularizer='l1'), input_shape=(3, 4)))
    model.add(layers.Activation('relu'))
    model.compile(optimizer='rmsprop', loss='mse')
    assert len(model.losses) == 1 
Example #12
Source File: dense.py    From costar_plan with Apache License 2.0
def GetLSTMEncoder(xin, uin, dense_size, lstm_size, dense_layers=1,
        lstm_layers=1):
    '''
    Get LSTM encoder.
    '''
    x = xin
    for _ in xrange(dense_layers):
        if uin is not None:
            x = Concatenate(axis=-1)([x, uin])
        x = TimeDistributed(Dense(dense_size))(x)
        x = TimeDistributed(Activation('relu'))(x)
    for i in xrange(lstm_layers):
        if i == lstm_layers - 1:
            sequence_out = False
        else:
            sequence_out = True
        #sequence_out = True
        x = LSTM(lstm_size, return_sequences=sequence_out)(x)
        x = Activation('relu')(x)
    return x 
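A possible call site for this encoder, assuming the Keras imports used by dense.py (Input and friends) are in scope; the sequence length and feature sizes are illustrative assumptions, not values from costar_plan.

xin = Input(shape=(10, 32))  # (T, state features)
uin = Input(shape=(10, 4))   # (T, action features), concatenated onto the state
encoded = GetLSTMEncoder(xin, uin, dense_size=128, lstm_size=64,
                         dense_layers=1, lstm_layers=1)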
Example #13
Source File: MaskRCNN.py    From PyTorch-Luna16 with Apache License 2.0
def find_trainable_layer(self, layer):
        """If a layer is encapsulated by another layer, this function
        digs through the encapsulation and returns the layer that holds
        the weights.
        """
        if layer.__class__.__name__ == 'TimeDistributed':
            return self.find_trainable_layer(layer.layer)
        return layer 
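Because TimeDistributed (like the other Keras wrappers) keeps the wrapped layer in a .layer attribute, the same unwrapping can be sketched as a standalone helper; this is an assumption-based illustration, not code from the Mask R-CNN port.

def unwrap_time_distributed(layer):
    # Follow nested TimeDistributed wrappers down to the layer that owns the weights.
    while layer.__class__.__name__ == 'TimeDistributed':
        layer = layer.layer
    return layer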
Example #14
Source File: model.py    From neural_complete with MIT License
def construct_model(maxlen, input_dimension, output_dimension, lstm_vector_output_dim):
    """
        Three words concatenated together
    """
    input = Input(shape=(maxlen, input_dimension), name='input')


    # lstm_encode = LSTM(lstm_vector_output_dim)(input)
    lstm_encode = SimpleRNN(lstm_vector_output_dim, activation='sigmoid')(input)


    encoded_copied = RepeatVector(n=maxlen)(lstm_encode)


    # lstm_decode = LSTM(output_dim=output_dimension, return_sequences=True, activation='softmax')(encoded_copied)
    lstm_decode = SimpleRNN(output_dim=output_dimension, return_sequences=True, activation='softmax')(encoded_copied)


    decoded = TimeDistributed(Dense(output_dimension, activation='softmax'))(lstm_decode)


    encoder_decoder = Model(input, decoded)


    adam = Adam()
    encoder_decoder.compile(loss='categorical_crossentropy', optimizer=adam)


    return encoder_decoder 
Example #15
Source File: model.py    From keras-molecules with MIT License
def _buildDecoder(self, z, latent_rep_size, max_length, charset_length):
        h = Dense(latent_rep_size, name='latent_input', activation = 'relu')(z)
        h = RepeatVector(max_length, name='repeat_vector')(h)
        h = GRU(501, return_sequences = True, name='gru_1')(h)
        h = GRU(501, return_sequences = True, name='gru_2')(h)
        h = GRU(501, return_sequences = True, name='gru_3')(h)
        return TimeDistributed(Dense(charset_length, activation='softmax'), name='decoded_mean')(h) 
Example #16
Source File: rnn_tagger.py    From neural_complete with MIT License
def bilstm_tagger(ft_model, n_tags, maxlen,
                  lstm_dims=150, hidden_dim=100, rnn_layers=3, dropout=0.2):
    input_layer = Input(shape=(maxlen, ft_model.dim), name='input')
    lstm = bilstm_layer(input_layer, lstm_dims, rnn_layers, dropout)
    output_layer = TimeDistributed(Dense(n_tags, activation='softmax'))(lstm)
    model = Model(input=input_layer, output=output_layer)
    return model 
Example #17
Source File: densenet.py    From deep_learning with MIT License
def dense_cnn(input, nclass):

    _dropout_rate = 0.2
    _weight_decay = 1e-4

    _nb_filter = 64
    # conv 64  5*5 s=2
    x = Conv2D(_nb_filter, (5, 5), strides=(2, 2), kernel_initializer='he_normal', padding='same',
               use_bias=False, kernel_regularizer=l2(_weight_decay))(input)

    # 64 +  8 * 8 = 128
    x, _nb_filter = dense_block(x, 8, _nb_filter, 8, None, _weight_decay)
    #128
    x, _nb_filter = transition_block(x, 128, _dropout_rate, 2, _weight_decay)

    #128 + 8 * 8 = 192
    x, _nb_filter = dense_block(x, 8, _nb_filter, 8, None, _weight_decay)
    #192->128
    x, _nb_filter = transition_block(x, 128, _dropout_rate, 2, _weight_decay)

    #128 + 8 * 8 = 192
    x, _nb_filter = dense_block(x, 8, _nb_filter, 8, None, _weight_decay)

    x = BatchNormalization(axis=-1, epsilon=1.1e-5)(x)
    x = Activation('relu')(x)

    x = Permute((2, 1, 3), name='permute')(x)
    x = TimeDistributed(Flatten(), name='flatten')(x)
    y_pred = Dense(nclass, name='out', activation='softmax')(x)

    basemodel = Model(inputs=input,outputs=y_pred)
    basemodel.summary()
    return basemodel 
Example #18
Source File: model.py    From deepchem with MIT License
def _buildDecoder(self, z, latent_rep_size, max_length, charset_length):
    h = Dense(latent_rep_size, name='latent_input', activation='relu')(z)
    h = RepeatVector(max_length, name='repeat_vector')(h)
    h = GRU(501, return_sequences=True, name='gru_1')(h)
    h = GRU(501, return_sequences=True, name='gru_2')(h)
    h = GRU(501, return_sequences=True, name='gru_3')(h)
    return TimeDistributed(
        Dense(charset_length, activation='softmax'), name='decoded_mean')(h) 
Example #19
Source File: wrappers_test.py    From DeepLearning_Wavelet-LSTM with MIT License
def test_TimeDistributed_trainable():
    # test layers that need learning_phase to be set
    x = Input(shape=(3, 2))
    layer = wrappers.TimeDistributed(layers.BatchNormalization())
    _ = layer(x)
    assert len(layer.updates) == 2
    assert len(layer.trainable_weights) == 2
    layer.trainable = False
    assert len(layer.updates) == 0
    assert len(layer.trainable_weights) == 0
    layer.trainable = True
    assert len(layer.updates) == 2
    assert len(layer.trainable_weights) == 2 
Example #20
Source File: wrappers_test.py    From DeepLearning_Wavelet-LSTM with MIT License
def test_TimeDistributed_learning_phase():
    # test layers that need learning_phase to be set
    np.random.seed(1234)
    x = Input(shape=(3, 2))
    y = wrappers.TimeDistributed(layers.Dropout(.999))(x, training=True)
    model = Model(x, y)
    y = model.predict(np.random.random((10, 3, 2)))
    assert_allclose(np.mean(y), 0., atol=1e-1, rtol=1e-1) 
Example #21
Source File: test_keras.py    From wtte-rnn with MIT License
def model_no_masking(discrete_time, init_alpha, max_beta):
    model = Sequential()
    model.add(TimeDistributed(Dense(2), input_shape=(n_timesteps, n_features)))

    model.add(Lambda(wtte.output_lambda, arguments={"init_alpha": init_alpha,
                                                    "max_beta_value": max_beta}))

    if discrete_time:
        loss = wtte.loss(kind='discrete').loss_function
    else:
        loss = wtte.loss(kind='continuous').loss_function

    model.compile(loss=loss, optimizer=RMSprop(lr=lr))

    return model 
Example #22
Source File: model.py    From LipNet with MIT License
def build(self):
        if K.image_data_format() == 'channels_first':
            input_shape = (self.img_c, self.frames_n, self.img_w, self.img_h)
        else:
            input_shape = (self.frames_n, self.img_w, self.img_h, self.img_c)

        self.input_data = Input(name='the_input', shape=input_shape, dtype='float32')

        self.zero1 = ZeroPadding3D(padding=(1, 2, 2), name='zero1')(self.input_data)
        self.conv1 = Conv3D(32, (3, 5, 5), strides=(1, 2, 2), activation='relu', kernel_initializer='he_normal', name='conv1')(self.zero1)
        self.maxp1 = MaxPooling3D(pool_size=(1, 2, 2), strides=(1, 2, 2), name='max1')(self.conv1)
        self.drop1 = Dropout(0.5)(self.maxp1)

        self.zero2 = ZeroPadding3D(padding=(1, 2, 2), name='zero2')(self.drop1)
        self.conv2 = Conv3D(64, (3, 5, 5), strides=(1, 1, 1), activation='relu', kernel_initializer='he_normal', name='conv2')(self.zero2)
        self.maxp2 = MaxPooling3D(pool_size=(1, 2, 2), strides=(1, 2, 2), name='max2')(self.conv2)
        self.drop2 = Dropout(0.5)(self.maxp2)

        self.zero3 = ZeroPadding3D(padding=(1, 1, 1), name='zero3')(self.drop2)
        self.conv3 = Conv3D(96, (3, 3, 3), strides=(1, 1, 1), activation='relu', kernel_initializer='he_normal', name='conv3')(self.zero3)
        self.maxp3 = MaxPooling3D(pool_size=(1, 2, 2), strides=(1, 2, 2), name='max3')(self.conv3)
        self.drop3 = Dropout(0.5)(self.maxp3)

        self.resh1 = TimeDistributed(Flatten())(self.drop3)

        self.gru_1 = Bidirectional(GRU(256, return_sequences=True, kernel_initializer='Orthogonal', name='gru1'), merge_mode='concat')(self.resh1)
        self.gru_2 = Bidirectional(GRU(256, return_sequences=True, kernel_initializer='Orthogonal', name='gru2'), merge_mode='concat')(self.gru_1)

        # transforms RNN output to character activations:
        self.dense1 = Dense(self.output_size, kernel_initializer='he_normal', name='dense1')(self.gru_2)

        self.y_pred = Activation('softmax', name='softmax')(self.dense1)

        self.labels = Input(name='the_labels', shape=[self.absolute_max_string_len], dtype='float32')
        self.input_length = Input(name='input_length', shape=[1], dtype='int64')
        self.label_length = Input(name='label_length', shape=[1], dtype='int64')

        self.loss_out = CTC('ctc', [self.y_pred, self.labels, self.input_length, self.label_length])

        self.model = Model(inputs=[self.input_data, self.labels, self.input_length, self.label_length], outputs=self.loss_out)