Python keras.layers.recurrent.GRU Examples

The following are 24 code examples showing how to use keras.layers.recurrent.GRU(), extracted from open source projects. Each example notes the project, author, file, and license it was taken from.

You may also want to check out all available functions and classes of the module keras.layers.recurrent.
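As a quick orientation before the project examples, here is a minimal sketch of a Sequential model built around a GRU layer. It is an illustration only, not taken from any project below; the layer sizes and the binary-classification setup are arbitrary assumptions.

from keras.models import Sequential
from keras.layers.core import Dense
from keras.layers.recurrent import GRU

# Sequences of 10 timesteps with 8 features each, fed to 32 GRU units.
# With return_sequences left at its default (False), the GRU emits only
# its final hidden state, which feeds a single sigmoid output unit.
model = Sequential()
model.add(GRU(32, input_shape=(10, 8)))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam')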

Example 1
Project: CAPTCHA-breaking   Author: lllcho   File: test_tasks.py   License: MIT License
def test_temporal_clf(self):
        print('temporal classification data:')
        (X_train, y_train), (X_test, y_test) = get_test_data(nb_train=1000, nb_test=200, input_shape=(5,10), 
                                                             classification=True, nb_class=2)
        print('X_train:', X_train.shape)
        print('X_test:', X_test.shape)
        print('y_train:', y_train.shape)
        print('y_test:', y_test.shape)

        y_train = to_categorical(y_train)
        y_test = to_categorical(y_test)

        model = Sequential()
        model.add(GRU(X_train.shape[-1], y_train.shape[-1]))
        model.add(Activation('softmax'))
        model.compile(loss='categorical_crossentropy', optimizer='adadelta')
        history = model.fit(X_train, y_train, nb_epoch=12, batch_size=16, validation_data=(X_test, y_test), show_accuracy=True, verbose=2)
        self.assertTrue(history.history['val_acc'][-1] > 0.9) 
Example 2
Project: soph   Author: Linusp   File: understand.py   License: MIT License
def understand_variable_length_handle():
    """用来帮助理解如何用 recurrent layer 处理变长序列"""
    model = Sequential()
    model.add(GRU(input_dim=256, output_dim=256, return_sequences=True))
    model.compile(loss='mean_squared_error', optimizer='sgd')
    train_x = np.random.randn(100, 78, 256)
    train_y = np.random.randn(100, 78, 256)
    model.fit(train_x, train_y, verbose=0)

    inz_1 = np.random.randn(1, 78, 256)
    rez_1 = model.predict_proba(inz_1, verbose=0)

    inz_2 = np.random.randn(1, 87, 256)
    rez_2 = model.predict_proba(inz_2, verbose=0)

    print()
    print('=========== understand variable length =================')
    print('With `return_sequences=True`')
    print('Input shape is: {}, output shape is {}'.format(inz_1.shape, rez_1.shape))
    print('Input shape is: {}, output shape is {}'.format(inz_2.shape, rez_2.shape))
    print('====================== end =============================') 
Example 3
Project: soph   Author: Linusp   File: understand.py   License: MIT License
def try_variable_length_train():
    """变长序列训练实验

    实验失败,这样得到的 train_x 和 train_y 的 dtype 是 object 类型,
    取其 shape 得到的是 (100,) ,这将导致训练出错
    """
    model = Sequential()
    model.add(GRU(input_dim=256, output_dim=256, return_sequences=True))
    model.compile(loss='mean_squared_error', optimizer='sgd')

    train_x = []
    train_y = []
    for i in range(100):
        seq_length = np.random.randint(78, 87 + 1)
        sequence = []
        for _ in range(seq_length):
            sequence.append([np.random.randn() for _ in range(256)])

        train_x.append(np.array(sequence))
        train_y.append(np.array(sequence))

    train_x = np.array(train_x)
    train_y = np.array(train_y)

    model.fit(np.array(train_x), np.array(train_y)) 
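The dtype-object failure described in the docstring is usually worked around by zero-padding every sequence to a common length before stacking. A minimal sketch of that fix (my addition, not part of the project; it assumes train_x is the list of 2-D arrays built above and sets masking aside):

import numpy as np

max_len = max(len(seq) for seq in train_x)
padded_x = np.zeros((len(train_x), max_len, 256))
for i, seq in enumerate(train_x):
    padded_x[i, :len(seq), :] = seq  # zero-pad the trailing timesteps
# padded_x now has a regular shape such as (100, 87, 256) and can be passed to fit()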
Example 4
Project: DeepSequenceClassification   Author: napsternxg   File: model.py   License: GNU General Public License v2.0
def gen_model(vocab_size=100, embedding_size=128, maxlen=100, output_size=6, hidden_layer_size=100, num_hidden_layers = 1, RNN_LAYER_TYPE="LSTM"):
    RNN_CLASS = LSTM
    if RNN_LAYER_TYPE == "GRU":
        RNN_CLASS = GRU
    logger.info("Parameters: vocab_size = %s, embedding_size = %s, maxlen = %s, output_size = %s, hidden_layer_size = %s, " %\
            (vocab_size, embedding_size, maxlen, output_size, hidden_layer_size))
    logger.info("Building Model")
    model = Sequential()
    logger.info("Init Model with vocab_size = %s, embedding_size = %s, maxlen = %s" % (vocab_size, embedding_size, maxlen))
    model.add(Embedding(vocab_size, embedding_size, input_length=maxlen))
    logger.info("Added Embedding Layer")
    model.add(Dropout(0.5))
    logger.info("Added Dropout Layer")
    for i in xrange(num_hidden_layers):
        model.add(RNN_CLASS(output_dim=hidden_layer_size, activation='sigmoid', inner_activation='hard_sigmoid', return_sequences=True))
        logger.info("Added %s Layer" % RNN_LAYER_TYPE)
        model.add(Dropout(0.5))
        logger.info("Added Dropout Layer")
    model.add(RNN_CLASS(output_dim=output_size, activation='sigmoid', inner_activation='hard_sigmoid', return_sequences=True))
    logger.info("Added %s Layer" % RNN_LAYER_TYPE)
    model.add(Dropout(0.5))
    logger.info("Added Dropout Layer")
    model.add(TimeDistributedDense(output_size, activation="softmax"))
    logger.info("Added Dropout Layer")
    logger.info("Created model with following config:\n%s" % json.dumps(model.get_config(), indent=4))
    logger.info("Compiling model with optimizer %s" % optimizer)
    start_time = time.time()
    model.compile(loss='categorical_crossentropy', optimizer=optimizer)
    total_time = time.time() - start_time
    logger.info("Model compiled in %.4f seconds." % total_time)
    return model 
Example 5
Project: TrafficFlowPrediction   Author: zbj6633   File: model.py   License: MIT License
def get_gru(units):
    """GRU(Gated Recurrent Unit)
    Build GRU Model.

    # Arguments
        units: List(int), number of input, output and hidden units.
    # Returns
        model: Model, nn model.
    """

    model = Sequential()
    model.add(GRU(units[1], input_shape=(units[0], 1), return_sequences=True))
    model.add(GRU(units[2]))
    model.add(Dropout(0.2))
    model.add(Dense(units[3], activation='sigmoid'))

    return model 
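A hypothetical call, with unit sizes chosen for illustration rather than taken from the project:

# 12 input timesteps, two stacked GRU layers of 64 units, 1 output unit
model = get_gru([12, 64, 64, 1])
model.compile(loss='mse', optimizer='rmsprop')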
Example 6
Project: TrafficFlowPrediction   Author: xiaochus   File: model.py   License: MIT License
def get_gru(units):
    """GRU(Gated Recurrent Unit)
    Build GRU Model.

    # Arguments
        units: List(int), number of input, output and hidden units.
    # Returns
        model: Model, nn model.
    """

    model = Sequential()
    model.add(GRU(units[1], input_shape=(units[0], 1), return_sequences=True))
    model.add(GRU(units[2]))
    model.add(Dropout(0.2))
    model.add(Dense(units[3], activation='sigmoid'))

    return model 
Example 7
Project: deepchem   Author: deepchem   File: models.py   License: MIT License
def __init__(self, input_shape, gru_size=10, tdd_size=4):
    self.model = Sequential()
    self.model.add(
        GRU(gru_size, return_sequences=True, input_shape=input_shape))
    if tdd_size is not None:
      self.model.add(TimeDistributedDense(tdd_size))
    self.model.add(Flatten())
    self.model.add(Dense(1))
    self.model.add(Activation('sigmoid'))
    print('Compiling model...')
    self.model.compile(optimizer='adam', loss='binary_crossentropy') 
Example 8
Project: deepchem   Author: deepchem   File: model.py   License: MIT License
def _buildDecoder(self, z, latent_rep_size, max_length, charset_length):
    h = Dense(latent_rep_size, name='latent_input', activation='relu')(z)
    h = RepeatVector(max_length, name='repeat_vector')(h)
    h = GRU(501, return_sequences=True, name='gru_1')(h)
    h = GRU(501, return_sequences=True, name='gru_2')(h)
    h = GRU(501, return_sequences=True, name='gru_3')(h)
    return TimeDistributed(
        Dense(charset_length, activation='softmax'), name='decoded_mean')(h) 
Example 9
Project: CAPTCHA-breaking   Author: lllcho   File: test_recurrent.py   License: MIT License
def test_gru(self):
        _runner(recurrent.GRU) 
Example 10
Project: CAPTCHA-breaking   Author: lllcho   File: test_tasks.py   License: MIT License
def test_temporal_reg(self):
        print('temporal regression data:')
        (X_train, y_train), (X_test, y_test) = get_test_data(nb_train=1000, nb_test=200, input_shape=(5, 10), output_shape=(2,),
                                                             classification=False)
        print('X_train:', X_train.shape)
        print('X_test:', X_test.shape)
        print('y_train:', y_train.shape)
        print('y_test:', y_test.shape)

        model = Sequential()
        model.add(GRU(X_train.shape[-1], y_train.shape[-1]))
        model.compile(loss='hinge', optimizer='adam')
        history = model.fit(X_train, y_train, nb_epoch=12, batch_size=16, validation_data=(X_test, y_test), verbose=2)
        self.assertTrue(history.history['val_loss'][-1] < 0.8) 
Example 11
Project: soph   Author: Linusp   File: understand.py   License: MIT License
def understand_return_sequence():
    """用来帮助理解 recurrent layer 中的 return_sequences 参数"""
    model_1 = Sequential()
    model_1.add(GRU(input_dim=256, output_dim=256, return_sequences=True))
    model_1.compile(loss='mean_squared_error', optimizer='sgd')
    train_x = np.random.randn(100, 78, 256)
    train_y = np.random.randn(100, 78, 256)
    model_1.fit(train_x, train_y, verbose=0)

    model_2 = Sequential()
    model_2.add(GRU(input_dim=256, output_dim=256, return_sequences=False))
    model_2.compile(loss='mean_squared_error', optimizer='sgd')
    train_x = np.random.randn(100, 78, 256)
    train_y = np.random.randn(100, 256)
    model_2.fit(train_x, train_y, verbose=0)

    inz = np.random.randn(100, 78, 256)
    rez_1 = model_1.predict_proba(inz, verbose=0)
    rez_2 = model_2.predict_proba(inz, verbose=0)

    print()
    print('=========== understand return_sequence =================')
    print('Input shape is: {}'.format(inz.shape))
    print('Output shape of model with `return_sequences=True`: {}'.format(rez_1.shape))
    print('Output shape of model with `return_sequences=False`: {}'.format(rez_2.shape))
    print('====================== end =============================') 
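For these settings, rez_1.shape comes out as (100, 78, 256) and rez_2.shape as (100, 256): with return_sequences=True the GRU emits one 256-dimensional output per timestep, while with return_sequences=False it returns only the output of the last timestep.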
Example 12
Project: soph   Author: Linusp   File: understand.py   License: MIT License
def try_variable_length_train_in_batch():
    """变长序列训练实验(2)"""
    model = Sequential()
    model.add(GRU(input_dim=256, output_dim=256, return_sequences=True))
    model.compile(loss='mean_squared_error', optimizer='sgd')

    # Split into two batches; the sequence length differs between batches
    seq_lens = [78, 87]
    for i in range(2):
        train_x = np.random.randn(20, seq_lens[i], 256)
        train_y = np.random.randn(20, seq_lens[i], 256)
        model.train_on_batch(train_x, train_y) 
Example 13
Project: soph   Author: Linusp   File: pig_latin.py   License: MIT License
def build_model(input_size, seq_len, hidden_size):
    """建立一个 sequence to sequence 模型"""
    model = Sequential()
    model.add(GRU(input_dim=input_size, output_dim=hidden_size, return_sequences=False))
    model.add(Dense(hidden_size, activation="relu"))
    model.add(RepeatVector(seq_len))
    model.add(GRU(hidden_size, return_sequences=True))
    model.add(TimeDistributed(Dense(output_dim=input_size, activation="linear")))
    model.compile(loss="mse", optimizer='adam')

    return model 
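A hypothetical invocation (the sizes are illustrative assumptions, e.g. a 26-symbol one-hot alphabet and sequences of length 10), mapping (batch, 10, 26) inputs to (batch, 10, 26) outputs:

model = build_model(input_size=26, seq_len=10, hidden_size=128)
model.summary()  # encoder GRU -> Dense -> RepeatVector -> decoder GRU -> TimeDistributed(Dense)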
Example 14
Project: DeepSequenceClassification   Author: napsternxg   File: model.py   License: GNU General Public License v2.0
def gen_model_brnn(vocab_size=100, embedding_size=128, maxlen=100, output_size=6, hidden_layer_size=100, num_hidden_layers = 1, RNN_LAYER_TYPE="LSTM"):
    RNN_CLASS = LSTM
    if RNN_LAYER_TYPE == "GRU":
        RNN_CLASS = GRU
    logger.info("Parameters: vocab_size = %s, embedding_size = %s, maxlen = %s, output_size = %s, hidden_layer_size = %s, " %\
            (vocab_size, embedding_size, maxlen, output_size, hidden_layer_size))
    logger.info("Building Graph model for Bidirectional RNN")
    model = Graph()
    model.add_input(name='input', input_shape=(maxlen,), dtype=int)
    logger.info("Added Input node")
    logger.info("Init Model with vocab_size = %s, embedding_size = %s, maxlen = %s" % (vocab_size, embedding_size, maxlen))
    model.add_node(Embedding(vocab_size, embedding_size, input_length=maxlen), name='embedding', input='input')
    logger.info("Added Embedding node")
    model.add_node(Dropout(0.5), name="dropout_0", input="embedding")
    logger.info("Added Dropout Node")
    for i in xrange(num_hidden_layers):
        last_dropout_name = "dropout_%s" % i
        forward_name, backward_name, dropout_name = ["%s_%s" % (k, i + 1) for k in ["forward", "backward", "dropout"]]
        model.add_node(RNN_CLASS(output_dim=hidden_layer_size, activation='sigmoid', inner_activation='hard_sigmoid', return_sequences=True), name=forward_name, input=last_dropout_name)
        logger.info("Added %s forward node[%s]" % (RNN_LAYER_TYPE, i+1))
        model.add_node(RNN_CLASS(output_dim=hidden_layer_size, activation='sigmoid', inner_activation='hard_sigmoid', return_sequences=True, go_backwards=True), name=backward_name, input=last_dropout_name)
        logger.info("Added %s backward node[%s]" % (RNN_LAYER_TYPE, i+1))
        model.add_node(Dropout(0.5), name=dropout_name, inputs=[forward_name, backward_name])
        logger.info("Added Dropout node[%s]" % (i+1))
    model.add_node(TimeDistributedDense(output_size, activation="softmax"), name="tdd", input=dropout_name)
    logger.info("Added TimeDistributedDense node")
    model.add_output(name="output", input="tdd")
    logger.info("Added Output node")
    logger.info("Created model with following config:\n%s" % model.get_config())
    logger.info("Compiling model with optimizer %s" % optimizer)
    start_time = time.time()
    model.compile(optimizer, {"output": 'categorical_crossentropy'})
    total_time = time.time() - start_time
    logger.info("Model compiled in %.4f seconds." % total_time)
    return model 
Example 15
Project: DeepSequenceClassification   Author: napsternxg   File: model.py   License: GNU General Public License v2.0
def gen_model_brnn_multitask(vocab_size=100, embedding_size=128, maxlen=100, output_size=[6, 96], hidden_layer_size=100, num_hidden_layers = 1, RNN_LAYER_TYPE="LSTM"):
    RNN_CLASS = LSTM
    if RNN_LAYER_TYPE == "GRU":
        RNN_CLASS = GRU
    logger.info("Parameters: vocab_size = %s, embedding_size = %s, maxlen = %s, output_size = %s, hidden_layer_size = %s, " %\
            (vocab_size, embedding_size, maxlen, output_size, hidden_layer_size))
    logger.info("Building Graph model for Bidirectional RNN")
    model = Graph()
    model.add_input(name='input', input_shape=(maxlen,), dtype=int)
    logger.info("Added Input node")
    logger.info("Init Model with vocab_size = %s, embedding_size = %s, maxlen = %s" % (vocab_size, embedding_size, maxlen))
    model.add_node(Embedding(vocab_size, embedding_size, input_length=maxlen, mask_zero=True), name='embedding', input='input')
    logger.info("Added Embedding node")
    model.add_node(Dropout(0.5), name="dropout_0", input="embedding")
    logger.info("Added Dropout Node")
    for i in xrange(num_hidden_layers):
        last_dropout_name = "dropout_%s" % i
        forward_name, backward_name, dropout_name = ["%s_%s" % (k, i + 1) for k in ["forward", "backward", "dropout"]]
        model.add_node(RNN_CLASS(output_dim=hidden_layer_size, activation='sigmoid', inner_activation='hard_sigmoid', return_sequences=True), name=forward_name, input=last_dropout_name)
        logger.info("Added %s forward node[%s]" % (RNN_LAYER_TYPE, i+1))
        model.add_node(RNN_CLASS(output_dim=hidden_layer_size, activation='sigmoid', inner_activation='hard_sigmoid', return_sequences=True, go_backwards=True), name=backward_name, input=last_dropout_name)
        logger.info("Added %s backward node[%s]" % (RNN_LAYER_TYPE, i+1))
        model.add_node(Dropout(0.5), name=dropout_name, inputs=[forward_name, backward_name])
        logger.info("Added Dropout node[%s]" % (i+1))
    output_names = []
    for i, output_task_size in enumerate(output_size):
        tdd_name, output_name = "tdd_%s" % i, "output_%s" % i
        model.add_node(TimeDistributedDense(output_task_size, activation="softmax"), name=tdd_name, input=dropout_name)
        logger.info("Added TimeDistributedDense node %s with output_size %s" % (i, output_task_size))
        model.add_output(name=output_name, input=tdd_name)
        output_names.append(output_name)
    logger.info("Added Output node")
    logger.info("Created model with following config:\n%s" % model.get_config())
    logger.info("Compiling model with optimizer %s" % optimizer)
    start_time = time.time()
    model.compile(optimizer, {k: 'categorical_crossentropy' for k in output_names})
    total_time = time.time() - start_time
    logger.info("Model compiled in %.4f seconds." % total_time)
    return model, output_names 
Example 16
Project: ntm_keras   Author: flomlo   File: ntm.py   License: BSD 3-Clause "New" or "Revised" License
def build(self, input_shape):
        bs, input_length, input_dim = input_shape

        self.controller_input_dim, self.controller_output_dim = controller_input_output_shape(
                input_dim, self.units, self.m_depth, self.n_slots, self.shift_range, self.read_heads,
                self.write_heads)
            
        # Now that we've calculated the shape of the controller, we have to add it to the layer/model.
        if self.controller is None:
            self.controller = Dense(
                name = "controller",
                activation = 'linear',
                bias_initializer = 'zeros',
                units = self.controller_output_dim,
                input_shape = (bs, input_length, self.controller_input_dim))
            self.controller.build(input_shape=(self.batch_size, input_length, self.controller_input_dim))
            self.controller_with_state = False


        # This is a fixed shift matrix
        self.C = _circulant(self.n_slots, self.shift_range)

        self.trainable_weights = self.controller.trainable_weights 

        # We need to declare the number of states we want to carry around.
        # In our case the dimension seems to be 6 (LSTM) or 5 (GRU) or 4 (FF),
        # see self.get_initial_states, those respond to:
        # [old_ntm_output] + [init_M, init_wr, init_ww] + [init_h] (LSTM and GRU) + [init_c] (LSTM only)
        # old_ntm_output does not make sense in our world, but is required by the definition of the step function we
        # intend to use.
        # WARNING: What self.state_spec does is only poorly understood,
        # I only copied it from keras/recurrent.py.
        self.states = [None, None, None, None]
        self.state_spec = [InputSpec(shape=(None, self.output_dim)),                            # old_ntm_output
                            InputSpec(shape=(None, self.n_slots, self.m_depth)),                # Memory
                            InputSpec(shape=(None, self.read_heads, self.n_slots)),   # weights_read
                            InputSpec(shape=(None, self.write_heads, self.n_slots))]  # weights_write

        super(NeuralTuringMachine, self).build(input_shape) 
Example 17
Project: DeepLearning_Wavelet-LSTM   Author: hello-sea   File: recurrent_test.py   License: MIT License
def rnn_test(f):
    """
    All the recurrent layers share the same interface,
    so we can run through them with a single function.
    """
    f = keras_test(f)
    return pytest.mark.parametrize('layer_class', [
        recurrent.SimpleRNN,
        recurrent.GRU,
        recurrent.LSTM
    ])(f) 
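A hypothetical test built on this decorator (my sketch, not from the test file): pytest runs the function once per layer class, so a single body covers SimpleRNN, GRU, and LSTM.

@rnn_test
def test_return_sequences(layer_class):
    # layer_class is recurrent.SimpleRNN, recurrent.GRU, or recurrent.LSTM in turn
    layer = layer_class(8, return_sequences=True)
    assert layer.return_sequences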
Example 18
Project: LipNet   Author: rizkiarm   File: model.py   License: MIT License
def build(self):
        if K.image_data_format() == 'channels_first':
            input_shape = (self.img_c, self.frames_n, self.img_w, self.img_h)
        else:
            input_shape = (self.frames_n, self.img_w, self.img_h, self.img_c)

        self.input_data = Input(name='the_input', shape=input_shape, dtype='float32')

        self.zero1 = ZeroPadding3D(padding=(1, 2, 2), name='zero1')(self.input_data)
        self.conv1 = Conv3D(32, (3, 5, 5), strides=(1, 2, 2), activation='relu', kernel_initializer='he_normal', name='conv1')(self.zero1)
        self.maxp1 = MaxPooling3D(pool_size=(1, 2, 2), strides=(1, 2, 2), name='max1')(self.conv1)
        self.drop1 = Dropout(0.5)(self.maxp1)

        self.zero2 = ZeroPadding3D(padding=(1, 2, 2), name='zero2')(self.drop1)
        self.conv2 = Conv3D(64, (3, 5, 5), strides=(1, 1, 1), activation='relu', kernel_initializer='he_normal', name='conv2')(self.zero2)
        self.maxp2 = MaxPooling3D(pool_size=(1, 2, 2), strides=(1, 2, 2), name='max2')(self.conv2)
        self.drop2 = Dropout(0.5)(self.maxp2)

        self.zero3 = ZeroPadding3D(padding=(1, 1, 1), name='zero3')(self.drop2)
        self.conv3 = Conv3D(96, (3, 3, 3), strides=(1, 1, 1), activation='relu', kernel_initializer='he_normal', name='conv3')(self.zero3)
        self.maxp3 = MaxPooling3D(pool_size=(1, 2, 2), strides=(1, 2, 2), name='max3')(self.conv3)
        self.drop3 = Dropout(0.5)(self.maxp3)

        self.resh1 = TimeDistributed(Flatten())(self.drop3)

        self.gru_1 = Bidirectional(GRU(256, return_sequences=True, kernel_initializer='Orthogonal', name='gru1'), merge_mode='concat')(self.resh1)
        self.gru_2 = Bidirectional(GRU(256, return_sequences=True, kernel_initializer='Orthogonal', name='gru2'), merge_mode='concat')(self.gru_1)

        # transforms RNN output to character activations:
        self.dense1 = Dense(self.output_size, kernel_initializer='he_normal', name='dense1')(self.gru_2)

        self.y_pred = Activation('softmax', name='softmax')(self.dense1)

        self.labels = Input(name='the_labels', shape=[self.absolute_max_string_len], dtype='float32')
        self.input_length = Input(name='input_length', shape=[1], dtype='int64')
        self.label_length = Input(name='label_length', shape=[1], dtype='int64')

        self.loss_out = CTC('ctc', [self.y_pred, self.labels, self.input_length, self.label_length])

        self.model = Model(inputs=[self.input_data, self.labels, self.input_length, self.label_length], outputs=self.loss_out) 
Example 19
Project: GRU-D   Author: PeterChe1990   File: grud_layers.py   License: MIT License
def build(self, input_shape):
        # Note input_shape will be list of shapes of initial states
        # if these are passed in __call__.

        if not isinstance(input_shape, list) or len(input_shape) <= 2:
            raise ValueError('input_shape of GRU-D should be a list of at least 3.')
        input_shape = input_shape[:3]

        batch_size = input_shape[0][0] if self.stateful else None
        self.input_spec[0] = InputSpec(shape=(batch_size, None, input_shape[0][-1]))
        self.input_spec[1] = InputSpec(shape=(batch_size, None, input_shape[1][-1]))
        self.input_spec[2] = InputSpec(shape=(batch_size, None, 1))

        # allow GRUDCell to build before we set or validate state_spec
        step_input_shape = [(i_s[0],) + i_s[2:] for i_s in input_shape]
        self.cell.build(step_input_shape)

        # set or validate state_spec
        state_size = list(self.cell.state_size)

        if self.state_spec is not None:
            # initial_state was passed in call, check compatibility
            if [spec.shape[-1] for spec in self.state_spec] != state_size:
                raise ValueError(
                    'An `initial_state` was passed that is not compatible with '
                    '`cell.state_size`. Received `state_spec`={}; '
                    'however `cell.state_size` is '
                    '{}'.format(self.state_spec, self.cell.state_size))
        else:
            self.state_spec = [InputSpec(shape=(None, dim))
                               for dim in state_size]
        if self.stateful:
            self.reset_states()
        self.built = True 
Example 20
Project: GRU-D   Author: PeterChe1990   File: grud_layers.py   License: MIT License
def __call__(self, inputs, initial_state=None, **kwargs):
        # We skip `__call__` of `RNN` and `GRU` in this case and directly execute
        # GRUD's great-grandparent's method.
        inputs, initial_state = _standardize_grud_args(inputs, initial_state)

        if initial_state is None:
            return super(RNN, self).__call__(inputs, **kwargs)

        # If `initial_state` is specified and is Keras
        # tensors, then add it to the inputs and temporarily modify the
        # input_spec to include them.

        additional_inputs = []
        additional_specs = []
        kwargs['initial_state'] = initial_state
        additional_inputs += initial_state
        self.state_spec = [InputSpec(shape=K.int_shape(state))
                           for state in initial_state]
        additional_specs += self.state_spec
        # at this point additional_inputs cannot be empty
        is_keras_tensor = K.is_keras_tensor(additional_inputs[0])
        for tensor in additional_inputs:
            if K.is_keras_tensor(tensor) != is_keras_tensor:
                raise ValueError('The initial state or constants of an RNN'
                                 ' layer cannot be specified with a mix of'
                                 ' Keras tensors and non-Keras tensors'
                                 ' (a "Keras tensor" is a tensor that was'
                                 ' returned by a Keras layer, or by `Input`)')

        if is_keras_tensor:
            # Compute the full input spec, including state and constants
            full_input = inputs + additional_inputs
            full_input_spec = self.input_spec + additional_specs
            # Perform the call with temporarily replaced input_spec
            original_input_spec = self.input_spec
            self.input_spec = full_input_spec
            output = super(RNN, self).__call__(full_input, **kwargs)
            self.input_spec = original_input_spec
            return output
        return super(RNN, self).__call__(inputs, **kwargs) 
Example 21
Project: deep_qa   Author: allenai   File: attentive_gru.py   License: Apache License 2.0
def build(self, input_shape):
        """
        This is used by Keras to verify things, but also to build the weights.
        The only differences from the Keras GRU (which we copied exactly
        other than the below) are:
        We generate weights with dimension input_dim[2] - 1, rather than
        dimension input_dim[2].
        There are a few variables which are created in non-'gpu' modes which
        are not required. These are commented out but left in for clarity below.
        """
        new_input_shape = list(input_shape)
        new_input_shape[2] -= 1
        super(AttentiveGru, self).build(tuple(new_input_shape))
        self.input_spec = [InputSpec(shape=input_shape)] 
Example 22
Project: deep_qa   Author: allenai   File: attentive_gru.py   License: Apache License 2.0
def preprocess_input(self, inputs, training=None):
        """
        We have to override this preprocessing step because, when running on the CPU,
        we do the weight-input multiplications in the internals of the GRU as separate,
        smaller matrix multiplications and concatenate them afterwards. Therefore, before
        this happens, we split off the attention and then add it back afterwards.
        """
        if self.implementation == 0:

            attention = inputs[:, :, 0]  # Shape:(samples, knowledge_length)
            inputs = inputs[:, :, 1:]  # Shape:(samples, knowledge_length, word_dim)

            input_shape = self.input_spec[0].shape
            input_dim = input_shape[2] - 1
            timesteps = input_shape[1]

            x_z = _time_distributed_dense(inputs, self.kernel_z, self.bias_z,
                                          self.dropout, input_dim, self.units,
                                          timesteps, training=training)
            x_r = _time_distributed_dense(inputs, self.kernel_r, self.bias_r,
                                          self.dropout, input_dim, self.units,
                                          timesteps, training=training)
            x_h = _time_distributed_dense(inputs, self.kernel_h, self.bias_h,
                                          self.dropout, input_dim, self.units,
                                          timesteps, training=training)

            # Add attention back onto its original place.
            return K.concatenate([K.expand_dims(attention, 2), x_z, x_r, x_h], axis=2)
        else:
            return inputs 
Example 23
Project: emailinsight   Author: andreykurenkov   File: kerasClassify.py   License: MIT License
def evaluate_recurrent_model(dataset,num_classes):
    (X_train, Y_train), (X_test, Y_test) = dataset
    max_features = 20000
    maxlen = 125  # cut texts after this number of words (among top max_features most common words)
    batch_size = 32

    print(len(X_train), 'train sequences')
    print(len(X_test), 'test sequences')
    print("Pad sequences (samples x time) with maxlen %d"%maxlen)
    X_train = sequence.pad_sequences(X_train, maxlen=maxlen)
    X_test = sequence.pad_sequences(X_test, maxlen=maxlen)
    print('X_train shape:', X_train.shape)
    print('X_test shape:', X_test.shape)

    print('Build model...')
    model = Sequential()
    model.add(Embedding(max_features, 128, input_length=maxlen))
    model.add(GRU(512))  # try using a GRU instead, for fun
    model.add(Dropout(0.5))
    model.add(Dense(num_classes))
    model.add(Activation('softmax'))

    # try using different optimizers and different optimizer configs
    model.compile(loss='categorical_crossentropy',optimizer='adam')

    print("Train...")
    model.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=15,
              validation_data=(X_test, Y_test), show_accuracy=True)
    score, acc = model.evaluate(X_test, Y_test,
                                batch_size=batch_size,
                                show_accuracy=True)
    if verbose:
        print('Test score:', score)
        print('Test accuracy:', acc)
    return acc
Example 24
Project: keras-molecules   Author: maxhodak   File: model.py   License: MIT License
def _buildDecoder(self, z, latent_rep_size, max_length, charset_length):
        h = Dense(latent_rep_size, name='latent_input', activation='relu')(z)
        h = RepeatVector(max_length, name='repeat_vector')(h)
        h = GRU(501, return_sequences=True, name='gru_1')(h)
        h = GRU(501, return_sequences=True, name='gru_2')(h)
        h = GRU(501, return_sequences=True, name='gru_3')(h)
        return TimeDistributed(Dense(charset_length, activation='softmax'), name='decoded_mean')(h)