Python keras.layers.recurrent.GRU() Examples

The following code examples show how to use keras.layers.recurrent.GRU(). They are drawn from open source Python projects.
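
The snippets below span several Keras generations, so GRU appears with two constructor conventions: the early positional form GRU(input_dim, output_dim) and the later GRU(units, ...) form used with input_shape. For orientation, here is a minimal sketch in the later style; the layer sizes and random data are illustrative assumptions, not taken from any project below.

import numpy as np
from keras.models import Sequential
from keras.layers import Dense
from keras.layers.recurrent import GRU

# Minimal sketch: 8 samples of 10 timesteps x 64 features -> 32 GRU units.
model = Sequential()
model.add(GRU(32, input_shape=(10, 64)))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam')
model.fit(np.random.randn(8, 10, 64),
          np.random.randint(0, 2, size=(8, 1)),
          verbose=0)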

Example 1
Project: motion-classification   Author: matthiasplappert   File: base.py    MIT License
def fit(self, X, y):
        assert isinstance(X, list)  #TODO: this should not be an assert
        assert len(y) > 0
        assert len(X) == len(y)

        X = pad_sequences(X)
        print X.shape, y.shape

        n_features = X.shape[2]
        self.n_labels_ = y.shape[1]
        print n_features, self.n_labels_

        model = Sequential()
        model.add(GRU(n_features, 128))
        model.add(Dropout(0.1))
        model.add(BatchNormalization(128))
        model.add(Dense(128, self.n_labels_))
        model.add(Activation('sigmoid'))

        sgd = opt.SGD(lr=0.005, decay=1e-6, momentum=0., nesterov=True)
        model.compile(loss='categorical_crossentropy', optimizer=sgd, class_mode='categorical')

        model.fit(X, y, batch_size=self.n_batch_size, nb_epoch=self.n_epochs, show_accuracy=True)
        self.model_ = model 
Example 2
Project: CAPTCHA-breaking   Author: lllcho   File: test_tasks.py    MIT License
def test_temporal_clf(self):
        print('temporal classification data:')
        (X_train, y_train), (X_test, y_test) = get_test_data(nb_train=1000, nb_test=200, input_shape=(5,10), 
                                                             classification=True, nb_class=2)
        print('X_train:', X_train.shape)
        print('X_test:', X_test.shape)
        print('y_train:', y_train.shape)
        print('y_test:', y_test.shape)

        y_train = to_categorical(y_train)
        y_test = to_categorical(y_test)

        model = Sequential()
        model.add(GRU(X_train.shape[-1], y_train.shape[-1]))
        model.add(Activation('softmax'))
        model.compile(loss='categorical_crossentropy', optimizer='adadelta')
        history = model.fit(X_train, y_train, nb_epoch=12, batch_size=16, validation_data=(X_test, y_test), show_accuracy=True, verbose=2)
        self.assertTrue(history.history['val_acc'][-1] > 0.9) 
Example 3
Project: stratosphere-lstm   Author: mendozawow   File: neon_lstm.py    MIT License
def build_lstm(input_shape):
    model = Sequential()
    # model.add(Masking(input_shape=input_shape, mask_value=-1.))
    model.add(Embedding(input_shape[0], 128, input_length=input_shape[1]))

    model.add(Convolution1D(nb_filter=64,
                            filter_length=5,
                            border_mode='valid',
                            activation='relu',
                            subsample_length=1))
    model.add(MaxPooling1D(pool_length=4))

    model.add(GRU(128))

    # model.add(GRU(128, return_sequences=False))
    # Add dropout if overfitting
    # model.add(Dropout(0.5))
    model.add(Dense(1))
    model.add(Activation('sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model 
Example 4
Project: stratosphere-lstm   Author: mendozawow   File: dga_lstm.py    MIT License
def build_lstm(input_shape):
    model = Sequential()
    # model.add(Masking(input_shape=input_shape, mask_value=-1.))
    model.add(Embedding(input_shape[0], 128, input_length=input_shape[1]))

    model.add(Convolution1D(nb_filter=64,
                            filter_length=5,
                            border_mode='valid',
                            activation='relu',
                            subsample_length=1))
    model.add(MaxPooling1D(pool_length=model.output_shape[1]))

    model.add(Flatten())

    model.add(Dense(128))

    # model.add(GRU(128, return_sequences=False))
    # Add dropout if overfitting
    # model.add(Dropout(0.5))
    model.add(Dense(1))
    model.add(Activation('sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model 
Example 5
Project: RPGOne   Author: RTHMaK   File: knowledge_combiners.py    Apache License 2.0
def build(self, input_shape):
        """
        This is used by Keras to verify things, but also to build the weights.
        The only differences from the Keras GRU (which we copied exactly
        other than the below) are:

        - We generate weights with dimension input_dim[2] - 1, rather than
          dimension input_dim[2].
        - There are a few variables which are created in non-'gpu' modes which
          are not required, and actually raise errors in Theano if you include them in
          the trainable weights (as Theano will alert you if you try to compute a gradient
          of a loss wrt a constant). These are commented out but left in for clarity below.
        """
        new_input_shape = list(input_shape)
        new_input_shape[2] -= 1
        super(AttentiveGRUKnowledgeCombiner, self).build(tuple(new_input_shape))
        self.input_spec = [InputSpec(shape=input_shape)] 
Example 6
Project: soph   Author: Linusp   File: understand.py    MIT License
def understand_variable_length_handle():
    """用来帮助理解如何用 recurrent layer 处理变长序列"""
    model = Sequential()
    model.add(GRU(input_dim=256, output_dim=256, return_sequences=True))
    model.compile(loss='mean_squared_error', optimizer='sgd')
    train_x = np.random.randn(100, 78, 256)
    train_y = np.random.randn(100, 78, 256)
    model.fit(train_x, train_y, verbose=0)

    inz_1 = np.random.randn(1, 78, 256)
    rez_1 = model.predict_proba(inz_1, verbose=0)

    inz_2 = np.random.randn(1, 87, 256)
    rez_2 = model.predict_proba(inz_2, verbose=0)

    print()
    print('=========== understand variable length =================')
    print('With `return_sequences=True`')
    print('Input shape is: {}, output shape is {}'.format(inz_1.shape, rez_1.shape))
    print('Input shape is: {}, output shape is {}'.format(inz_2.shape, rez_2.shape))
    print('====================== end =============================') 
Example 7
Project: soph   Author: Linusp   File: understand.py    MIT License
def try_variable_length_train():
    """变长序列训练实验

    实验失败,这样得到的 train_x 和 train_y 的 dtype 是 object 类型,
    取其 shape 得到的是 (100,) ,这将导致训练出错
    """
    model = Sequential()
    model.add(GRU(input_dim=256, output_dim=256, return_sequences=True))
    model.compile(loss='mean_squared_error', optimizer='sgd')

    train_x = []
    train_y = []
    for i in range(100):
        seq_length = np.random.randint(78, 87 + 1)
        sequence = []
        for _ in range(seq_length):
            sequence.append([np.random.randn() for _ in range(256)])

        train_x.append(np.array(sequence))
        train_y.append(np.array(sequence))

    train_x = np.array(train_x)
    train_y = np.array(train_y)

    model.fit(np.array(train_x), np.array(train_y)) 
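
A common fix for the failure described above is to pad the ragged sequences into a single rectangular array and mask the padding. The sketch below is a hedged rework, not part of the soph project; the -1. pad value and the 87-step maximum are arbitrary assumptions.

import numpy as np
from keras.models import Sequential
from keras.layers import Masking
from keras.layers.recurrent import GRU

# Sketch: pad each variable-length sequence to maxlen so the array stays float.
sequences = [np.random.randn(np.random.randint(78, 87 + 1), 256)
             for _ in range(100)]
maxlen = 87
train_x = np.full((100, maxlen, 256), -1., dtype='float32')
for i, seq in enumerate(sequences):
    train_x[i, :len(seq)] = seq
train_y = train_x.copy()  # autoencoding target, mirroring the experiment above

model = Sequential()
model.add(Masking(mask_value=-1., input_shape=(maxlen, 256)))
model.add(GRU(256, return_sequences=True))
model.compile(loss='mean_squared_error', optimizer='sgd')
model.fit(train_x, train_y, verbose=0)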
Example 8
Project: LSTM-GRU-CNN-MLP   Author: ansleliu   File: gru.py    MIT License
def build_model(layers):
    model = Sequential()

    model.add(GRU(input_dim=layers[0], output_dim=layers[1], activation='tanh', return_sequences=True))
    model.add(Dropout(0.15))  # Dropout overfitting

    # model.add(GRU(layers[2],activation='tanh', return_sequences=True))
    # model.add(Dropout(0.2))  # Dropout overfitting

    model.add(GRU(layers[2], activation='tanh', return_sequences=False))
    model.add(Dropout(0.15))  # Dropout overfitting

    model.add(Dense(output_dim=layers[3]))
    model.add(Activation("linear"))

    start = time.time()
    # sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
    # model.compile(loss="mse", optimizer=sgd)
    model.compile(loss="mse", optimizer="rmsprop") # Nadam rmsprop
    print "Compilation Time : ", time.time() - start
    return model 
Example 9
Project: TrafficFlowPrediction   Author: zbj6633   File: model.py    MIT License
def get_gru(units):
    """GRU(Gated Recurrent Unit)
    Build GRU Model.

    # Arguments
        units: List(int), number of input, output and hidden units.
    # Returns
        model: Model, nn model.
    """

    model = Sequential()
    model.add(GRU(units[1], input_shape=(units[0], 1), return_sequences=True))
    model.add(GRU(units[2]))
    model.add(Dropout(0.2))
    model.add(Dense(units[3], activation='sigmoid'))

    return model 
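
A hedged usage sketch for get_gru: the [12, 64, 64, 1] sizes, loss, and random data are illustrative assumptions, not values from the TrafficFlowPrediction project.

import numpy as np

# Sketch: 12 lag steps of one scalar series -> a single prediction in [0, 1].
model = get_gru([12, 64, 64, 1])
model.compile(loss='mse', optimizer='rmsprop')
x = np.random.rand(32, 12, 1)  # (samples, timesteps, features)
y = np.random.rand(32, 1)
model.fit(x, y, batch_size=8, epochs=2, verbose=0)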
Example 10
Project: CDANs   Author: a-gardner1   File: Postures.py    MIT License
def makeAutoencoder(input, maxNumTimesteps, numFeatures, 
                    encodedSize=11, hiddenSize=11, 
                    numRNNLayers=1, bidirectional=False,
                    l2Reg = 0.0001, 
                    dropoutH = 0,
                    activation='relu', 
                    numDeepLayers=1, deepNetSize=11,
                    makeSymmetric = False):
    encoder = makePostureRNNModel(input, numClasses=encodedSize,
                                  hiddenSize=hiddenSize, 
                                  numRNNLayers=numRNNLayers, bidirectional=bidirectional,
                                  l2Reg = l2Reg, useGRU=True,
                                  dropoutH = dropoutH, activation=activation, 
                                  numDeepLayers=numDeepLayers, deepNetSize=deepNetSize,
                                  makeSymmetric = makeSymmetric, stripSoftmax = True)
    autoencoder = RepeatVector(maxNumTimesteps)(encoder)
    #for j in xrange(numDeepLayers):
    #    decoder = TimeDistributed(Dense(deepNetSize, activation = activation, W_regularizer = l2(l2Reg)))(autoencoder)
    for i in xrange(numRNNLayers):
        autoencoder = GRU(hiddenSize, return_sequences = True, 
                          W_regularizer= l2(l2Reg))(autoencoder)
        if dropoutH and i < numRNNLayers - 1:
            autoencoder = Dropout(dropoutH)(autoencoder)
    autoencoder = TimeDistributed(Dense(numFeatures, activation = 'linear'))(autoencoder)
    return encoder, autoencoder 
Example 11
Project: BOP2017   Author: crh19970307   File: output.py    MIT License
def train():
	data=loadfromjson()
	taglist=[]
	for index,item in enumerate(data['datalist']):
		if item[0]=='0':
			taglist.append(0)
		else:
			if item[0]=='1':
				taglist.append(1)
			else:
				print('ERROR\n')
				print(index)
		#print(len(data['vectorlist'][index]))
	#xa=np.array(data['vectorlist'])
	xa=np.zeros((len(data['vectorlist']),50,60),dtype='float64')
	for index1,items in enumerate(data['vectorlist']):
		for index2,item2 in enumerate(items):
			if index2==50:
				break
			xa[index1][index2]=item2

	#xa=np.random.rand(len(data['vectorlist']),50,60)
	ya=np.array(taglist)
	#print(np.size(xa))
	#print(np.size(ya))
	print('Build model...')  
	model = Sequential()  
	#model.add(Embedding(60,32))  
	#model.add(LSTM(128)) # try using a GRU instead, for fun  
	#model.add(LSTM(32,input_shape=(10,60)))
	#model.add(LSTM(32,input_length=50,input_dim=60))
	model.add(LSTM(32,input_dim=60))
	print('LSTM added')
	model.add(Dropout(0.5))  	
	model.add(Dense( 1))  
	model.add(Activation('sigmoid'))  
	model.compile(loss='binary_crossentropy', optimizer='adam', class_mode="binary")  
	model.fit(xa, ya, batch_size=16, nb_epoch=100)  # training takes several hours
	model.save('my_model.h5') 
Example 12
Project: BOP2017   Author: crh19970307   File: output.py    MIT License
def train():
	data=loadfromjson()
	taglist=[]
	for index,item in enumerate(data['datalist']):
		if item[0]=='0':
			taglist.append(0)
		else:
			if item[0]=='1':
				taglist.append(1)
			else:
				print('ERROR\n')
				print(index)
		#print(len(data['vectorlist'][index]))
	#xa=np.array(data['vectorlist'])
	xa=np.zeros((len(data['vectorlist']),50,60),dtype='float64')
	for index1,items in enumerate(data['vectorlist']):
		for index2,item2 in enumerate(items):
			if index2==50:
				break
			xa[index1][index2]=item2

	#xa=np.random.rand(len(data['vectorlist']),50,60)
	ya=np.array(taglist)
	#print(np.size(xa))
	#print(np.size(ya))
	print('Build model...')  
	model = Sequential()  
	#model.add(Embedding(60,32))  
	#model.add(LSTM(128)) # try using a GRU instead, for fun  
	#model.add(LSTM(32,input_shape=(10,60)))
	#model.add(LSTM(32,input_length=50,input_dim=60))
	model.add(LSTM(32,input_dim=60))
	print('LSTM added')
	model.add(Dropout(0.5))  	
	model.add(Dense( 1))  
	model.add(Activation('sigmoid'))  
	model.compile(loss='binary_crossentropy', optimizer='adam', class_mode="binary")  
	model.fit(xa, ya, batch_size=16, nb_epoch=100)
	model.save('my_model.h5') 
Example 13
Project: BOP2017   Author: crh19970307   File: train.py    MIT License
def train():
	data=loadfromjson()
	taglist=[]
	for index,item in enumerate(data['datalist']):
		if item[0]=='0':
			taglist.append(0)
		else:
			if item[0]=='1':
				taglist.append(1)
			else:
				print('ERROR\n')
				print(index)
				taglist.append(0)
		#print(len(data['vectorlist'][index]))
	#xa=np.array(data['vectorlist'])
	xa=np.zeros((len(data['vectorlist']),30,60),dtype='float64')
	for index1,items in enumerate(data['vectorlist']):
		for index2,item2 in enumerate(items):
			if index2==30:
				break
			xa[index1][index2]=item2

	#xa=np.random.rand(len(data['vectorlist']),50,60)
	ya=np.array(taglist)
	#print(np.size(xa))
	#print(np.size(ya))
	print('Build model...')  
	model = Sequential()  
	#model.add(Embedding(60,32))  
	#model.add(LSTM(128)) # try using a GRU instead, for fun  
	#model.add(LSTM(32,input_shape=(10,60)))
	#model.add(LSTM(32,input_length=50,input_dim=60))
	model.add(LSTM(128,input_length=30,input_dim=60))
	print('LSTM added')
	model.add(Dropout(0.5))  	
	model.add(Dense( 1))  
	model.add(Activation('sigmoid'))  
	model.compile(loss='binary_crossentropy', optimizer='adam', class_mode="binary")  
	model.fit(xa, ya, batch_size=128, nb_epoch=100)  # training takes several hours
	model.save('sentencelen30_230000_lstm128_epoch200_model.h5') 
Example 14
Project: cmu-ammml-project   Author: jayanthkoushik   File: gru2.py    MIT License
def GRU2(vocab_size, embedding_size, max_feats, hidden_layer_size,
        dropout_prob):
    model = Sequential()
    model.add(Embedding(input_dim=vocab_size, output_dim=embedding_size,
                        input_length=max_feats))
    model.add(GRU(output_dim=hidden_layer_size, activation="tanh",
                return_sequences=True))
    model.add(Dropout(dropout_prob))
    model.add(GRU(output_dim=hidden_layer_size, activation="tanh"))
    model.add(Dropout(dropout_prob))
    model.add(Dense(output_dim=1, activation="sigmoid"))
    return model 
Example 15
Project: lipnet   Author: osalinasv   File: layers.py    Apache License 2.0
def create_bi_gru_layer(name: str, input_layer, units: int = GRU_UNITS, activation: str = GRU_ACTIVATION) -> Bidirectional:
	return Bidirectional(GRU(units, return_sequences=True, activation=activation, kernel_initializer=GRU_KERNEL_INIT, name=name), merge_mode='concat')(input_layer) 
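
A hedged usage sketch: GRU_UNITS, GRU_ACTIVATION, and GRU_KERNEL_INIT are module-level constants in the project's layers.py, so the explicit arguments and input shape below are assumptions for illustration only.

from keras.layers import Input
from keras.models import Model

# Sketch: wire the bidirectional GRU over a dummy (timesteps, features) input.
inputs = Input(shape=(75, 512))  # illustrative shape
x = create_bi_gru_layer('bi_gru_1', inputs, units=256, activation='tanh')
model = Model(inputs=inputs, outputs=x)  # concat merge -> (None, 75, 512)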
Example 16
Project: CAPTCHA-breaking   Author: lllcho   File: test_tasks.py    MIT License
def test_temporal_reg(self):
        print('temporal regression data:')
        (X_train, y_train), (X_test, y_test) = get_test_data(nb_train=1000, nb_test=200, input_shape=(5, 10), output_shape=(2,),
                                                             classification=False)
        print('X_train:', X_train.shape)
        print('X_test:', X_test.shape)
        print('y_train:', y_train.shape)
        print('y_test:', y_test.shape)

        model = Sequential()
        model.add(GRU(X_train.shape[-1], y_train.shape[-1]))
        model.compile(loss='hinge', optimizer='adam')
        history = model.fit(X_train, y_train, nb_epoch=12, batch_size=16, validation_data=(X_test, y_test), verbose=2)
        self.assertTrue(history.history['val_loss'][-1] < 0.8) 
Example 17
Project: stratosphere-lstm   Author: mendozawow   File: dga_lstm.py    MIT License
def build_lstm(input_shape):
    model = Sequential()
    model.add(Masking(input_shape=input_shape, mask_value=-1.))
    # model.add(GRU(128, return_sequences=True))

    model.add(GRU(128, return_sequences=False))
    # Add dropout if overfitting
    # model.add(Dropout(0.5))
    model.add(Dense(1))
    model.add(Activation('sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['accuracy'])
    return model 
Example 18
Project: RPGOne   Author: RTHMaK   File: knowledge_encoders.py    Apache License 2.0
def __init__(self, params: Dict[str, Any]):
        self.knowledge_length = params.pop('knowledge_length')
        self.encoding_dim = params.pop('encoding_dim')
        self.has_multiple_backgrounds = params.pop('has_multiple_backgrounds')
        super(BiGRUKnowledgeEncoder, self).__init__(params)
        # TODO: allow the merge_mode of the GRU/other parameters to be passed as arguments.
        self.bi_gru = Bidirectional(GRU(self.encoding_dim, return_sequences=True),
                                    input_shape=(self.knowledge_length, self.encoding_dim),
                                    merge_mode='sum', name='{}_bi_gru'.format(self.name))
        if self.has_multiple_backgrounds:
            # pylint: disable=redefined-variable-type
            self.bi_gru = EncoderWrapper(self.bi_gru, name='wrapped_{}'.format(self.name)) 
Example 19
Project: RPGOne   Author: RTHMaK   File: knowledge_combiners.py    Apache License 2.0
def preprocess_input(self, inputs, training=None):
        '''
        We have to override this preprocessing step, because if we are using the cpu,
        we do the weight-input multiplications in the internals of the GRU as separate,
        smaller matrix multiplications and concatenate them after. Therefore, before this
        happens, we split off the attention and then add it back afterwards.
        '''
        if self.implementation == 0:

            attention = inputs[:, :, 0]  # Shape:(samples, knowledge_length)
            inputs = inputs[:, :, 1:]  # Shape:(samples, knowledge_length, word_dim)

            input_shape = self.input_spec[0].shape
            input_dim = input_shape[2] - 1
            timesteps = input_shape[1]

            x_z = _time_distributed_dense(inputs, self.kernel_z, self.bias_z,
                                          self.dropout, input_dim, self.units,
                                          timesteps, training=training)
            x_r = _time_distributed_dense(inputs, self.kernel_r, self.bias_r,
                                          self.dropout, input_dim, self.units,
                                          timesteps, training=training)
            x_h = _time_distributed_dense(inputs, self.kernel_h, self.bias_h,
                                          self.dropout, input_dim, self.units,
                                          timesteps, training=training)

            # Add attention back in its original place.
            return K.concatenate([K.expand_dims(attention, 2), x_z, x_r, x_h], axis=2)
        else:
            return inputs

# The first item added here will be used as the default in some cases. 
Example 20
Project: soph   Author: Linusp   File: understand.py    MIT License
def understand_return_sequence():
    """用来帮助理解 recurrent layer 中的 return_sequences 参数"""
    model_1 = Sequential()
    model_1.add(GRU(input_dim=256, output_dim=256, return_sequences=True))
    model_1.compile(loss='mean_squared_error', optimizer='sgd')
    train_x = np.random.randn(100, 78, 256)
    train_y = np.random.randn(100, 78, 256)
    model_1.fit(train_x, train_y, verbose=0)

    model_2 = Sequential()
    model_2.add(GRU(input_dim=256, output_dim=256, return_sequences=False))
    model_2.compile(loss='mean_squared_error', optimizer='sgd')
    train_x = np.random.randn(100, 78, 256)
    train_y = np.random.randn(100, 256)
    model_2.fit(train_x, train_y, verbose=0)

    inz = np.random.randn(100, 78, 256)
    rez_1 = model_1.predict_proba(inz, verbose=0)
    rez_2 = model_2.predict_proba(inz, verbose=0)

    print()
    print('=========== understand return_sequence =================')
    print('Input shape is: {}'.format(inz.shape))
    print('Output shape of model with `return_sequences=True`: {}'.format(rez_1.shape))
    print('Output shape of model with `return_sequences=False`: {}'.format(rez_2.shape))
    print('====================== end =============================') 
Example 21
Project: soph   Author: Linusp   File: understand.py    MIT License
def try_variable_length_train_in_batch():
    """变长序列训练实验(2)"""
    model = Sequential()
    model.add(GRU(input_dim=256, output_dim=256, return_sequences=True))
    model.compile(loss='mean_squared_error', optimizer='sgd')

    # Split into two batches; the sequence length differs between batches
    seq_lens = [78, 87]
    for i in range(2):
        train_x = np.random.randn(20, seq_lens[i], 256)
        train_y = np.random.randn(20, seq_lens[i], 256)
        model.train_on_batch(train_x, train_y) 
Example 22
Project: soph   Author: Linusp   File: pig_latin.py    MIT License
def build_model(input_size, seq_len, hidden_size):
    """建立一个 sequence to sequence 模型"""
    model = Sequential()
    model.add(GRU(input_dim=input_size, output_dim=hidden_size, return_sequences=False))
    model.add(Dense(hidden_size, activation="relu"))
    model.add(RepeatVector(seq_len))
    model.add(GRU(hidden_size, return_sequences=True))
    model.add(TimeDistributed(Dense(output_dim=input_size, activation="linear")))
    model.compile(loss="mse", optimizer='adam')

    return model 
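
A hedged usage sketch for the sequence-to-sequence model above; the one-hot toy data and sizes are illustrative assumptions, and autoencoding stands in for a real input/target pair.

import numpy as np

# Sketch: smoke-test the encoder-decoder on random one-hot sequences.
model = build_model(input_size=64, seq_len=12, hidden_size=128)
x = np.eye(64)[np.random.randint(0, 64, size=(200, 12))]  # (200, 12, 64)
model.fit(x, x, batch_size=32, nb_epoch=2, verbose=0)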
Example 23
Project: cervantes   Author: textclf   File: models.py    MIT License
def _generate_model(self, lembedding, num_classes=2, unit='gru', rnn_size=128, train_vectors=True):

        model = Sequential()
        if lembedding.vector_box.W is None:
            emb = Embedding(lembedding.vector_box.size,
                            lembedding.vector_box.vector_dim,
                            W_constraint=None)
        else:
            emb = Embedding(lembedding.vector_box.size,
                            lembedding.vector_box.vector_dim,
                            weights=[lembedding.vector_box.W], W_constraint=None)
        emb.trainable = train_vectors
        model.add(emb)
        if unit == 'gru':
            model.add(GRU(rnn_size))
        else:
            model.add(LSTM(rnn_size))
        model.add(Dropout(0.2))
        if num_classes == 2:
            model.add(Dense(1, activation='sigmoid'))
            if self.optimizer is None:
                self.optimizer = 'rmsprop'
            model.compile(loss='binary_crossentropy', optimizer=self.optimizer, metrics=["accuracy"])
        else:
            if self.optimizer is None:
                self.optimizer = 'adam'
            model.add(Dense(num_classes, activation='softmax'))
            model.compile(loss='categorical_crossentropy', optimizer=self.optimizer, metrics=["accuracy"])

        return model 
Example 24
Project: cervantes   Author: textclf   File: models.py    MIT License
def _generate_model(self, lembedding, num_classes=2, unit='gru', rnn_size=128, train_vectors=True):

        input = Input(shape=(lembedding.size,), dtype='int32')
        if lembedding.vector_box.W is None:
            emb = Embedding(lembedding.vector_box.size,
                            lembedding.vector_box.vector_dim,
                            W_constraint=None)(input)
        else:
            emb = Embedding(lembedding.vector_box.size,
                            lembedding.vector_box.vector_dim,
                            weights=[lembedding.vector_box.W], W_constraint=None, )(input)
        emb.trainable = train_vectors
        if unit == 'gru':
            forward = GRU(rnn_size)(emb)
            backward = GRU(rnn_size, go_backwards=True)(emb)
        else:
            forward = LSTM(rnn_size)(emb)
            backward = LSTM(rnn_size, go_backwards=True)(emb)

        merged_rnn = merge([forward, backward], mode='concat')
        dropped = Dropout(0.5)(merged_rnn)
        if num_classes == 2:
            out = Dense(1, activation='sigmoid')(dropped)
            model = Model(input=input, output=out)
            if self.optimizer is None:
                self.optimizer = 'rmsprop'
            model.compile(loss='binary_crossentropy', optimizer=self.optimizer, metrics=["accuracy"])
        else:
            out = Dense(num_classes, activation='softmax')(dropped)
            model = Model(input=input, output=out)
            if self.optimizer is None:
                self.optimizer = 'adam'
            model.compile(loss='categorical_crossentropy', optimizer=self.optimizer, metrics=["accuracy"])

        return model 
Example 25
Project: cervantes   Author: textclf   File: models.py    MIT License
def __init__(self, lembedding, num_classes=2, ngrams=[1, 2, 3, 4, 5],
                 nfilters=64, rnn_type=GRU, rnn_dim=80, train_vectors=True,
                 optimizer=None):

        if not isinstance(lembedding, TwoLevelsEmbedding):
            raise LanguageClassifierException(
                "The model only accepts two-level language embeddings")
        if num_classes < 2:
            raise LanguageClassifierException("Classes must be 2 or more")

        self.optimizer = optimizer
        model = self._generate_model(lembedding, num_classes, ngrams,
                                     nfilters, rnn_type, rnn_dim, train_vectors)
        super(RCNNClassifier, self).__init__(model, self.optimizer) 
Example 26
Project: DeepSequenceClassification   Author: napsternxg   File: model.py    GNU General Public License v2.0
def gen_model(vocab_size=100, embedding_size=128, maxlen=100, output_size=6, hidden_layer_size=100, num_hidden_layers = 1, RNN_LAYER_TYPE="LSTM"):
    RNN_CLASS = LSTM
    if RNN_LAYER_TYPE == "GRU":
        RNN_CLASS = GRU
    logger.info("Parameters: vocab_size = %s, embedding_size = %s, maxlen = %s, output_size = %s, hidden_layer_size = %s, " %\
            (vocab_size, embedding_size, maxlen, output_size, hidden_layer_size))
    logger.info("Building Model")
    model = Sequential()
    logger.info("Init Model with vocab_size = %s, embedding_size = %s, maxlen = %s" % (vocab_size, embedding_size, maxlen))
    model.add(Embedding(vocab_size, embedding_size, input_length=maxlen))
    logger.info("Added Embedding Layer")
    model.add(Dropout(0.5))
    logger.info("Added Dropout Layer")
    for i in xrange(num_hidden_layers):
        model.add(RNN_CLASS(output_dim=hidden_layer_size, activation='sigmoid', inner_activation='hard_sigmoid', return_sequences=True))
        logger.info("Added %s Layer" % RNN_LAYER_TYPE)
        model.add(Dropout(0.5))
        logger.info("Added Dropout Layer")
    model.add(RNN_CLASS(output_dim=output_size, activation='sigmoid', inner_activation='hard_sigmoid', return_sequences=True))
    logger.info("Added %s Layer" % RNN_LAYER_TYPE)
    model.add(Dropout(0.5))
    logger.info("Added Dropout Layer")
    model.add(TimeDistributedDense(output_size, activation="softmax"))
    logger.info("Added Dropout Layer")
    logger.info("Created model with following config:\n%s" % json.dumps(model.get_config(), indent=4))
    logger.info("Compiling model with optimizer %s" % optimizer)
    start_time = time.time()
    model.compile(loss='categorical_crossentropy', optimizer=optimizer)
    total_time = time.time() - start_time
    logger.info("Model compiled in %.4f seconds." % total_time)
    return model 
Example 27
Project: DeepSequenceClassification   Author: napsternxg   File: model.py    GNU General Public License v2.0
def gen_model_brnn(vocab_size=100, embedding_size=128, maxlen=100, output_size=6, hidden_layer_size=100, num_hidden_layers = 1, RNN_LAYER_TYPE="LSTM"):
    RNN_CLASS = LSTM
    if RNN_LAYER_TYPE == "GRU":
        RNN_CLASS = GRU
    logger.info("Parameters: vocab_size = %s, embedding_size = %s, maxlen = %s, output_size = %s, hidden_layer_size = %s, " %\
            (vocab_size, embedding_size, maxlen, output_size, hidden_layer_size))
    logger.info("Building Graph model for Bidirectional RNN")
    model = Graph()
    model.add_input(name='input', input_shape=(maxlen,), dtype=int)
    logger.info("Added Input node")
    logger.info("Init Model with vocab_size = %s, embedding_size = %s, maxlen = %s" % (vocab_size, embedding_size, maxlen))
    model.add_node(Embedding(vocab_size, embedding_size, input_length=maxlen), name='embedding', input='input')
    logger.info("Added Embedding node")
    model.add_node(Dropout(0.5), name="dropout_0", input="embedding")
    logger.info("Added Dropout Node")
    for i in xrange(num_hidden_layers):
        last_dropout_name = "dropout_%s" % i
        forward_name, backward_name, dropout_name = ["%s_%s" % (k, i + 1) for k in ["forward", "backward", "dropout"]]
        model.add_node(RNN_CLASS(output_dim=hidden_layer_size, activation='sigmoid', inner_activation='hard_sigmoid', return_sequences=True), name=forward_name, input=last_dropout_name)
        logger.info("Added %s forward node[%s]" % (RNN_LAYER_TYPE, i+1))
        model.add_node(RNN_CLASS(output_dim=hidden_layer_size, activation='sigmoid', inner_activation='hard_sigmoid', return_sequences=True, go_backwards=True), name=backward_name, input=last_dropout_name)
        logger.info("Added %s backward node[%s]" % (RNN_LAYER_TYPE, i+1))
        model.add_node(Dropout(0.5), name=dropout_name, inputs=[forward_name, backward_name])
        logger.info("Added Dropout node[%s]" % (i+1))
    model.add_node(TimeDistributedDense(output_size, activation="softmax"), name="tdd", input=dropout_name)
    logger.info("Added TimeDistributedDense node")
    model.add_output(name="output", input="tdd")
    logger.info("Added Output node")
    logger.info("Created model with following config:\n%s" % model.get_config())
    logger.info("Compiling model with optimizer %s" % optimizer)
    start_time = time.time()
    model.compile(optimizer, {"output": 'categorical_crossentropy'})
    total_time = time.time() - start_time
    logger.info("Model compiled in %.4f seconds." % total_time)
    return model 
Example 28
Project: DeepSequenceClassification   Author: napsternxg   File: model.py    GNU General Public License v2.0
def gen_model_brnn_multitask(vocab_size=100, embedding_size=128, maxlen=100, output_size=[6, 96], hidden_layer_size=100, num_hidden_layers = 1, RNN_LAYER_TYPE="LSTM"):
    RNN_CLASS = LSTM
    if RNN_LAYER_TYPE == "GRU":
        RNN_CLASS = GRU
    logger.info("Parameters: vocab_size = %s, embedding_size = %s, maxlen = %s, output_size = %s, hidden_layer_size = %s, " %\
            (vocab_size, embedding_size, maxlen, output_size, hidden_layer_size))
    logger.info("Building Graph model for Bidirectional RNN")
    model = Graph()
    model.add_input(name='input', input_shape=(maxlen,), dtype=int)
    logger.info("Added Input node")
    logger.info("Init Model with vocab_size = %s, embedding_size = %s, maxlen = %s" % (vocab_size, embedding_size, maxlen))
    model.add_node(Embedding(vocab_size, embedding_size, input_length=maxlen, mask_zero=True), name='embedding', input='input')
    logger.info("Added Embedding node")
    model.add_node(Dropout(0.5), name="dropout_0", input="embedding")
    logger.info("Added Dropout Node")
    for i in xrange(num_hidden_layers):
        last_dropout_name = "dropout_%s" % i
        forward_name, backward_name, dropout_name = ["%s_%s" % (k, i + 1) for k in ["forward", "backward", "dropout"]]
        model.add_node(RNN_CLASS(output_dim=hidden_layer_size, activation='sigmoid', inner_activation='hard_sigmoid', return_sequences=True), name=forward_name, input=last_dropout_name)
        logger.info("Added %s forward node[%s]" % (RNN_LAYER_TYPE, i+1))
        model.add_node(RNN_CLASS(output_dim=hidden_layer_size, activation='sigmoid', inner_activation='hard_sigmoid', return_sequences=True, go_backwards=True), name=backward_name, input=last_dropout_name)
        logger.info("Added %s backward node[%s]" % (RNN_LAYER_TYPE, i+1))
        model.add_node(Dropout(0.5), name=dropout_name, inputs=[forward_name, backward_name])
        logger.info("Added Dropout node[%s]" % (i+1))
    output_names = []
    for i, output_task_size in enumerate(output_size):
        tdd_name, output_name = "tdd_%s" % i, "output_%s" % i
        model.add_node(TimeDistributedDense(output_task_size, activation="softmax"), name=tdd_name, input=dropout_name)
        logger.info("Added TimeDistributedDense node %s with output_size %s" % (i, output_task_size))
        model.add_output(name=output_name, input=tdd_name)
        output_names.append(output_name)
    logger.info("Added Output node")
    logger.info("Created model with following config:\n%s" % model.get_config())
    logger.info("Compiling model with optimizer %s" % optimizer)
    start_time = time.time()
    model.compile(optimizer, {k: 'categorical_crossentropy' for k in output_names})
    total_time = time.time() - start_time
    logger.info("Model compiled in %.4f seconds." % total_time)
    return model, output_names 
Example 29
Project: distnet   Author: ssamot   File: layers.py    GNU General Public License v2.0
def get_config(self):
        config = {"name": self.__class__.__name__,
                  "output_dim": self.output_dim,
                  "init": self.init.__name__,
                  "inner_init": self.inner_init.__name__,
                  "activation": self.activation.__name__,
                  "inner_activation": self.inner_activation.__name__,
                  "truncate_gradient": self.truncate_gradient,
                  "return_sequences": self.return_sequences,
                  "input_dim": self.input_dim,
                  "input_length": self.input_length}
        base_config = super(GRU, self).get_config()
        return dict(list(base_config.items()) + list(config.items())) 
Example 30
Project: ntm_keras   Author: flomlo   File: ntm.py    BSD 3-Clause "New" or "Revised" License
def build(self, input_shape):
        bs, input_length, input_dim = input_shape

        self.controller_input_dim, self.controller_output_dim = controller_input_output_shape(
                input_dim, self.units, self.m_depth, self.n_slots, self.shift_range, self.read_heads,
                self.write_heads)
            
        # Now that we've calculated the shape of the controller, we have to add it to the layer/model.
        if self.controller is None:
            self.controller = Dense(
                name = "controller",
                activation = 'linear',
                bias_initializer = 'zeros',
                units = self.controller_output_dim,
                input_shape = (bs, input_length, self.controller_input_dim))
            self.controller.build(input_shape=(self.batch_size, input_length, self.controller_input_dim))
            self.controller_with_state = False


        # This is a fixed shift matrix
        self.C = _circulant(self.n_slots, self.shift_range)

        self.trainable_weights = self.controller.trainable_weights 

        # We need to declare the number of states we want to carry around.
        # In our case the dimension seems to be 6 (LSTM) or 5 (GRU) or 4 (FF),
        # see self.get_initial_states; those correspond to:
        # [old_ntm_output] + [init_M, init_wr, init_ww] + [init_h] (LSTM and GRU) + [init_c] (LSTM only)
        # old_ntm_output does not make sense in our world, but is required by the definition of the step function we
        # intend to use.
        # WARNING: What self.state_spec does is only poorly understood,
        # I only copied it from keras/recurrent.py.
        self.states = [None, None, None, None]
        self.state_spec = [InputSpec(shape=(None, self.output_dim)),                            # old_ntm_output
                            InputSpec(shape=(None, self.n_slots, self.m_depth)),                # Memory
                            InputSpec(shape=(None, self.read_heads, self.n_slots)),   # weights_read
                            InputSpec(shape=(None, self.write_heads, self.n_slots))]  # weights_write

        super(NeuralTuringMachine, self).build(input_shape) 
Example 31
Project: allen-ai-science-qa   Author: arranger1044   File: utils.py    GNU General Public License v3.0
def get_recurrent_layer(model_name, input_size, output_size, return_sequences=False):
    layer = None
    if model_name == 'rnn':
        layer = SimpleRNN(input_dim=input_size, output_dim=output_size, return_sequences=return_sequences)
    elif model_name == 'lstm':
        layer = LSTM(input_dim=input_size, output_dim=output_size, return_sequences=return_sequences)
    elif model_name == 'gru':
        layer = GRU(input_dim=input_size, output_dim=output_size, return_sequences=return_sequences)
    if layer is None:
        raise ValueError('Unknown recurrent layer: %s' % model_name)
    return layer 
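
A hedged usage sketch for the factory above, in the Keras 1.x style matching its input_dim/output_dim keywords; the sizes are illustrative.

from keras.models import Sequential
from keras.layers import Dense

# Sketch: the returned layer can open a Sequential model because it
# carries its own input_dim.
model = Sequential()
model.add(get_recurrent_layer('gru', input_size=100, output_size=64))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam')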
Example 32
Project: functionality-scenario-test-A-2018   Author: TW-NCHC   File: train.py    GNU General Public License v3.0
def create_model(input_shape=(30, 120, 176, 1), rnn_units=128, cnn_units=32, num_gpu=1, nb_category=10):
    # define our time-distributed setup
    inp = Input(shape=input_shape)

    x = TimeDistributed(Conv2D(cnn_units, (3, 3), padding='same', activation='relu'))(inp)
    x = TimeDistributed(Conv2D(cnn_units, (3, 3), padding='same', activation='relu'))(x)
    x = TimeDistributed(MaxPooling2D(pool_size=(2, 2)))(x)
    x = Dropout(.5)(x)

    x = TimeDistributed(Conv2D(cnn_units, (3, 3), padding='same', activation='relu'))(x)
    x = TimeDistributed(Conv2D(cnn_units, (3, 3), padding='same', activation='relu'))(x)
    x = TimeDistributed(MaxPooling2D(pool_size=(2, 2)))(x)
    x = Dropout(.5)(x)

    x = TimeDistributed(Flatten())(x)
    x = GRU(units=rnn_units, return_sequences=True)(x)
    x = Dropout(.5)(x)

    x = TimeDistributed(Flatten())(x)
    x = GRU(units=rnn_units, return_sequences=True)(x)
    x = Dropout(.5)(x)

    x = GRU(units=rnn_units, return_sequences=False)(x)

    x = Dropout(.5)(x)
    x = Dense(nb_category, activation='softmax')(x)

    opt_adm = Adam()
    model = Model(inp, x)
    if num_gpu > 1:
        model = multi_gpu_model(model, gpus=num_gpu) # gpus
    model.compile(optimizer=opt_adm, loss='categorical_crossentropy', metrics=['accuracy'])
    print ("model paerms: %s"%model.count_params())
    return model 
Example 33
Project: LipNet   Author: rizkiarm   File: model.py    MIT License
def build(self):
        if K.image_data_format() == 'channels_first':
            input_shape = (self.img_c, self.frames_n, self.img_w, self.img_h)
        else:
            input_shape = (self.frames_n, self.img_w, self.img_h, self.img_c)

        self.input_data = Input(name='the_input', shape=input_shape, dtype='float32')

        self.zero1 = ZeroPadding3D(padding=(1, 2, 2), name='zero1')(self.input_data)
        self.conv1 = Conv3D(32, (3, 5, 5), strides=(1, 2, 2), activation='relu', kernel_initializer='he_normal', name='conv1')(self.zero1)
        self.maxp1 = MaxPooling3D(pool_size=(1, 2, 2), strides=(1, 2, 2), name='max1')(self.conv1)
        self.drop1 = Dropout(0.5)(self.maxp1)

        self.zero2 = ZeroPadding3D(padding=(1, 2, 2), name='zero2')(self.drop1)
        self.conv2 = Conv3D(64, (3, 5, 5), strides=(1, 1, 1), activation='relu', kernel_initializer='he_normal', name='conv2')(self.zero2)
        self.maxp2 = MaxPooling3D(pool_size=(1, 2, 2), strides=(1, 2, 2), name='max2')(self.conv2)
        self.drop2 = Dropout(0.5)(self.maxp2)

        self.zero3 = ZeroPadding3D(padding=(1, 1, 1), name='zero3')(self.drop2)
        self.conv3 = Conv3D(96, (3, 3, 3), strides=(1, 1, 1), activation='relu', kernel_initializer='he_normal', name='conv3')(self.zero3)
        self.maxp3 = MaxPooling3D(pool_size=(1, 2, 2), strides=(1, 2, 2), name='max3')(self.conv3)
        self.drop3 = Dropout(0.5)(self.maxp3)

        self.resh1 = TimeDistributed(Flatten())(self.drop3)

        self.gru_1 = Bidirectional(GRU(256, return_sequences=True, kernel_initializer='Orthogonal', name='gru1'), merge_mode='concat')(self.resh1)
        self.gru_2 = Bidirectional(GRU(256, return_sequences=True, kernel_initializer='Orthogonal', name='gru2'), merge_mode='concat')(self.gru_1)

        # transforms RNN output to character activations:
        self.dense1 = Dense(self.output_size, kernel_initializer='he_normal', name='dense1')(self.gru_2)

        self.y_pred = Activation('softmax', name='softmax')(self.dense1)

        self.labels = Input(name='the_labels', shape=[self.absolute_max_string_len], dtype='float32')
        self.input_length = Input(name='input_length', shape=[1], dtype='int64')
        self.label_length = Input(name='label_length', shape=[1], dtype='int64')

        self.loss_out = CTC('ctc', [self.y_pred, self.labels, self.input_length, self.label_length])

        self.model = Model(inputs=[self.input_data, self.labels, self.input_length, self.label_length], outputs=self.loss_out) 
Example 34
Project: GRU-D   Author: PeterChe1990   File: grud_layers.py    MIT License
def build(self, input_shape):
        # Note input_shape will be list of shapes of initial states
        # if these are passed in __call__.

        if not isinstance(input_shape, list) or len(input_shape) <= 2:
            raise ValueError('input_shape of GRU-D should be a list of at least 3.')
        input_shape = input_shape[:3]

        batch_size = input_shape[0][0] if self.stateful else None
        self.input_spec[0] = InputSpec(shape=(batch_size, None, input_shape[0][-1]))
        self.input_spec[1] = InputSpec(shape=(batch_size, None, input_shape[1][-1]))
        self.input_spec[2] = InputSpec(shape=(batch_size, None, 1))

        # allow GRUDCell to build before we set or validate state_spec
        step_input_shape = [(i_s[0],) + i_s[2:] for i_s in input_shape]
        self.cell.build(step_input_shape)

        # set or validate state_spec
        state_size = list(self.cell.state_size)

        if self.state_spec is not None:
            # initial_state was passed in call, check compatibility
            if [spec.shape[-1] for spec in self.state_spec] != state_size:
                raise ValueError(
                    'An `initial_state` was passed that is not compatible with '
                    '`cell.state_size`. Received `state_spec`={}; '
                    'however `cell.state_size` is '
                    '{}'.format(self.state_spec, self.cell.state_size))
        else:
            self.state_spec = [InputSpec(shape=(None, dim))
                               for dim in state_size]
        if self.stateful:
            self.reset_states()
        self.built = True 
Example 35
Project: GRU-D   Author: PeterChe1990   File: grud_layers.py    MIT License
def __call__(self, inputs, initial_state=None, **kwargs):
        # We skip `__call__` of `RNN` and `GRU` in this case and directly execute
        # GRUD's great-grandparent's method.
        inputs, initial_state = _standardize_grud_args(inputs, initial_state)

        if initial_state is None:
            return super(RNN, self).__call__(inputs, **kwargs)

        # If `initial_state` is specified and is Keras
        # tensors, then add it to the inputs and temporarily modify the
        # input_spec to include them.

        additional_inputs = []
        additional_specs = []
        kwargs['initial_state'] = initial_state
        additional_inputs += initial_state
        self.state_spec = [InputSpec(shape=K.int_shape(state))
                           for state in initial_state]
        additional_specs += self.state_spec
        # at this point additional_inputs cannot be empty
        is_keras_tensor = K.is_keras_tensor(additional_inputs[0])
        for tensor in additional_inputs:
            if K.is_keras_tensor(tensor) != is_keras_tensor:
                raise ValueError('The initial state or constants of an RNN'
                                 ' layer cannot be specified with a mix of'
                                 ' Keras tensors and non-Keras tensors'
                                 ' (a "Keras tensor" is a tensor that was'
                                 ' returned by a Keras layer, or by `Input`)')

        if is_keras_tensor:
            # Compute the full input spec, including state and constants
            full_input = inputs + additional_inputs
            full_input_spec = self.input_spec + additional_specs
            # Perform the call with temporarily replaced input_spec
            original_input_spec = self.input_spec
            self.input_spec = full_input_spec
            output = super(RNN, self).__call__(full_input, **kwargs)
            self.input_spec = original_input_spec
            return output
        return super(RNN, self).__call__(inputs, **kwargs) 
Example 36
Project: Email-Classification-NNs   Author: 01dkg   File: kerasClassify.py    GNU General Public License v3.0
def evaluate_recurrent_model(dataset,num_classes):
    (X_train, Y_train), (X_test, Y_test) = dataset
    max_features = 20000
    maxlen = 125  # cut texts after this number of words (among top max_features most common words)
    batch_size = 32

    print(len(X_train), 'train sequences')
    print(len(X_test), 'test sequences')
    print("Pad sequences (samples x time) with maxlen %d"%maxlen)
    X_train = sequence.pad_sequences(X_train, maxlen=maxlen)
    X_test = sequence.pad_sequences(X_test, maxlen=maxlen)
    print('X_train shape:', X_train.shape)
    print('X_test shape:', X_test.shape)

    print('Build model...')
    model = Sequential()
    model.add(Embedding(max_features, 128, input_length=maxlen))
    model.add(GRU(512))  # try using a GRU instead, for fun
    model.add(Dropout(0.5))
    model.add(Dense(num_classes))
    model.add(Activation('softmax'))

    # try using different optimizers and different optimizer configs
    model.compile(loss='categorical_crossentropy',optimizer='adam')

    print("Train...")
    model.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=15,
              validation_data=(X_test, Y_test), show_accuracy=True)
    score, acc = model.evaluate(X_test, Y_test,
                                batch_size=batch_size,
                                show_accuracy=True)
    if verbose:
        print('Test score:', score)
        print('Test accuracy:', acc)
    return score[1] 
Example 37
Project: Email-Classification-NNs   Author: 01dkg   File: kerasClassify - Copy.py    GNU General Public License v3.0
def evaluate_recurrent_model(dataset,num_classes):
    (X_train, Y_train), (X_test, Y_test) = dataset
    max_features = 20000
    maxlen = 125  # cut texts after this number of words (among top max_features most common words)
    batch_size = 32

    print(len(X_train), 'train sequences')
    print(len(X_test), 'test sequences')
    print("Pad sequences (samples x time) with maxlen %d"%maxlen)
    X_train = sequence.pad_sequences(X_train, maxlen=maxlen)
    X_test = sequence.pad_sequences(X_test, maxlen=maxlen)
    print('X_train shape:', X_train.shape)
    print('X_test shape:', X_test.shape)

    print('Build model...')
    model = Sequential()
    model.add(Embedding(max_features, 128, input_length=maxlen))
    model.add(GRU(512))  # try using a GRU instead, for fun
    model.add(Dropout(0.5))
    model.add(Dense(num_classes))
    model.add(Activation('softmax'))

    # try using different optimizers and different optimizer configs
    model.compile(loss='categorical_crossentropy',optimizer='adam')

    print("Train...")
    model.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=15,
              validation_data=(X_test, Y_test), show_accuracy=True)
    score, acc = model.evaluate(X_test, Y_test,
                                batch_size=batch_size,
                                show_accuracy=True)
    if verbose:
        print('Test score:', score)
        print('Test accuracy:', acc)
    return score[1] 
Example 38
Project: LSTMtest1   Author: w0nk0   File: lstm_text_keras_n_lemma2.py    GNU General Public License v2.0
def make_net_run52(in_size, out_size, hidden_size=20):
    model = Sequential()
    # model.add(LSTM(input_dim = in_size, output_dim = in_size, init="uniform", activation = "sigmoid", return_sequences=True))
    model.add(GRU(input_dim=in_size, output_dim=int(hidden_size),  return_sequences=True))
    #model.add(Dropout(0.1))
    #model.add(GRU(input_dim=hidden_size, output_dim=int(hidden_size),  return_sequences=False))

    model.add(Dropout(0.4))

    model.add(LSTM(input_dim=hidden_size, output_dim=128))
    model.add(Dropout(0.2))

    #model.add(Dense(input_dim=hidden_size, output_dim=out_size, init="glorot_normal", activation="softmax"))
    #model.add(TimeDistributedDense(input_dim=int(hidden_size/2), output_dim=out_size))
    model.add(Dense(input_dim=int(128), output_dim=out_size))
    model.add(Activation('softmax'))

    # model.add(Dense(input_dim = 5, output_dim = 1, init = "uniform", activation = "tanh"))
    # model.compile(loss = "mean_squared_error", optimizer = "rmsprop",class_mode="binary")
    
    #model.compile(loss='categorical_crossentropy', optimizer='rmsprop', class_mode="binary")  # or binary
    
    rmsfast = RMSprop(lr=0.05) # unused for now
    print("Compiling net..with {} input, {} outputs, {} hidden please hold!".format(in_size, out_size, hidden_size))
    model.compile(loss='categorical_crossentropy', optimizer='rmsprop', class_mode="categorical")  # or binary
    
    return model 
Example 39
Project: deep_qa   Author: allenai   File: attentive_gru.py    Apache License 2.0
def build(self, input_shape):
        """
        This is used by Keras to verify things, but also to build the weights.
        The only differences from the Keras GRU (which we copied exactly
        other than the below) are:
        We generate weights with dimension input_dim[2] - 1, rather than
        dimension input_dim[2].
        There are a few variables which are created in non-'gpu' modes which
        are not required. These are commented out but left in for clarity below.
        """
        new_input_shape = list(input_shape)
        new_input_shape[2] -= 1
        super(AttentiveGru, self).build(tuple(new_input_shape))
        self.input_spec = [InputSpec(shape=input_shape)] 
Example 40
Project: deep_qa   Author: allenai   File: attentive_gru.py    Apache License 2.0
def preprocess_input(self, inputs, training=None):
        """
        We have to override this preprocessing step, because if we are using the cpu,
        we do the weight - input multiplications in the internals of the GRU as separate,
        smaller matrix multiplications and concatenate them after. Therefore, before this
        happens, we split off the attention and then add it back afterwards.

        """
        if self.implementation == 0:

            attention = inputs[:, :, 0]  # Shape:(samples, knowledge_length)
            inputs = inputs[:, :, 1:]  # Shape:(samples, knowledge_length, word_dim)

            input_shape = self.input_spec[0].shape
            input_dim = input_shape[2] - 1
            timesteps = input_shape[1]

            x_z = _time_distributed_dense(inputs, self.kernel_z, self.bias_z,
                                          self.dropout, input_dim, self.units,
                                          timesteps, training=training)
            x_r = _time_distributed_dense(inputs, self.kernel_r, self.bias_r,
                                          self.dropout, input_dim, self.units,
                                          timesteps, training=training)
            x_h = _time_distributed_dense(inputs, self.kernel_h, self.bias_h,
                                          self.dropout, input_dim, self.units,
                                          timesteps, training=training)

            # Add attention back in its original place.
            return K.concatenate([K.expand_dims(attention, 2), x_z, x_r, x_h], axis=2)
        else:
            return inputs 
Example 41
Project: emailinsight   Author: andreykurenkov   File: kerasClassify.py    MIT License
def evaluate_recurrent_model(dataset,num_classes):
    (X_train, Y_train), (X_test, Y_test) = dataset
    max_features = 20000
    maxlen = 125  # cut texts after this number of words (among top max_features most common words)
    batch_size = 32

    print(len(X_train), 'train sequences')
    print(len(X_test), 'test sequences')
    print("Pad sequences (samples x time) with maxlen %d"%maxlen)
    X_train = sequence.pad_sequences(X_train, maxlen=maxlen)
    X_test = sequence.pad_sequences(X_test, maxlen=maxlen)
    print('X_train shape:', X_train.shape)
    print('X_test shape:', X_test.shape)

    print('Build model...')
    model = Sequential()
    model.add(Embedding(max_features, 128, input_length=maxlen))
    model.add(GRU(512))  # try using a GRU instead, for fun
    model.add(Dropout(0.5))
    model.add(Dense(num_classes))
    model.add(Activation('softmax'))

    # try using different optimizers and different optimizer configs
    model.compile(loss='categorical_crossentropy',optimizer='adam')

    print("Train...")
    model.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=15,
              validation_data=(X_test, Y_test), show_accuracy=True)
    score, acc = model.evaluate(X_test, Y_test,
                                batch_size=batch_size,
                                show_accuracy=True)
    if verbose:
        print('Test score:', score)
        print('Test accuracy:', acc)
    return score[1] 
Example 42
Project: drider   Author: w4nderlust   File: utils.py    MIT License
def get_recurrent_layer(model_name, input_size, output_size, return_sequences=False):
    layer = None
    if model_name == 'rnn':
        layer = SimpleRNN(input_dim=input_size, output_dim=output_size, return_sequences=return_sequences)
    elif model_name == 'lstm':
        layer = LSTM(input_dim=input_size, output_dim=output_size, return_sequences=return_sequences)
    elif model_name == 'gru':
        layer = GRU(input_dim=input_size, output_dim=output_size, return_sequences=return_sequences)
    if layer is None:
        raise ValueError('Unknown recurrent layer: %s' % model_name)
    return layer 
Example 43
Project: CDANs   Author: a-gardner1   File: Gestures.py    MIT License
def nameModelFile(prefix, useGRU, numLayers, numNodesPerLayer,
                  randSeed, trainPer, valPer, testPer, totalPer,
                  dropoutI, dropoutH, l2Reg, 
                  center = False, prependMean = False):
    
    modelFile = (prefix + ('GRU' if useGRU else 'LSTM') +'-L' + str(numLayers) 
                    +'-N' + str(numNodesPerLayer)+'-S'+str(randSeed) + '-TS-' 
                    + str(trainPer) + '-' + str(valPer) + '-' +  str(testPer) 
                    + '-' +  str(totalPer)+ '-l2-' + str(l2Reg) 
                    + (('-D-' + str(dropoutI) +'-'+ str(dropoutH)) if (dropoutI or dropoutH) else '')
                    + (('-C' + ('P' if prependMean else '')) if center else ''))
    return modelFile 
Example 44
Project: CDANs   Author: a-gardner1   File: UnlabeledGestures.py    MIT License
def makeSingleOutputModel(unlabeledModel, maxNumMarkers, totalNumFeatures, 
                          numTimeSteps, numClasses, useGRU = False,
                          dropoutI = 0, dropoutH = 0, numRNNLayers = 2, numRNNNodes = 200,
                          numDeepLayers = 1, numDeepNodes = 100, activation = 'relu',
                          l2Reg = 0.0001, prependedMean = False):
    """
    Assumption: 
    unlabeledModel is completely specified from input to output, including masking, dropout, and noise.
    The unlabeledModel assumes markers are given sequentially as input.
    The unlabeledModel is a type derived from Keras Layer.

    numDeepLayers does not include the softmax output layer.
    """
    labeledInput = Input(shape=(numTimeSteps, totalNumFeatures-3*maxNumMarkers))
    unlabeledInput = Input(shape=(numTimeSteps, maxNumMarkers+(1 if prependedMean else 0), 3))
    labeledModel = Masking(0)(labeledInput)
    if dropoutI:
        labeledModel = Dropout(dropoutI)(labeledModel)
    #let totalModel start out as the complete unlabeledModel
    totalModel = TimeDistributed(unlabeledModel)(unlabeledInput)
    totalModel = merge([labeledModel, totalModel], mode = 'concat')
    for i in range(numRNNLayers):
        if useGRU:
            totalModel = GRU(output_dim = numRNNNodes, W_regularizer=l2(l2Reg), 
                             return_sequences=True)(totalModel)
        else:
            totalModel = LSTM(output_dim = numRNNNodes, W_regularizer=l2(l2Reg), 
                             return_sequences=True)(totalModel)
        if dropoutH:
            totalModel = Dropout(dropoutH)(totalModel)
    for i in range(numDeepLayers):
        totalModel = TimeDistributed(Dense(output_dim=numDeepNodes, W_regularizer = l2(l2Reg), 
                                           activation=activation))(totalModel)
        if dropoutH:
            totalModel = Dropout(dropoutH)(totalModel)
    totalModel = TimeDistributed(Dense(output_dim=numClasses, W_regularizer = l2(l2Reg),
                                       activation = 'softmax'))(totalModel)
    return Model(input = [labeledInput, unlabeledInput], output = totalModel) 
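Note that this example is written against the Keras 1 API (merge(..., mode='concat'), output_dim=, W_regularizer=, Model(input=..., output=...)). Under Keras 2 the same three steps would be spelled, for instance:

from keras.layers import Concatenate
from keras.regularizers import l2

totalModel = Concatenate()([labeledModel, totalModel])
totalModel = GRU(numRNNNodes, kernel_regularizer=l2(l2Reg), return_sequences=True)(totalModel)
model = Model(inputs=[labeledInput, unlabeledInput], outputs=totalModel)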
Example 45
Project: CDANs   Author: a-gardner1   File: UnlabeledGestures.py    MIT License
def nameModelFile(prefix, useGRU, unlabeledModelFile, 
                  numRNNLayers, numRNNNodes, 
                  numDeepLayers, numDeepNodes,
                  numULOut, l2Reg, numUlOut):
    modelFile = '-'.join(['GRU' if useGRU else 'LSTM', 
                          'L', str(numRNNLayers), str(numDeepLayers),
                          'N', str(numRNNNodes), str(numDeepNodes), str(numULOut),
                          'l2', str(l2Reg),
                          unlabeledModelFile])
    if len(prefix) > 0:
        modelFile = prefix + '-' + modelFile
    return modelFile 
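Note that the signature takes both numULOut and numUlOut; only the former is used in the body. A sample invocation with arbitrary values:

print(nameModelFile('exp', True, 'unlab.h5', 2, 200, 1, 100, 64, 0.0001, 64))
# -> 'exp-GRU-L-2-1-N-200-100-64-l2-0.0001-unlab.h5'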
Example 46
Project: BOP2017   Author: crh19970307   File: train.py    MIT License
def train():
	data=loadfromjson()
	quelist,answerlist,datalist=getdata2()
	wordlist=spiltword(datalist)
	taglist=[]
	for index,item in enumerate(data['datalist']):
		if item[0]=='0':
			taglist.append(0)
		else:
			if item[0]=='1':
				taglist.append(1)
			else:
				print('ERROR\n')
				print(index)
				taglist.append(0)
		#print(len(data['vectorlist'][index]))
	#xa=np.array(data['vectorlist'])
	w=[]
	for items in data['wordlist']:
		w.extend(items)
	for items in wordlist:
		w.extend(items)
	word_dict = pd.DataFrame(pd.Series(w).value_counts())  # rank words by frequency
	word_dict['id'] = list(range(1, len(word_dict) + 1))
	get_sent = lambda x: list(word_dict['id'][x])
	xa=[]
	for items in data['wordlist']:
		xa.append(get_sent(items))
	#get_sent(data['wordlist'])
	tmp=list(sequence.pad_sequences(xa, maxlen=20,padding='post',truncating='post'))
	xa=np.array(tmp)
	#xa=list(sequence.pad_sequences(xa, maxlen=20)) 
	ya=np.array(taglist)
	#print(np.size(xa))
	#print(np.size(ya))
	print('Build model...')  
	model = Sequential()  
	model.add(Embedding(len(word_dict)+1, 256))
	#model.add(LSTM(128)) # try using a GRU instead, for fun  
	#model.add(LSTM(32,input_shape=(10,60)))
	#model.add(LSTM(32,input_length=50,input_dim=60))
	model.add(LSTM(128))
	print('LSTM added')
	model.add(Dropout(0.5))
	model.add(Dense(1))
	model.add(Activation('sigmoid'))
	model.compile(loss='binary_crossentropy', optimizer='adam', class_mode="binary")
	model.fit(xa, ya, batch_size=128, nb_epoch=20)  # training takes several hours
	model.save('my_model.h5')
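The frequency-ranked id mapping built above (word_dict / get_sent) is the core preprocessing trick; a standalone toy illustration, assuming only pandas:

import pandas as pd

w = ['a', 'b', 'a', 'c', 'a', 'b']
word_dict = pd.DataFrame(pd.Series(w).value_counts())
word_dict['id'] = list(range(1, len(word_dict) + 1))   # most frequent word gets id 1
get_sent = lambda x: list(word_dict['id'][x])
print(get_sent(['a', 'c']))   # [1, 3]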
Example 47
Project: BOP2017   Author: crh19970307   File: train_mix.py    MIT License
def train():
	import os
	gpu_id = '0'
	os.environ['CUDA_VISIBLE_DEVICES']=str(gpu_id)
	os.system('echo $CUDA_VISIBLE_DEVICES')
	data=loadfromjson()
	taglist=[]
	for index,item in enumerate(data['datalist']):
		if item[0]=='0':
			taglist.append(0)
		else:
			if item[0]=='1':
				taglist.append(1)
			else:
				print('ERROR\n')
				print(index)
				taglist.append(0)
		#print(len(data['vectorlist'][index]))
	#xa=np.array(data['vectorlist'])
	xa=np.zeros((len(data['vectorlist']),30,60),dtype='float64')
	for index1,items in enumerate(data['vectorlist']):
		for index2,item2 in enumerate(items):
			if index2==30:
				break
			xa[index1][index2]=item2
	#xa=np.random.rand(len(data['vectorlist']),50,60)
	ya=np.array(taglist)
	#print(np.size(xa))
	#print(np.size(ya))
	print('Build model...')
	model = Sequential()
	#model.add(Embedding(60,32))
	#model.add(LSTM(128)) # try using a GRU instead, for fun
	#model.add(LSTM(32,input_shape=(10,60)))
	#model.add(LSTM(32,input_length=50,input_dim=60))
	model.add(LSTM(128,input_length=30,input_dim=60))
	print('LSTM added')
	model.add(Dropout(0.5))
	model.add(Dense(1))
	model.add(Activation('sigmoid'))
	model.compile(loss='binary_crossentropy', optimizer='adam', class_mode="binary")
	model.fit(xa, ya, batch_size=128, nb_epoch=5)  # training takes several hours
	model.save('sentencelen30_230000_lstm128_epoch5_model.h5')
Example 48
Project: BOP2017   Author: crh19970307   File: train.py    MIT License
def train():
	import os
	gpu_id = '0'
	os.environ['CUDA_VISIBLE_DEVICES']=str(gpu_id)
	os.system('echo $CUDA_VISIBLE_DEVICES')
	data=loadfromjson()
	taglist=[]
	for index,item in enumerate(data['datalist']):
		if item[0]=='0':
			taglist.append(0)
		else:
			if item[0]=='1':
				taglist.append(1)
			else:
				print('ERROR\n')
				print(index)
				taglist.append(0)
		#print(len(data['vectorlist'][index]))
	#xa=np.array(data['vectorlist'])
	xa=np.zeros((len(data['vectorlist1']),45,60),dtype='float64')
	for index1,items in enumerate(data['vectorlist1']):
		for index2,item2 in enumerate(items):
			if index2==15:
				break
			xa[index1][index2]=item2
	for index1,items in enumerate(data['vectorlist2']):
		for index2,item2 in enumerate(items):
			if index2==30:
				break
			xa[index1][index2+15]=item2		

	#xa=np.random.rand(len(data['vectorlist']),50,60)
	ya=np.array(taglist)
	#print(np.size(xa))
	#print(np.size(ya))
	print('Build model...')  
	model = Sequential()  
	#model.add(Embedding(60,32))  
	#model.add(LSTM(128)) # try using a GRU instead, for fun  
	#model.add(LSTM(32,input_shape=(10,60)))
	#model.add(LSTM(32,input_length=50,input_dim=60))
	model.add(LSTM(128,input_length=45,input_dim=60))
	print('LSTM added')
	model.add(Dropout(0.5))
	model.add(Dense(1))
	model.add(Activation('sigmoid'))
	model.compile(loss='binary_crossentropy', optimizer='adam', class_mode="binary")
	model.fit(xa, ya, batch_size=128, nb_epoch=100)  # training takes several hours
	model.save('sentencelen15+30_230000_lstm128_epoch100_model.h5')
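LSTM(128, input_length=45, input_dim=60) is Keras 1 shorthand for an input of shape (batch, 45, 60); in Keras 2 the same layer would be LSTM(128, input_shape=(45, 60)). A toy smoke test under that assumption, with random data standing in for the project's vectors:

import numpy as np
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout, Activation

model = Sequential()
model.add(LSTM(128, input_shape=(45, 60)))   # (timesteps, features)
model.add(Dropout(0.5))
model.add(Dense(1))
model.add(Activation('sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam')
model.fit(np.random.rand(8, 45, 60), np.random.randint(0, 2, 8), batch_size=4, epochs=1)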
Example 49
Project: CAPTCHA-breaking   Author: lllcho   File: test_recurrent.py    MIT License
def test_gru(self):
        _runner(recurrent.GRU) 
Example 50
Project: cervantes   Author: textclf   File: models.py    MIT License
def _generate_model(self, lembedding, num_classes=2, rnn_dim=32):

        WORD_PER_SENTENCES = lembedding.size_level1
        SENTENCES_PER_DOCUMENT = lembedding.size_level2
        EMBEDDING_DIM = lembedding.vector_box.vector_dim

        INPUT_SHAPE = (WORD_PER_SENTENCES * SENTENCES_PER_DOCUMENT, )
        EMBEDDING_SHAPE = (SENTENCES_PER_DOCUMENT, WORD_PER_SENTENCES, EMBEDDING_DIM)

        doc = Input(shape=(INPUT_SHAPE[0], ), dtype='int32')

        embedded = Sequential([
            Embedding(
                input_dim=lembedding.vector_box.size,
                output_dim=EMBEDDING_DIM,
                input_length=INPUT_SHAPE[0]
            ),
            Reshape(EMBEDDING_SHAPE)
        ])(doc)

        out = TimeDistributed(GRU(rnn_dim))(embedded)
        out = Dropout(0.5)(out)
        out = GRU(rnn_dim)(out)
        out = Dropout(0.5)(out)

        mapping = [
            Dense(64, activation='relu'),  # Maybe add more layers
        ]

        for f in mapping:
            out = f(out)

        if num_classes == 2:
            out = Dense(1, activation='sigmoid')(out)
            model = Model(input=doc, output=out)
            if self.optimizer is None:
                self.optimizer = 'rmsprop'
            model.compile(loss='binary_crossentropy', optimizer=self.optimizer, metrics=["accuracy"])
        else:
            out = Dense(num_classes, activation='softmax')(out)
            model = Model(input=doc, output=out)
            if self.optimizer is None:
                self.optimizer = 'adam'
            model.compile(loss='categorical_crossentropy', optimizer=self.optimizer, metrics=["accuracy"])

        return model 
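The Reshape → TimeDistributed(GRU) → GRU chain above is a word/sentence hierarchy: the inner GRU produces one vector per sentence and the outer GRU reads the sequence of sentence vectors. A stripped-down sketch of the same pattern (illustrative sizes, Keras 2 names):

from keras.layers import Input, Embedding, Reshape, TimeDistributed, GRU, Dense
from keras.models import Model

words_per_sent, sents_per_doc, emb_dim, rnn_dim = 20, 10, 50, 32
doc = Input(shape=(words_per_sent * sents_per_doc,), dtype='int32')
x = Embedding(input_dim=5000, output_dim=emb_dim)(doc)
x = Reshape((sents_per_doc, words_per_sent, emb_dim))(x)
x = TimeDistributed(GRU(rnn_dim))(x)   # one sentence vector per sentence
x = GRU(rnn_dim)(x)                    # one document vector
out = Dense(1, activation='sigmoid')(x)
model = Model(doc, out)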
Example 51
Project: applications   Author: geomstats   File: recurrent_test.py    MIT License
def rnn_test(f):
    """
    All the recurrent layers share the same interface,
    so we can run through them with a single function.
    """
    f = keras_test(f)
    return pytest.mark.parametrize('layer_class', [
        recurrent.SimpleRNN,
        recurrent.GRU,
        recurrent.LSTM
    ])(f) 
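Applied as a decorator, this turns one test into three, one per layer class. A minimal sketch under the same imports (the test body is illustrative):

@rnn_test
def test_units(layer_class):
    layer = layer_class(8, input_shape=(5, 4))
    assert layer.units == 8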
Example 52
Project: distnet   Author: ssamot   File: neuralnetworks.py    GNU General Public License v2.0
def distancenet(self, vocab_size, output_size, maxsize=1, hop_depth=-1, dropout=False, d_perc=1, type="CCE"):
        print(bcolors.UNDERLINE + 'Building nn model...' + bcolors.ENDC)

        sentrnn = Sequential()
        emb = Embedding(vocab_size, self.embed_hidden_size, mask_zero=False, W_constraint=mx(), W_regularizer=reg(), init=init_function)
        sentrnn.add(emb)
        sentrnn.add(MaxPooling1D(pool_length=maxsize))
        #sentrnn.add(UpSample1D(length=4))
        #sentrnn.add(GRU(self.query_hidden_size, return_sequences=True, activation="elu", init=init_function))
        #sentrnn.add(DownSample1D(length=maxsize))
        #sentrnn.add(Dropout(0.6))
        #sentrnn.add(TimeDistributedDense(self.sent_hidden_size, activation="elu", init=init_function))
        #sentrnn.add(Dropout(0.2))

        qrnn = Sequential()
        emb = Embedding(vocab_size, self.embed_hidden_size, mask_zero=False, W_constraint=mx(), W_regularizer=reg(), init=init_function)
        qrnn.add(emb)
        qrnn.add(SimpleRNN(self.query_hidden_size, return_sequences=False, activation="leakyrelu", init=init_function))
        #qrnn.add(BatchNormalization(mode=1, momentum=0.9))
        #qrnn.add(Dense)
        #qrnn.add(AttentionRecurrent(self.query_hidden_size))

        init_qa = [sentrnn, qrnn]
        past = []
        for i in range(hop_depth):
            hop = Sequential()
            l_size = self.sent_hidden_size
            hop.add(AttentionMerge(init_qa + past, input_shape=(None, None, l_size), mode="distance"))
            hop.add(Dropout(0.1))
            hop.add(TimeDistributedDense(self.sent_hidden_size, activation="leakyrelu", init=init_function))
            hop.add(Dropout(0.1))
            hop.add(AttentionRecurrent(self.sent_hidden_size, init=init_function))
            hop.add(Dropout(0.1))
            past.append(hop)

        model = hop
        model.add(bn())

        self._adddepth(model, output_size, dropout, d_perc, softmax=(type == "CCE"))

        if type == "CCE":
            model.compile(optimizer=self._getopt(), loss='categorical_crossentropy', class_mode='categorical')
        else:
            model.compile(optimizer=self._getopt(), loss='mse')

        return model
Example 53
Project: ML_learn   Author: jeffmxh   File: train.py    MIT License
def build_model(wordvec_weight, params, logger):
    if not isinstance(wordvec_weight, str):  # an empty string is the "no pretrained vectors" sentinel
        word_embedding_layer = Embedding(
            input_dim=wordvec_weight.shape[0],
            output_dim=wordvec_weight.shape[1],
            weights=[wordvec_weight],
            trainable=params.embedding_train)
    else:
        word_embedding_layer = Embedding(params.dict_len+1, 256)
    logger.info('Build model...')
    model = Sequential()
    model.add(word_embedding_layer)
    model.add(Dropout(0.1))
    if params.layer=='lstm':
        model.add(LSTM(128, return_sequences = False))
    elif params.layer=='bilstm':
        model.add(Bidirectional(LSTM(128, return_sequences = False))) 
    elif params.layer=='gru':
        model.add(GRU(128, return_sequences = False))
    elif params.layer=='cnn':
        model.add(Conv1D(256,
                         3,
                         padding='valid',
                         activation='relu',
                         strides=1))
        model.add(GlobalMaxPooling1D())
        # model.add(MaxPooling1D(pool_size = 2))
        # model.add(Flatten())
        model.add(Dropout(0.5))
        model.add(Dense(128))
        model.add(Dropout(0.5))
        model.add(Activation('relu'))
    elif params.layer=='ConvRnn':
        model.add(Conv1D(250,
                         3,
                         padding='valid',
                         activation='relu',
                         strides=1))
        # we use max pooling:
        model.add(MaxPooling1D(pool_size = 2))
        model.add(Dropout(0.3))
        model.add(LSTM(128))
    else:  # use a self-defined model structure
        model.add(GRU(128, return_sequences = True))
        model.add(Dropout(0.2))
        model.add(GRU(64, return_sequences = False))
    model.add(Dropout(0.5))
    model.add(Dense(params.num_classes))
    model.add(Activation('softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=[metrics.mae, metrics.categorical_accuracy])
    return model 
Example 54
Project: LipNet   Author: rizkiarm   File: model2.py    MIT License
def build(self):
        if K.image_data_format() == 'channels_first':
            input_shape = (self.img_c, self.frames_n, self.img_w, self.img_h)
        else:
            input_shape = (self.frames_n, self.img_w, self.img_h, self.img_c)

        self.input_data = Input(name='the_input', shape=input_shape, dtype='float32')

        self.zero1 = ZeroPadding3D(padding=(1, 2, 2), name='zero1')(self.input_data)
        self.conv1 = Conv3D(32, (3, 5, 5), strides=(1, 2, 2), kernel_initializer='he_normal', name='conv1')(self.zero1)
        self.batc1 = BatchNormalization(name='batc1')(self.conv1)
        self.actv1 = Activation('relu', name='actv1')(self.batc1)
        self.drop1 = SpatialDropout3D(0.5)(self.actv1)
        self.maxp1 = MaxPooling3D(pool_size=(1, 2, 2), strides=(1, 2, 2), name='max1')(self.drop1)

        self.zero2 = ZeroPadding3D(padding=(1, 2, 2), name='zero2')(self.maxp1)
        self.conv2 = Conv3D(64, (3, 5, 5), strides=(1, 1, 1), kernel_initializer='he_normal', name='conv2')(self.zero2)
        self.batc2 = BatchNormalization(name='batc2')(self.conv2)
        self.actv2 = Activation('relu', name='actv2')(self.batc2)
        self.drop2 = SpatialDropout3D(0.5)(self.actv2)
        self.maxp2 = MaxPooling3D(pool_size=(1, 2, 2), strides=(1, 2, 2), name='max2')(self.drop2)

        self.zero3 = ZeroPadding3D(padding=(1, 1, 1), name='zero3')(self.maxp2)
        self.conv3 = Conv3D(96, (3, 3, 3), strides=(1, 1, 1), kernel_initializer='he_normal', name='conv3')(self.zero3)
        self.batc3 = BatchNormalization(name='batc3')(self.conv3)
        self.actv3 = Activation('relu', name='actv3')(self.batc3)
        self.drop3 = SpatialDropout3D(0.5)(self.actv3)
        self.maxp3 = MaxPooling3D(pool_size=(1, 2, 2), strides=(1, 2, 2), name='max3')(self.drop3)

        self.resh1 = TimeDistributed(Flatten())(self.maxp3)

        self.gru_1 = Bidirectional(GRU(256, return_sequences=True, kernel_initializer='Orthogonal', name='gru1'), merge_mode='concat')(self.resh1)
        self.gru_2 = Bidirectional(GRU(256, return_sequences=True, kernel_initializer='Orthogonal', name='gru2'), merge_mode='concat')(self.gru_1)

        # transforms RNN output to character activations:
        self.dense1 = Dense(self.output_size, kernel_initializer='he_normal', name='dense1')(self.gru_2)

        self.y_pred = Activation('softmax', name='softmax')(self.dense1)

        self.labels = Input(name='the_labels', shape=[self.absolute_max_string_len], dtype='float32')
        self.input_length = Input(name='input_length', shape=[1], dtype='int64')
        self.label_length = Input(name='label_length', shape=[1], dtype='int64')

        self.loss_out = CTC('ctc', [self.y_pred, self.labels, self.input_length, self.label_length])

        self.model = Model(inputs=[self.input_data, self.labels, self.input_length, self.label_length], outputs=self.loss_out) 
Example 55
Project: GRU-D   Author: PeterChe1990   File: grud_layers.py    MIT License
def __init__(self, units,
                 x_imputation='zero',
                 input_decay='exp_relu', hidden_decay='exp_relu', use_decay_bias=True,
                 feed_masking=True, masking_decay=None,
                 decay_initializer='zeros', decay_regularizer=None,
                 decay_constraint=None,
                 **kwargs):
        super(GRUDCell, self).__init__(units, **kwargs)

        assert 'reset_after' not in kwargs or not kwargs['reset_after'], (
            'Only the default GRU reset gate can be used in GRU-D.'
        )
        assert ('implementation' not in kwargs
                or kwargs['implementation'] == 1), (
                    'Only Implementation-1 (larger number of smaller operations) '
                    'is supported in GRU-D.'
                )

        assert x_imputation in _SUPPORTED_IMPUTATION, (
            'x_imputation {} argument is not supported.'.format(x_imputation)
        )
        self.x_imputation = x_imputation

        self.input_decay = get_activation(input_decay)
        self.hidden_decay = get_activation(hidden_decay)
        self.use_decay_bias = use_decay_bias

        assert (feed_masking or masking_decay is None
                or masking_decay == 'None'), (
                    'Mask needs to be fed into GRU-D to enable the mask_decay.'
                )
        self.feed_masking = feed_masking
        if self.feed_masking:
            self.masking_decay = get_activation(masking_decay)
            self._masking_dropout_mask = None
        else:
            self.masking_decay = None
        
        if (self.input_decay is not None
            or self.hidden_decay is not None
            or self.masking_decay is not None):
            self.decay_initializer = initializers.get(decay_initializer)
            self.decay_regularizer = regularizers.get(decay_regularizer)
            self.decay_constraint = constraints.get(decay_constraint) 
Example 56
Project: GRU-D   Author: PeterChe1990   File: grud_layers.py    MIT License
def __init__(self, units,
                 activation='sigmoid',
                 recurrent_activation='hard_sigmoid',
                 use_bias=True,
                 kernel_initializer='glorot_uniform',
                 recurrent_initializer='orthogonal',
                 bias_initializer='zeros',
                 kernel_regularizer=None,
                 recurrent_regularizer=None,
                 bias_regularizer=None,
                 kernel_constraint=None,
                 recurrent_constraint=None,
                 bias_constraint=None,
                 dropout=0.,
                 recurrent_dropout=0.,
                 x_imputation='zero',
                 input_decay='exp_relu',
                 hidden_decay='exp_relu',
                 use_decay_bias=True,
                 feed_masking=True,
                 masking_decay=None,
                 decay_initializer='zeros',
                 decay_regularizer=None,
                 decay_constraint=None,
                 **kwargs):

        cell = GRUDCell(units,
                        activation=activation,
                        recurrent_activation=recurrent_activation,
                        use_bias=use_bias,
                        kernel_initializer=kernel_initializer,
                        recurrent_initializer=recurrent_initializer,
                        bias_initializer=bias_initializer,
                        kernel_regularizer=kernel_regularizer,
                        recurrent_regularizer=recurrent_regularizer,
                        bias_regularizer=bias_regularizer,
                        kernel_constraint=kernel_constraint,
                        recurrent_constraint=recurrent_constraint,
                        bias_constraint=bias_constraint,
                        dropout=dropout,
                        recurrent_dropout=recurrent_dropout,
                        x_imputation=x_imputation,
                        input_decay=input_decay,
                        hidden_decay=hidden_decay,
                        use_decay_bias=use_decay_bias,
                        feed_masking=feed_masking,
                        masking_decay=masking_decay,
                        decay_initializer=decay_initializer,
                        decay_regularizer=decay_regularizer,
                        decay_constraint=decay_constraint)
        if 'unroll' in kwargs and kwargs['unroll']:
            raise ValueError('GRU-D does not support unroll.')
        if 'activity_regularizer' in kwargs:
            self.activity_regularizer = regularizers.get(
                kwargs.pop('activity_regularizer'))
        else:
            self.activity_regularizer = None
        # Deliberately skip `GRU.__init__()`; its differences are handled here.
        super(GRU, self).__init__(cell, **kwargs)
        self.input_spec = [InputSpec(ndim=3), InputSpec(ndim=3), InputSpec(ndim=3)] 
Example 57
Project: GRU-D   Author: PeterChe1990   File: grud_layers.py    MIT License
def call(self, inputs, mask=None, training=None, initial_state=None):
        # We need to rewrite this `call` method by combining `RNN`'s and `GRU`'s.
        self.cell._dropout_mask = None
        self.cell._recurrent_dropout_mask = None
        self.cell._masking_dropout_mask = None

        inputs = inputs[:3]

        if initial_state is not None:
            pass
        elif self.stateful:
            initial_state = self.states
        else:
            initial_state = self.get_initial_state(inputs)

        if len(initial_state) != len(self.states):
            raise ValueError('Layer has ' + str(len(self.states)) +
                             ' states but was passed ' +
                             str(len(initial_state)) +
                             ' initial states.')
        timesteps = K.int_shape(inputs[0])[1]

        kwargs = {}
        if has_arg(self.cell.call, 'training'):
            kwargs['training'] = training

        def step(inputs, states):
            return self.cell.call(inputs, states, **kwargs)
        # concatenate the inputs and get the mask

        concatenated_inputs = K.concatenate(inputs, axis=-1)
        mask = mask[0]
        last_output, outputs, states = K.rnn(step,
                                             concatenated_inputs,
                                             initial_state,
                                             go_backwards=self.go_backwards,
                                             mask=mask,
                                             unroll=self.unroll,
                                             input_length=timesteps)
        if self.stateful:
            updates = []
            for i, state in enumerate(states):
                updates.append((self.states[i], state))
            self.add_update(updates, inputs)

        if self.return_sequences:
            output = outputs
        else:
            output = last_output

        # Properly set learning phase
        if getattr(last_output, '_uses_learning_phase', False):
            output._uses_learning_phase = True
            for state in states:
                state._uses_learning_phase = True

        if self.return_state:
            states = list(states)[:-2] # remove x_keep and ss
            return [output] + states
        return output 
Example 58
Project: GRU-D   Author: PeterChe1990   File: models.py    MIT License
def create_grud_model(input_dim, recurrent_dim, hidden_dim,
                      output_dim, output_activation,
                      predefined_model=None,
                      use_bidirectional_rnn=False, use_batchnorm=False, **kwargs):

    if (predefined_model is not None
            and predefined_model in _PREDEFINED_MODEL_LIST):
        for c, v in _PREDEFINED_MODEL_LIST[predefined_model].items():
            kwargs[c] = v
    # Input
    input_x = Input(shape=(None, input_dim))
    input_m = Input(shape=(None, input_dim))
    input_s = Input(shape=(None, 1))
    input_list = [input_x, input_m, input_s]
    input_x = ExternalMasking()([input_x, input_m])
    input_s = ExternalMasking()([input_s, input_m])
    input_m = Masking()(input_m)
    # GRU layers
    grud_layer = GRUD(units=recurrent_dim[0],
                      return_sequences=len(recurrent_dim) > 1,
                      activation='sigmoid',
                      dropout=0.3,
                      recurrent_dropout=0.3,
                      **kwargs
                     )
    if use_bidirectional_rnn:
        grud_layer = Bidirectional_for_GRUD(grud_layer)
    x = grud_layer([input_x, input_m, input_s])
    for i, rd in enumerate(recurrent_dim[1:]):
        gru_layer = GRU(units=rd,
                        return_sequences=i < len(recurrent_dim) - 2,
                        dropout=0.3,
                        recurrent_dropout=0.3,
                       )
        if use_bidirectional_rnn:
            gru_layer = Bidirectional(gru_layer)
        x = gru_layer(x)
    # MLP layers
    x = Dropout(.3)(x)
    for hd in hidden_dim:        
        x = Dense(units=hd,
                  kernel_regularizer=l2(1e-4))(x)
        if use_batchnorm:
            x = BatchNormalization()(x)
        x = Activation('relu')(x)
    x = Dense(output_dim, activation=output_activation)(x)
    output_list = [x]

    model = Model(inputs=input_list, outputs=output_list)
    return model 
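A hedged usage sketch (sizes are illustrative; assumes this module's GRUD, ExternalMasking, and Bidirectional_for_GRUD classes are importable). Each sample feeds three aligned tensors: the values x, the observation mask m, and the timestamps s:

model = create_grud_model(input_dim=33, recurrent_dim=[64, 64], hidden_dim=[64],
                          output_dim=1, output_activation='sigmoid')
model.compile(optimizer='adam', loss='binary_crossentropy')
# x: (n, T, 33), m: (n, T, 33), s: (n, T, 1)
# model.fit([x, m, s], y, batch_size=32, epochs=10)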
Example 59
Project: CRNN_CTC_English_Handwriting_Recognition   Author: huyhoang17   File: models.py    MIT License
def CRNN_model():
    act = 'relu'
    input_data = Input(name='the_input', shape=cf.INPUT_SHAPE, dtype='float32')
    inner = Conv2D(cf.CONV_FILTERS, cf.KERNEL_SIZE, padding='same',
                   activation=act, kernel_initializer='he_normal',
                   name='conv1')(input_data)
    inner = MaxPooling2D(pool_size=(cf.POOL_SIZE, cf.POOL_SIZE), name='max1')(
        inner)
    inner = Conv2D(cf.CONV_FILTERS, cf.KERNEL_SIZE, padding='same',
                   activation=act, kernel_initializer='he_normal',
                   name='conv2')(inner)
    inner = MaxPooling2D(pool_size=(cf.POOL_SIZE, cf.POOL_SIZE), name='max2')(
        inner)

    conv_to_rnn_dims = (cf.IMG_W // (cf.POOL_SIZE ** 2),
                        (cf.IMG_H // (cf.POOL_SIZE ** 2)) * cf.CONV_FILTERS)
    inner = Reshape(target_shape=conv_to_rnn_dims, name='reshape')(inner)

    # cuts down input size going into RNN:
    inner = Dense(cf.TIME_DENSE_SIZE, activation=act, name='dense1')(inner)

    gru_1 = GRU(cf.RNN_SIZE, return_sequences=True,
                kernel_initializer='he_normal', name='gru1')(inner)
    gru_1b = GRU(cf.RNN_SIZE, return_sequences=True, go_backwards=True,
                 kernel_initializer='he_normal', name='gru1_b')(inner)
    gru1_merged = add([gru_1, gru_1b])
    gru_2 = GRU(cf.RNN_SIZE, return_sequences=True,
                kernel_initializer='he_normal', name='gru2')(gru1_merged)
    gru_2b = GRU(cf.RNN_SIZE, return_sequences=True, go_backwards=True,
                 kernel_initializer='he_normal', name='gru2_b')(gru1_merged)

    # transforms RNN output to character activations:
    inner = Dense(cf.NO_LABELS, kernel_initializer='he_normal',
                  name='dense2')(concatenate([gru_2, gru_2b]))
    y_pred = Activation('softmax', name='softmax')(inner)

    Model(inputs=input_data, outputs=y_pred).summary()

    labels = Input(name='the_labels', shape=[cf.MAX_LEN_TEXT], dtype='float32')
    input_length = Input(name='input_length', shape=[1], dtype='int64')
    label_length = Input(name='label_length', shape=[1], dtype='int64')

    # loss function
    loss_out = Lambda(ctc_lambda_func, output_shape=(1,), name='ctc')(
        [y_pred, labels, input_length, label_length]
    )

    model = Model(inputs=[input_data, labels,
                          input_length, label_length], outputs=loss_out)

    y_func = K.function([input_data], [y_pred])

    return model, y_func 
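ctc_lambda_func is referenced above but not shown here. A minimal definition in the style of Keras's image_ocr example (an assumption about this project, since the file is truncated on this page) would be:

from keras import backend as K

def ctc_lambda_func(args):
    # args arrive in the order wired into the Lambda layer above
    y_pred, labels, input_length, label_length = args
    return K.ctc_batch_cost(labels, y_pred, input_length, label_length)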
Example 60
Project: DeepPavlov   Author: deepmipt   File: keras_classification_model.py    Apache License 2.0
def bigru_model(self, units_gru: int, dense_size: int,
                    coef_reg_lstm: float = 0., coef_reg_den: float = 0.,
                    dropout_rate: float = 0., rec_dropout_rate: float = 0.,
                    input_projection_size: Optional[int] = None, **kwargs) -> Model:
        """
        Method builds an uncompiled BiGRU model.

        Args:
            units_gru: number of units for GRU.
            dense_size: number of units for dense layer.
            coef_reg_lstm: l2-regularization coefficient for GRU. Default: ``0.0``.
            coef_reg_den: l2-regularization coefficient for dense layers. Default: ``0.0``.
            dropout_rate: dropout rate to be used after BiGRU and between dense layers. Default: ``0.0``.
            rec_dropout_rate: dropout rate for GRU. Default: ``0.0``.
            input_projection_size: if not None, adds Dense layer (with ``relu`` activation)
                                   right after input layer to the size ``input_projection_size``.
                                   Useful for input dimensionality reduction. Default: ``None``.
            kwargs: other non-used parameters

        Returns:
            keras.models.Model: uncompiled instance of Keras Model
        """

        inp = Input(shape=(self.opt['text_size'], self.opt['embedding_size']))
        output = inp

        if input_projection_size is not None:
            output = Dense(input_projection_size, activation='relu')(output)

        output = Bidirectional(GRU(units_gru, activation='tanh',
                                   return_sequences=True,
                                   kernel_regularizer=l2(coef_reg_lstm),
                                   dropout=dropout_rate,
                                   recurrent_dropout=rec_dropout_rate))(output)

        output = GlobalMaxPooling1D()(output)
        output = Dropout(rate=dropout_rate)(output)
        output = Dense(dense_size, activation=None,
                       kernel_regularizer=l2(coef_reg_den))(output)
        output = Activation('relu')(output)
        output = Dropout(rate=dropout_rate)(output)
        output = Dense(self.n_classes, activation=None,
                       kernel_regularizer=l2(coef_reg_den))(output)
        act_output = Activation(self.opt.get("last_layer_activation", "sigmoid"))(output)
        model = Model(inputs=inp, outputs=act_output)
        return model 
Example 61
Project: DeepPavlov   Author: deepmipt   File: keras_classification_model.py    Apache License 2.0
def bigru_with_max_aver_pool_model(self, units_gru: int, dense_size: int,
                                       coef_reg_gru: float = 0., coef_reg_den: float = 0.,
                                       dropout_rate: float = 0., rec_dropout_rate: float = 0.,
                                       **kwargs) -> Model:
        """
        Method builds an uncompiled bidirectional GRU model with max and average pooling concatenated after the BiGRU.

        Args:
            units_gru: number of units for GRU.
            dense_size: number of units for dense layer.
            coef_reg_gru: l2-regularization coefficient for GRU. Default: ``0.0``.
            coef_reg_den: l2-regularization coefficient for dense layers. Default: ``0.0``.
            dropout_rate: dropout rate to be used after BiGRU and between dense layers. Default: ``0.0``.
            rec_dropout_rate: dropout rate for GRU. Default: ``0.0``.
            kwargs: other non-used parameters

        Returns:
            keras.models.Model: uncompiled instance of Keras Model
        """

        inp = Input(shape=(self.opt['text_size'], self.opt['embedding_size']))

        output = Dropout(rate=dropout_rate)(inp)

        output, state1, state2 = Bidirectional(GRU(units_gru, activation='tanh',
                                                   return_sequences=True,
                                                   return_state=True,
                                                   kernel_regularizer=l2(coef_reg_gru),
                                                   dropout=dropout_rate,
                                                   recurrent_dropout=rec_dropout_rate))(output)

        output1 = GlobalMaxPooling1D()(output)
        output2 = GlobalAveragePooling1D()(output)

        output = Concatenate()([output1, output2, state1, state2])

        output = Dropout(rate=dropout_rate)(output)
        output = Dense(dense_size, activation=None,
                       kernel_regularizer=l2(coef_reg_den))(output)
        output = Activation('relu')(output)
        output = Dropout(rate=dropout_rate)(output)
        output = Dense(self.n_classes, activation=None,
                       kernel_regularizer=l2(coef_reg_den))(output)
        act_output = Activation(self.opt.get("last_layer_activation", "sigmoid"))(output)
        model = Model(inputs=inp, outputs=act_output)
        return model
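Note the triple unpacking above: with return_state=True, a Bidirectional(GRU(...)) yields the output sequence plus one final state per direction. A minimal shape check (Keras 2):

from keras.layers import Input, GRU, Bidirectional

inp = Input(shape=(10, 8))
seq, fwd, bwd = Bidirectional(GRU(16, return_sequences=True, return_state=True))(inp)
# seq: (batch, 10, 32), directions concatenated; fwd and bwd: (batch, 16) each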