def RNNModel(vocab_size, max_len, rnnConfig, model_type):
	"""Build and compile the captioning model that merges image features with an LSTM.

	# Arguments:
		vocab_size: int, size of the vocabulary (also the width of the softmax output)
		max_len: int, length of the (zero padded) caption input
		rnnConfig: dict providing 'embedding_size', 'dropout', 'LSTM_units'
			and 'dense_units'
		model_type: 'inceptionv3' or 'vgg16'; selects the image feature width
	# Returns:
		model: compiled keras Model taking [image_features, caption_ids]
	# Raises:
		ValueError: if model_type is not one of the supported encoders
	"""
	if model_type == 'inceptionv3':
		# InceptionV3 outputs a 2048 dimensional vector for each image, which we'll feed to RNN Model
		feature_size = 2048
	elif model_type == 'vgg16':
		# VGG16 outputs a 4096 dimensional vector for each image, which we'll feed to RNN Model
		feature_size = 4096
	else:
		# Fail early with a clear message instead of hitting an unbound
		# image_input further down.
		raise ValueError(
			"Unsupported model_type %r; expected 'inceptionv3' or 'vgg16'" % (model_type,))

	embedding_size = rnnConfig['embedding_size']

	# Image branch: dropout followed by a projection to the embedding size.
	image_input = Input(shape=(feature_size,))
	image_model_1 = Dropout(rnnConfig['dropout'])(image_input)
	image_model = Dense(embedding_size, activation='relu')(image_model_1)

	# Caption branch.
	caption_input = Input(shape=(max_len,))
	# mask_zero: inputs are zero padded to the same length; the zero mask makes
	# the following layers ignore the padding.
	caption_model_1 = Embedding(vocab_size, embedding_size, mask_zero=True)(caption_input)
	caption_model_2 = Dropout(rnnConfig['dropout'])(caption_model_1)
	caption_model = LSTM(rnnConfig['LSTM_units'])(caption_model_2)

	# Merging the models and creating a softmax classifier
	final_model_1 = concatenate([image_model, caption_model])
	final_model_2 = Dense(rnnConfig['dense_units'], activation='relu')(final_model_1)
	final_model = Dense(vocab_size, activation='softmax')(final_model_2)

	model = Model(inputs=[image_input, caption_input], outputs=final_model)
	model.compile(loss='categorical_crossentropy', optimizer='adam')
	return model
def __build_model(self):
        # Creates a Sequential model, instantiates embedding / BiLSTM / CRF layers,
        # compiles with the CRF loss and returns the model.
        # NOTE(review): this listing looks incomplete. The Embedding(...) call below is
        # never closed (its remaining arguments are missing), and bilstm_layer and
        # crf_layer are created but never passed to model.add(). Restore the missing
        # lines from the original project before using this method.
        model = Sequential()

        embedding_layer = Embedding(input_dim=len(self.vocab) + 1,

        bilstm_layer = Bidirectional(LSTM(units=256, return_sequences=True))

        model.add(TimeDistributed(Dense(256, activation="relu")))

        crf_layer = CRF(units=len(self.tags), sparse_target=True)

        model.compile(optimizer="adam", loss=crf_loss, metrics=[crf_viterbi_accuracy])

        return model 
# Builds and compiles a Sequential embedding + softmax classifier.
# NOTE(review): the parameter list is cut off after max_len. The body reads
# vocabulary_size, embedding_dim and num_classes, which are presumably the missing
# parameters (defaults unknown) -- confirm against the original project.
def fasttext_model(max_len=300,
    model = Sequential()

    # embed layer by maps vocab index into emb dimensions
    model.add(Embedding(input_dim=vocabulary_size, output_dim=embedding_dim, input_length=max_len))
    # pooling the embedding
    # NOTE(review): no pooling layer follows the comment above; a line appears to be
    # missing here -- TODO confirm.
    # output multi classification of num_classes
    model.add(Dense(num_classes, activation='softmax'))

    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model 
def model_keras(num_words=3000, num_units=128):
    """Build an uncompiled two-layer LSTM model with a per-time-step softmax.

    :param num_words: vocabulary size
    :param num_units: word-vector dimension; the LSTM layers use the same
        number of units
    :return: keras Model mapping a sequence of word ids to one softmax over
        ``num_words`` per time step
    """
    data_input = Input(shape=[None])
    embedding = Embedding(input_dim=num_words, output_dim=num_units, mask_zero=True)(data_input)
    lstm = LSTM(units=num_units, return_sequences=True)(embedding)
    x = LSTM(units=num_units, return_sequences=True)(lstm)
    # Keras does not seem to support manipulating y inside the model, so the
    # output cannot be reshaped to [-1, num_units] the way the TensorFlow
    # version does; Dense is applied to the sequence output directly instead.
    outputs = Dense(units=num_words, activation='softmax')(x)

    model = Model(inputs=data_input, outputs=outputs)
    return model
def Discriminator(V, E, H=64, dropout=0.1):
    """Discriminator model.

    # Arguments:
        V: int, Vocabulary size
        E: int, Embedding size
        H: int, LSTM hidden size
        dropout: float
    # Returns:
        discriminator: keras model
            input: word ids, shape = (B, T)
            output: probability of true data or not, shape = (B, 1)
    """
    # Named word_ids rather than `input` so the builtin is not shadowed.
    word_ids = Input(shape=(None,), dtype='int32', name='Input')   # (B, T)
    out = Embedding(V, E, mask_zero=True, name='Embedding')(word_ids)  # (B, T, E)
    out = LSTM(H)(out)
    # Highway is a helper defined elsewhere; it is called on the tensor directly.
    out = Highway(out, num_layers=1)
    out = Dropout(dropout, name='Dropout')(out)
    out = Dense(1, activation='sigmoid', name='FC')(out)

    discriminator = Model(word_ids, out)
    return discriminator
def CapsuleNet_v2(n_capsule = 10, n_routings = 5, capsule_dim = 16,
     n_recurrent=100, dropout_rate=0.2, l2_penalty=0.0001):
    """Build and compile the embedding -> BiGRU -> Capsule classifier.

    The input is a sequence of 200 token ids, embedded with a
    20000 x 300 trainable table; the output is a 6-way softmax.

    # Arguments:
        n_capsule: int, number of capsules
        n_routings: int, number of routing iterations
        capsule_dim: int, dimension of each capsule
        n_recurrent: int, units of the recurrent layer (per direction)
        dropout_rate: float, rate for both dropout layers
        l2_penalty: float, currently unused (see note in the body)
    # Returns:
        model: compiled keras Model
    """
    inputs = Input(shape=(200,))
    x = Embedding(20000, 300,  trainable=True)(inputs)
    x = SpatialDropout1D(dropout_rate)(x)
    # NOTE(review): the original call was cut off after `return_sequences=True,`.
    # It is closed here with no further arguments. Since l2_penalty is otherwise
    # unused, the lost arguments presumably applied it as a regularizer -- TODO
    # confirm against the original project.
    x = Bidirectional(
        CuDNNGRU(n_recurrent, return_sequences=True))(x)
    x = PReLU()(x)
    x = Capsule(
        num_capsule=n_capsule, dim_capsule=capsule_dim,
        routings=n_routings, share_weights=True)(x)
    # The layer name 'concatenate' is kept as-is although the layer is a Flatten.
    x = Flatten(name = 'concatenate')(x)
    x = Dropout(dropout_rate)(x)
    outputs = Dense(6, activation='softmax')(x)
    model = Model(inputs=inputs, outputs=outputs)
    model.compile(loss='categorical_crossentropy', optimizer='nadam', metrics=['accuracy'])
    return model
def __init__(self, sess, B, V, E, H, lr=1e-3):
        """Store the session and the hyper-parameters on the instance.

        # Arguments:
            sess: session object, stored as-is
            B: int, Batch size
            V: int, Vocabulary size
            E: int, Embedding size
            H: int, LSTM hidden size
        # Optional Arguments:
            lr: float, learning rate, default is 0.001
        """
        self.sess = sess
        self.B = B
        self.V = V
        self.E = E
        # The original `self.H = H = lr` stored the learning rate as the hidden
        # size and never kept lr; the two values are now stored separately.
        self.H = H
        self.lr = lr
def GeneratorPretraining(V, E, H):
    """Model for Generator pretraining.

    The original description says this model's weights should be shared with
    another model; the sentence is cut off in the source, presumably it refers
    to the generator -- TODO confirm.

    # Arguments:
        V: int, Vocabulary size
        E: int, Embedding size
        H: int, LSTM hidden size
    # Returns:
        generator_pretraining: keras Model
            input: word ids, shape = (B, T)
            output: word probability, shape = (B, T, V)
    """
    # in comment, B means batch size, T means lengths of time steps.
    # Named word_ids rather than `input` so the builtin is not shadowed.
    word_ids = Input(shape=(None,), dtype='int32', name='Input') # (B, T)
    out = Embedding(V, E, mask_zero=True, name='Embedding')(word_ids) # (B, T, E)
    out = LSTM(H, return_sequences=True, name='LSTM')(out)  # (B, T, H)
    out = TimeDistributed(
        Dense(V, activation='softmax', name='DenseSoftmax'),
        name='TimeDenseSoftmax')(out)    # (B, T, V)
    generator_pretraining = Model(word_ids, out)
    return generator_pretraining
def parse_args():
    """Parse the command-line options of the GMF script from ``sys.argv``.

    Returns the ``argparse.Namespace`` produced by the parser. The options are
    declared in a table and registered in the listed order.
    """
    option_table = (
        ('--path', dict(nargs='?', default='Data/',
                        help='Input data path.')),
        ('--dataset', dict(nargs='?', default='ml-1m',
                           help='Choose a dataset.')),
        ('--epochs', dict(type=int, default=100,
                          help='Number of epochs.')),
        ('--batch_size', dict(type=int, default=256,
                              help='Batch size.')),
        ('--num_factors', dict(type=int, default=8,
                               help='Embedding size.')),
        # Kept as a raw string such as '[0,0]'; no conversion happens here.
        ('--regs', dict(nargs='?', default='[0,0]',
                        help="Regularization for user and item embeddings.")),
        ('--num_neg', dict(type=int, default=4,
                           help='Number of negative instances to pair with a positive instance.')),
        ('--lr', dict(type=float, default=0.001,
                      help='Learning rate.')),
        ('--learner', dict(nargs='?', default='adam',
                           help='Specify an optimizer: adagrad, adam, rmsprop, sgd')),
        ('--verbose', dict(type=int, default=1,
                           help='Show performance per X iterations')),
        ('--out', dict(type=int, default=1,
                       help='Whether to save the trained model.')),
    )

    parser = argparse.ArgumentParser(description="Run GMF.")
    for flag, settings in option_table:
        parser.add_argument(flag, **settings)
    return parser.parse_args()
def get_embedding_matrix(self):
        """Return the embedding matrix for the words in ``self.word_index``.

        The matrix has ``len(self.word_index) + 1`` rows and
        ``self.embed_size`` columns and starts out filled with values from
        ``np.random.random``. The row of every word that has a vector in
        ``self.embedding_index`` is overwritten with that vector; the other
        rows keep their random initialisation. When ``self.verbose == 1`` the
        number and percentage of words without a vector is printed.
        """
        vocab_count = len(self.word_index)
        embedding_matrix = np.random.random((vocab_count + 1, self.embed_size))
        absent_words = 0
        for word, i in self.word_index.items():
            embedding_vector = self.embedding_index.get(word)
            if embedding_vector is not None:
                embedding_matrix[i] = embedding_vector
            else:
                # Only words WITHOUT a pretrained vector count as absent; the
                # original incremented the counter for found words instead.
                absent_words += 1
        if self.verbose == 1:
            # Guard the percentage against an empty vocabulary.
            absent_share = absent_words * 100 / vocab_count if vocab_count else 0.0
            print('Total absent words are', absent_words, 'which is', "%0.2f" %
                absent_share, '% of total words')
        return embedding_matrix
def add_glove_model(self):
        """Read the pretrained embedding file into a ``{word: vector}`` dict.

        Each non-empty line of ``self.embedded_dir`` is split on whitespace:
        the first token is the word, the remaining tokens are parsed as a
        float32 vector. Reading stops at the first vector whose length differs
        from ``self.embed_size``; a message is printed and the entries read so
        far are returned. If the file cannot be opened a message is printed
        and an empty dict is returned.
        """
        embeddings_index = {}
        try:
            # The context manager closes the file on every exit path (the
            # original never closed it).
            with open(self.embedded_dir) as f:
                for line in f:
                    values = line.split()
                    if not values:
                        # Blank line: nothing to parse.
                        continue
                    word = values[0]
                    coefs = np.asarray(values[1:], dtype='float32')
                    # Explicit check instead of `assert`, so the validation
                    # still runs under `python -O`.
                    if coefs.shape[0] != self.embed_size:
                        print("Embedding vector size does not match with given embedded size")
                        break
                    embeddings_index[word] = coefs
        except OSError:
            print('Embedded file does not found')
        return embeddings_index
def get_model(num_users, num_items, latent_dim, regs=[0,0]):
    """Build the (uncompiled) matrix-factorization model.

    User and item ids are embedded into ``latent_dim`` dimensions, multiplied
    element-wise and passed through a single sigmoid unit. This block uses the
    Keras 1 keyword names (``init``, ``W_regularizer``, ``merge``).

    # Arguments:
        num_users: int, input_dim of the user embedding
        num_items: int, input_dim of the item embedding
        latent_dim: int, output_dim of both embeddings
        regs: two values, l2 factors for the user and the item embedding
            (the default list is only read, never mutated)
    # Returns:
        model: keras Model taking [user_input, item_input]
    """
    # Input variables
    user_input = Input(shape=(1,), dtype='int32', name = 'user_input')
    item_input = Input(shape=(1,), dtype='int32', name = 'item_input')

    MF_Embedding_User = Embedding(input_dim = num_users, output_dim = latent_dim, name = 'user_embedding',
                                  init = init_normal, W_regularizer = l2(regs[0]), input_length=1)
    MF_Embedding_Item = Embedding(input_dim = num_items, output_dim = latent_dim, name = 'item_embedding',
                                  init = init_normal, W_regularizer = l2(regs[1]), input_length=1)
    # Crucial to flatten an embedding vector!
    user_latent = Flatten()(MF_Embedding_User(user_input))
    item_latent = Flatten()(MF_Embedding_Item(item_input))
    # Element-wise product of user and item embeddings
    predict_vector = merge([user_latent, item_latent], mode = 'mul')
    # Final prediction layer
    prediction = Dense(1, activation='sigmoid', init='lecun_uniform', name = 'prediction')(predict_vector)
    # The Model(...) call was left unclosed in the original; `prediction` is the
    # only output tensor built above.
    model = Model(input=[user_input, item_input],
                  output=prediction)

    return model
def CapsuleNet(n_capsule = 10, n_routings = 5, capsule_dim = 16,
     n_recurrent=100, dropout_rate=0.2, l2_penalty=0.0001):
    """Build and compile the embedding -> BiGRU -> Capsule classifier.

    The input is a sequence of 170 token ids, embedded with a
    21099 x 300 trainable table; the output is a 6-way softmax.

    # Arguments:
        n_capsule: int, number of capsules
        n_routings: int, number of routing iterations
        capsule_dim: int, dimension of each capsule
        n_recurrent: int, units of the recurrent layer (per direction)
        dropout_rate: float, rate for both dropout layers
        l2_penalty: float, currently unused (see note in the body)
    # Returns:
        model: compiled keras Model
    """
    inputs = Input(shape=(170,))
    x = Embedding(21099, 300,  trainable=True)(inputs)
    x = SpatialDropout1D(dropout_rate)(x)
    # NOTE(review): the original call was cut off after `return_sequences=True,`.
    # It is closed here with no further arguments. Since l2_penalty is otherwise
    # unused, the lost arguments presumably applied it as a regularizer -- TODO
    # confirm against the original project.
    x = Bidirectional(
        CuDNNGRU(n_recurrent, return_sequences=True))(x)
    x = PReLU()(x)
    x = Capsule(
        num_capsule=n_capsule, dim_capsule=capsule_dim,
        routings=n_routings, share_weights=True)(x)
    # The layer name 'concatenate' is kept as-is although the layer is a Flatten.
    x = Flatten(name = 'concatenate')(x)
    x = Dropout(dropout_rate)(x)
    outputs = Dense(6, activation='softmax')(x)
    model = Model(inputs=inputs, outputs=outputs)
    model.compile(loss='categorical_crossentropy', optimizer='nadam', metrics=['accuracy'])
    return model
def get_model(num_users, num_items, latent_dim, regs=[0,0]):
    """Build the (uncompiled) matrix-factorization model.

    # Arguments:
        num_users: int, input_dim of the user embedding
        num_items: int, input_dim of the item embedding
        latent_dim: int, output_dim of both embeddings
        regs: two values, l2 factors for the user and the item embedding
    # Returns:
        keras Model taking [user_input, item_input] and returning one sigmoid unit
    """
    # One integer id per sample on each side of the interaction.
    user_input = Input(shape=(1,), dtype='int32', name='user_input')
    item_input = Input(shape=(1,), dtype='int32', name='item_input')

    user_reg = l2(regs[0])
    item_reg = l2(regs[1])
    user_table = Embedding(input_dim=num_users, output_dim=latent_dim, name='user_embedding',
                           embeddings_regularizer=user_reg, input_length=1)
    item_table = Embedding(input_dim=num_items, output_dim=latent_dim, name='item_embedding',
                           embeddings_regularizer=item_reg, input_length=1)

    # Flatten the embedding outputs before combining them.
    user_latent = Flatten()(user_table(user_input))
    item_latent = Flatten()(item_table(item_input))

    # Element-wise product followed by a single sigmoid unit.
    interaction = Multiply()([user_latent, item_latent])
    score_layer = Dense(1, activation='sigmoid', kernel_initializer='lecun_uniform', name='prediction')
    return Model(inputs=[user_input, item_input], outputs=score_layer(interaction))
def test_tiny_image_captioning(self):
        """Run ``self._test_model`` on a tiny conv-feature + embedding -> LSTM captioning model."""
        # A small conv model serves as the image feature branch.
        conv_in = Input(shape=(16, 16, 3))
        conv_out = Conv2D(2, (3, 3))(conv_in)
        conv_out = Flatten()(conv_out)
        img_model = Model(inputs=[conv_in], outputs=[conv_out])

        # Image features become a single 8-wide "time step".
        img_input = Input(shape=(16, 16, 3))
        img_step = img_model(img_input)
        img_step = Dense(8, name="cap_dense")(img_step)
        img_step = Reshape((1, 8), name="cap_reshape")(img_step)

        # Sentence branch, max_length = 5.
        sentence_input = Input(shape=(5,))
        word_steps = Embedding(8, 8, name="cap_embedding")(sentence_input)

        # Image step and word steps are joined along axis 1 and fed to the LSTM.
        sequence = concatenate([img_step, word_steps], axis=1, name="cap_merge")
        sequence = LSTM(4, return_sequences=True, name="cap_lstm")(sequence)
        sequence = TimeDistributed(Dense(8), name="cap_timedistributed")(sequence)

        combined_model = Model(inputs=[img_input, sentence_input], outputs=[sequence])
        self._test_model(combined_model, one_dim_seq_flags=[False, True])
def test_tiny_image_captioning_feature_merge(self):
        """Run ``self._test_model`` on the captioning front end up to the feature merge."""
        # A small conv model serves as the image feature branch.
        conv_in = Input(shape=(16, 16, 3))
        conv_out = Conv2D(2, (3, 3))(conv_in)
        conv_out = Flatten()(conv_out)
        img_model = Model([conv_in], [conv_out])

        # Image features become a single 8-wide "time step".
        img_input = Input(shape=(16, 16, 3))
        img_step = img_model(img_input)
        img_step = Dense(8, name="cap_dense")(img_step)
        img_step = Reshape((1, 8), name="cap_reshape")(img_step)

        # Sentence branch, max_length = 5.
        sentence_input = Input(shape=(5,))
        word_steps = Embedding(8, 8, name="cap_embedding")(sentence_input)

        # The merged tensor is the model output here (no recurrent layer follows).
        merged = concatenate([img_step, word_steps], axis=1, name="cap_merge")

        combined_model = Model(inputs=[img_input, sentence_input], outputs=[merged])
        self._test_model(combined_model, one_dim_seq_flags=[False, True])
def test_conv_batch_1d(self):
        """Run ``self._test_model`` on a Sequential model holding a Conv1D layer with random weights."""
        # NOTE(review): these three values are assigned but never used below;
        # presumably layers that consumed them are missing from this listing
        # -- TODO confirm against the original test.
        vocabulary_size, embedding_dimension, input_length = 4, 6, 10

        net = Sequential()
        net.add(Conv1D(5, 2))

        # Replace every weight with a random array of the same shape.
        random_weights = [np.random.rand(*w.shape) for w in net.get_weights()]
        net.set_weights(random_weights)
        self._test_model(net, one_dim_seq_flags=[True])
# Builds a Sequential model from `params` (reads 'embedding_dim', 'sequence_length',
# 'n_out' and 'final_activation') and returns it without compiling.
# NOTE(review): this listing looks incomplete. The Embedding(...) call inside
# model.add is never closed, so its remaining arguments (and possibly further
# layers) are missing.
def get_model_41(params):
    # NOTE(review): the path ends with '/', so the file name appears to be missing.
    # The file handle is never closed, and pickle.load must only be used on trusted
    # files because unpickling can execute arbitrary code.
    embedding_weights = pickle.load(open("../data/datasets/train_data/","rb"))
    # main sequential model
    model = Sequential()
    model.add(Embedding(len(embedding_weights[0]), params['embedding_dim'], input_length=params['sequence_length'],
    #model.add(Dropout(params['dropout_prob'][0], input_shape=(params['sequence_length'], params['embedding_dim'])))
    model.add(Dense(output_dim=params["n_out"], init="uniform"))
    logging.debug("Output CNN: %s" % str(model.output_shape))

    # For a linear final activation the output is L2-normalized along axis 1.
    if params['final_activation'] == 'linear':
        model.add(Lambda(lambda x :K.l2_normalize(x, axis=1)))

    return model

# CRNN Arch for audio 
def create_model():
    """Build and compile the BiLSTM + CNN tagger with a CRF output layer.

    Reads the module-level names ``length``, ``vocab``, ``chunk_tags``,
    ``EMBED_DIM``, ``DROPOUT_RATE``, ``FILTERS``, ``HALF_WIN_SIZE`` and
    ``DENSE_DIM``. Two separate embeddings of the same input feed a
    bidirectional LSTM branch and a Conv1D branch; both branches are
    concatenated on axis 2 and scored per time step by the CRF.

    # Returns:
        model: keras Model compiled with the CRF loss and accuracy
    """
    inputs = Input(shape=(length,), dtype='int32', name='inputs')

    # Recurrent branch (masked embedding).
    embedding_1 = Embedding(len(vocab), EMBED_DIM, input_length=length, mask_zero=True)(inputs)
    bilstm = Bidirectional(LSTM(EMBED_DIM // 2, return_sequences=True))(embedding_1)
    bilstm_dropout = Dropout(DROPOUT_RATE)(bilstm)

    # Convolutional branch (its own, unmasked embedding).
    embedding_2 = Embedding(len(vocab), EMBED_DIM, input_length=length)(inputs)
    con = Conv1D(filters=FILTERS, kernel_size=2 * HALF_WIN_SIZE + 1, padding='same')(embedding_2)
    con_d = Dropout(DROPOUT_RATE)(con)
    dense_con = TimeDistributed(Dense(DENSE_DIM))(con_d)

    rnn_cnn = concatenate([bilstm_dropout, dense_con], axis=2)
    dense = TimeDistributed(Dense(len(chunk_tags)))(rnn_cnn)
    crf = CRF(len(chunk_tags), sparse_target=True)
    crf_output = crf(dense)
    # `inputs=` / `outputs=` are the Keras 2 keyword names, matching the Keras 2
    # API used by the rest of this function (the original used the Keras 1
    # `input=` / `output=` spellings).
    model = Model(inputs=[inputs], outputs=[crf_output])
    model.compile(loss=crf.loss_function, optimizer=Adam(), metrics=[crf.accuracy])
    return model
def test_tiny_concat_seq_random(self):
        """Run ``self._test_model`` on two embeddings of one input concatenated along axis 1."""
        max_features, embedding_dims, seq_len = 10, 4, 5
        num_channels = 6  # assigned by the original test; not used below

        # Define a model: the same input goes through two separate embeddings.
        input_tensor = Input(shape=(seq_len,))
        first = Embedding(max_features, embedding_dims)(input_tensor)
        second = Embedding(max_features, embedding_dims)(input_tensor)
        joined = concatenate([first, second], axis=1)
        model = Model(inputs=[input_tensor], outputs=[joined])

        # Set some random weights
        random_weights = [np.random.rand(*w.shape) for w in model.get_weights()]
        model.set_weights(random_weights)

        # Hand the model to the shared test helper.
        self._test_model(model, one_dim_seq_flags=[True])
def get_model(num_users, num_items, layers = [20,10], reg_layers=[0,0]):
    """Build the (uncompiled) MLP recommendation model.

    # Arguments:
        num_users: int, input_dim of the user embedding
        num_items: int, input_dim of the item embedding
        layers: layer sizes; layers[0] is the width of the concatenated
            embeddings, so each embedding has int(layers[0]/2) dimensions
        reg_layers: l2 factors, one per entry of ``layers``; reg_layers[0]
            is applied to both embeddings
    # Returns:
        model: keras Model taking [user_input, item_input]
    # Raises:
        AssertionError: if ``layers`` and ``reg_layers`` differ in length
    """
    assert len(layers) == len(reg_layers)
    num_layer = len(layers) #Number of layers in the MLP
    # Input variables
    user_input = Input(shape=(1,), dtype='int32', name = 'user_input')
    item_input = Input(shape=(1,), dtype='int32', name = 'item_input')
    MLP_Embedding_User = Embedding(input_dim = num_users, output_dim = int(layers[0]/2), name = 'user_embedding',
                                  embeddings_regularizer = l2(reg_layers[0]), input_length=1)
    MLP_Embedding_Item = Embedding(input_dim = num_items, output_dim = int(layers[0]/2), name = 'item_embedding',
                                  embeddings_regularizer = l2(reg_layers[0]), input_length=1)
    # Crucial to flatten an embedding vector!
    user_latent = Flatten()(MLP_Embedding_User(user_input))
    item_latent = Flatten()(MLP_Embedding_Item(item_input))
    # The 0-th layer is the concatenation of embedding layers
    vector = Concatenate(axis=-1)([user_latent, item_latent])
    # MLP layers. The Dense keywords use the Keras 2 names, matching the
    # Keras 2 Embedding / Concatenate calls above (the original mixed in the
    # Keras 1 names W_regularizer / init).
    for idx in range(1, num_layer):
        layer = Dense(layers[idx], kernel_regularizer=l2(reg_layers[idx]), activation='relu', name = 'layer%d' %idx)
        vector = layer(vector)
    # Final prediction layer
    prediction = Dense(1, activation='sigmoid', kernel_initializer='lecun_uniform', name = 'prediction')(vector)
    # The Model(...) call was left unclosed in the original; `prediction` is the
    # only output tensor built above.
    model = Model(inputs=[user_input, item_input], outputs=prediction)
    return model
def parse_args():
    """Parse the command-line options of the GMF script from ``sys.argv``.

    Returns the ``argparse.Namespace`` produced by the parser.
    """
    parser = argparse.ArgumentParser(description='Run GMF')
    add_option = parser.add_argument

    # Data location.
    add_option('--path', nargs='?', default='Data/',
               help='Input data path')
    add_option('--dataset', nargs='?', default='ml-1m',
               help='Choose a dataset.')

    # Training schedule and model size.
    add_option('--epochs', type=int, default=1,
               help='Number of epochs.')
    add_option('--batch_size', type=int, default=256,
               help='Batch size.')
    add_option('--num_factors', type=int, default=8,
               help='Embedding size.')
    # Kept as a raw string such as '[0,0]'; no conversion happens here.
    add_option('--regs', nargs='?', default='[0,0]',
               help="Regularization for user and item embeddings.")
    add_option('--num_neg', type=int, default=4,
               help='Number of negative instances to pair with a positive instance.')

    # Optimizer.
    add_option('--lr', type=float, default=0.001,
               help='Learning rate.')
    add_option('--learner', nargs='?', default='adam',
               help='Specify an optimizer: adagrad, adam, rmsprop, sgd')

    # Reporting and output.
    add_option('--verbose', type=int, default=1,
               help='Show performance per X iterations')
    add_option('--out', type=int, default=1,
               help='Whether to save the trained model.')

    return parser.parse_args()
def parse_args():
    """Parse the command-line options of the NeuMF script from ``sys.argv``.

    Returns the ``argparse.Namespace`` produced by the parser.
    """
    parser = argparse.ArgumentParser(description="Run NeuMF.")

    def text_option(flag, default, description):
        # Option whose value is kept as a raw string (nargs='?').
        parser.add_argument(flag, nargs='?', default=default, help=description)

    def typed_option(flag, kind, default, description):
        # Option whose value is converted with `kind`.
        parser.add_argument(flag, type=kind, default=default, help=description)

    text_option('--path', 'Data/', 'Input data path.')
    text_option('--dataset', 'ml-1m', 'Choose a dataset.')
    typed_option('--epochs', int, 1, 'Number of epochs.')
    typed_option('--batch_size', int, 256, 'Batch size.')
    typed_option('--num_factors', int, 8, 'Embedding size of MF model.')
    text_option('--layers', '[64,32,16,8]',
                "MLP layers. Note that the first layer is the concatenation of user and item embeddings. So layers[0]/2 is the embedding size.")
    typed_option('--reg_mf', float, 0, 'Regularization for MF embeddings.')
    text_option('--reg_layers', '[0,0,0,0]',
                "Regularization for each MLP layer. reg_layers[0] is the regularization for embeddings.")
    typed_option('--num_neg', int, 4,
                 'Number of negative instances to pair with a positive instance.')
    typed_option('--lr', float, 0.001, 'Learning rate.')
    text_option('--learner', 'adam',
                'Specify an optimizer: adagrad, adam, rmsprop, sgd')
    typed_option('--verbose', int, 1, 'Show performance per X iterations')
    typed_option('--out', int, 1, 'Whether to save the trained model.')
    text_option('--mf_pretrain', '',
                'Specify the pretrain model file for MF part. If empty, no pretrain will be used')
    text_option('--mlp_pretrain', '',
                'Specify the pretrain model file for MLP part. If empty, no pretrain will be used')
    return parser.parse_args()

# Source: text-classifier project (Apache License 2.0).
# Builds and compiles a bidirectional-LSTM softmax classifier over embedded token ids.
# NOTE(review): the parameter list is cut off after max_len. The body reads
# vocabulary_size, embedding_dim, hidden_dim and num_classes, which are presumably
# the missing parameters (defaults unknown) -- confirm against the original project.
def rnn_model(max_len=400,
    print("Bidirectional LSTM...")
    inputs = Input(shape=(max_len,), dtype='int32')
    embedding = Embedding(input_dim=vocabulary_size, output_dim=embedding_dim,
                          input_length=max_len, name="embedding")(inputs)
    # Without return_sequences the wrapped LSTM yields one vector per sample.
    lstm_layer = Bidirectional(LSTM(hidden_dim))(embedding)
    output = Dense(num_classes, activation='softmax')(lstm_layer)
    model = Model(inputs, output)
    model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy'])
    return model 
def emit_Embedding(self, IR_node, in_scope=False):
        # Formats and returns one line of source code that constructs a
        # layers.Embedding layer and applies it to an input.
        # NOTE(review): the .format( call below is never closed -- the six values for
        # the placeholders (target name, layer name, input_dim, output_dim, mask_zero,
        # input) are missing from this listing. in_scope is not used in the visible lines.

        code = "{:<15} = layers.Embedding(name = '{}', input_dim = {}, output_dim = {}, mask_zero = {})({})".format(
        return code 
# Example #26
def test_imdb_fasttext_first_2(self):
        """Run ``self._test_model`` on a Sequential model holding a single Embedding layer."""
        max_features, max_len, embedding_dims = 10, 6, 4
        # NOTE(review): pool_length is assigned but never used, and the original
        # comment announces an AveragePooling1D layer (averaging the embeddings of
        # all words in the document) that is not added here; presumably that line
        # is missing from this listing -- TODO confirm.
        pool_length = 2

        net = Sequential()
        net.add(Embedding(max_features, embedding_dims, input_length=max_len))

        self._test_model(net, one_dim_seq_flags=[True])
def test_embedding(self, model_precision=_MLMODEL_FULL_PRECISION):
        """Run ``self._test_model`` on a single Embedding(10, 3) layer with random weights."""
        net = Sequential()
        num_inputs, num_outputs = 10, 3
        net.add(Embedding(num_inputs, num_outputs))

        # Replace every weight with a random array of the same shape.
        random_weights = [np.random.rand(*w.shape) for w in net.get_weights()]
        net.set_weights(random_weights)
        self._test_model(net, model_precision=model_precision)
def test_embedding_seq(self, model_precision=_MLMODEL_FULL_PRECISION):
        """Run ``self._test_model`` on an Embedding(10, 3) layer with input_length=7.

        ``model_precision`` is forwarded to ``self._test_model`` unchanged.
        """
        model = Sequential()
        num_inputs = 10
        num_outputs = 3
        model.add(Embedding(num_inputs, num_outputs, input_length=7))

        # Replace every weight with a random array of the same shape.
        model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()])
        # The original listing kept only the argument line of this call; the
        # `self._test_model(` wrapper is restored to match the sibling tests.
        self._test_model(
            model, one_dim_seq_flags=[True], model_precision=model_precision
        )
def test_embedding_fixed_length(self):
        """Run ``self._test_model`` on an Embedding layer with a fixed input length of 5."""
        sequence_length, vocab_size, embed_channels = 5, 10, 4

        # NOTE(review): dense_units is computed but never used; presumably the
        # layers that consumed it are missing from this listing -- TODO confirm.
        dense_units = sequence_length * embed_channels

        net = Sequential()
        net.add(Embedding(vocab_size, embed_channels, input_length=sequence_length))

        # Replace every weight with a random array of the same shape.
        random_weights = [np.random.rand(*w.shape) for w in net.get_weights()]
        net.set_weights(random_weights)
        self._test_model(net, one_dim_seq_flags=[True])
def test_embedding(self):
        from keras.layers import Embedding

        model = Sequential()
        num_inputs = 10
        num_outputs = 3
        model.add(Embedding(num_inputs, num_outputs, input_length=5))

        input_names = ["input"]
        output_names = ["output"]

        spec = keras.convert(model, input_names, output_names).get_spec()


        # Test the model class

        # Test the inputs and outputs
        self.assertEquals(len(spec.description.input), len(input_names))

        # Test the layer parameters.
        layers = spec.neuralNetwork.layers
        layer_0 = layers[0]

        self.assertEquals(layer_0.embedding.inputDim, num_inputs)
        self.assertEquals(layer_0.embedding.outputChannels, num_outputs)

            len(layer_0.embedding.weights.floatValue), num_inputs * num_outputs