# Python source code of reactionrnn

from keras.layers import Input, Embedding, Dense, GRU
from keras.models import Model, load_model
from keras.preprocessing import sequence
from keras.preprocessing.text import Tokenizer
import numpy as np
import json
import h5py
from pkg_resources import resource_filename
from collections import OrderedDict
from keras import backend as K


class reactionrnn:
    '''
    Wrapper around a pretrained character-level model that scores text
    against the reactions listed in REACTIONS.

    Construction loads a vocabulary (JSON mapping used as the tokenizer's
    word index) and the model weights, then exposes three operations:
    predict() for reaction scores, predict_label() for the top reaction,
    and encode() for the output of the model's 'rnn' layer.
    '''

    # Sequence length used both to build the model and to pad/truncate
    # the encoded texts; the two must agree.
    MAXLEN = 140

    # Reaction names, in the order of the model's output units.
    REACTIONS = ['love', 'wow', 'haha', 'sad', 'angry']

    def __init__(self, weights_path=None,
                 vocab_path=None):
        '''
        Loads the vocabulary and builds the model.

        weights_path: path to the model weights; defaults to the
            'reactionrnn_weights.hdf5' resource shipped with this module.
        vocab_path: path to the vocabulary JSON; defaults to the
            'reactionrnn_vocab.json' resource shipped with this module.
        '''

        if weights_path is None:
            weights_path = resource_filename(__name__,
                                             'reactionrnn_weights.hdf5')

        if vocab_path is None:
            vocab_path = resource_filename(__name__,
                                           'reactionrnn_vocab.json')

        with open(vocab_path, 'r') as json_file:
            self.vocab = json.load(json_file)

        # Character-level tokenizer with no filtering; the loaded
        # vocabulary is installed directly as its word index.
        self.tokenizer = Tokenizer(filters='', char_level=True)
        self.tokenizer.word_index = self.vocab

        # One more than the vocabulary size
        # (presumably index 0 is reserved for padding -- TODO confirm).
        self.num_classes = len(self.vocab) + 1

        # Build the model with the class-level MAXLEN so the model input
        # length and the encoder padding length cannot drift apart.
        self.model = reactionrnn_model(weights_path, self.num_classes,
                                       maxlen=self.MAXLEN)

        # Secondary model sharing the same input, but stopping at the
        # 'rnn' layer; used by encode().
        self.model_enc = Model(inputs=self.model.input,
                               outputs=self.model.get_layer('rnn').output)

    def predict(self, texts, **kwargs):
        '''
        Scores text(s) against each reaction.

        texts: a single text or a list of texts.
        **kwargs: accepted for call compatibility; currently ignored.

        Returns an OrderedDict of {reaction: score} (scores rounded to 4
        decimals, highest first) when exactly one text is encoded;
        otherwise returns the raw prediction array from the model, one
        row per text with columns in REACTIONS order.
        '''
        texts_enc = reactionrnn_encode_sequences(texts, self.tokenizer,
                                                 maxlen=self.MAXLEN)
        # The model is evaluated one sample per batch.
        predicts = self.model.predict(texts_enc, batch_size=1)

        if len(texts_enc) != 1:
            return predicts

        scores = {react: round(float(score), 4)
                  for react, score in zip(self.REACTIONS, predicts[0])}
        # Sort by descending score; sorted() is stable, so ties keep
        # their REACTIONS order.
        return OrderedDict(sorted(scores.items(),
                                  key=lambda item: -item[1]))

    def predict_label(self, texts, **kwargs):
        '''
        Returns, for each text, the name of the highest-scoring reaction.

        texts: a single text or a list of texts.
        **kwargs: accepted for call compatibility; currently ignored.

        Always returns a list, even for a single text.
        '''
        texts_enc = reactionrnn_encode_sequences(texts, self.tokenizer,
                                                 maxlen=self.MAXLEN)
        predicts = self.model.predict(texts_enc, batch_size=1)
        best = np.argmax(predicts, axis=1)
        return list(np.array(self.REACTIONS)[best])

    def encode(self, texts, **kwargs):
        '''
        Returns the output of the model's 'rnn' layer for the text(s).

        texts: a single text or a list of texts.
        **kwargs: accepted for call compatibility; currently ignored.
        '''
        text_enc = reactionrnn_encode_sequences(texts, self.tokenizer,
                                                maxlen=self.MAXLEN)
        return self.model_enc.predict(text_enc)


def reactionrnn_model(weights_path, num_classes, maxlen=140):
    '''
    Builds the model architecture for reactionrnn and
    loads the pretrained weights for the model.

    weights_path: path to the weights file; weights are loaded by layer
        name.
    num_classes: input dimension of the embedding layer.
    maxlen: length of the input sequences the model accepts.

    Returns the compiled Keras model (input -> embedding -> GRU 'rnn'
    -> 5-unit 'output').
    '''

    def normalized_relu(x):
        # ReLU followed by normalisation so that each row sums to 1.
        # keepdims=True keeps the row sums shaped (batch, 1) so the
        # division broadcasts per row for any batch size; without it the
        # (batch,) sum only lines up correctly when batch == 1.
        # NOTE: a row whose activations are all zero divides by zero.
        activated = K.relu(x)
        return activated / K.sum(activated, axis=-1, keepdims=True)

    # Named text_input to avoid shadowing the builtin `input`; the layer
    # name stays 'input'.
    text_input = Input(shape=(maxlen,), name='input')
    embedded = Embedding(num_classes, 100, input_length=maxlen,
                         name='embedding')(text_input)
    rnn = GRU(256, return_sequences=False, name='rnn')(embedded)
    output = Dense(5, name='output',
                   activation=normalized_relu)(rnn)

    model = Model(inputs=[text_input], outputs=[output])
    model.load_weights(weights_path, by_name=True)
    model.compile(loss='mse', optimizer='nadam')
    return model


def reactionrnn_encode_sequences(texts, tokenizer, maxlen=140):
    '''
    Encodes text(s) into the corresponding encoding(s) for prediction(s) with
    the model.

    texts: a single text, or a list/tuple of texts.
    tokenizer: object providing texts_to_sequences().
    maxlen: length every encoded sequence is padded/truncated to, using
        the defaults of keras' pad_sequences.

    Returns the padded sequences, one row per text.
    '''

    if isinstance(texts, tuple):
        # A tuple is a batch of texts, just like a list; previously it
        # was wrapped as if it were one single text.
        texts = list(texts)
    elif not isinstance(texts, list):
        # Anything else (typically a single string) is one text.
        texts = [texts]

    token_ids = tokenizer.texts_to_sequences(texts)
    return sequence.pad_sequences(token_ids, maxlen=maxlen)