Python keras.backend.tanh() Examples

The following are 30 code examples of keras.backend.tanh(), drawn from open-source projects. Each example lists its source file, project, and license above the code, so you can follow it back to the original project. You may also want to check out all other available functions and classes of the keras.backend module.
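Before diving into the project examples, here is a minimal, self-contained sketch of what keras.backend.tanh() does; the input values are made up purely for illustration:

from keras import backend as K

# Build a small constant tensor and apply the element-wise tanh.
x = K.constant([[-2.0, 0.0, 2.0]])
y = K.tanh(x)

# K.eval() turns the symbolic tensor into a NumPy array.
print(K.eval(y))  # approximately [[-0.964  0.     0.964]]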
Example #1
Source File: submission_v50.py    From Quora with MIT License
def call(self, x, mask=None):
        features_dim = self.features_dim
        step_dim = self.step_dim

        eij = K.reshape(K.dot(K.reshape(x, (-1, features_dim)),
                        K.reshape(self.W, (features_dim, 1))), (-1, step_dim))

        if self.bias:
            eij += self.b

        eij = K.tanh(eij)

        a = K.exp(eij)

        if mask is not None:
            a *= K.cast(mask, K.floatx())

        a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())

        a = K.expand_dims(a)
        weighted_input = x * a
        return K.sum(weighted_input, axis=1) 
Example #2
Source File: attention_layer.py    From text-classifier with Apache License 2.0
def call(self, x, mask=None):
        # size of x: [batch_size, seq_len, attention_dim]
        # size of u: [attention_dim, 1]
        # uit = tanh(xW+b)
        uit = K.tanh(K.bias_add(K.dot(x, self.W), self.b))
        ait = K.dot(uit, self.u)
        ait = K.squeeze(ait, -1)

        ait = K.exp(ait)

        if mask is not None:
            # Cast the mask to floatX to avoid float64 upcasting in theano
            ait *= K.cast(mask, K.floatx())
        ait /= K.cast(K.sum(ait, axis=1, keepdims=True) + K.epsilon(), K.floatx())
        ait = K.expand_dims(ait)
        weighted_input = x * ait
        output = K.sum(weighted_input, axis=1)

        return output 
Example #3
Source File: my_layers.py    From Attention-Based-Aspect-Extraction with Apache License 2.0
def call(self, input_tensor, mask=None):
        x = input_tensor[0]
        y = input_tensor[1]
        mask = mask[0]

        y = K.transpose(K.dot(self.W, K.transpose(y)))
        y = K.expand_dims(y, axis=-2)
        y = K.repeat_elements(y, self.steps, axis=1)
        eij = K.sum(x * y, axis=-1)

        if self.bias:
            b = K.repeat_elements(self.b, self.steps, axis=0)
            eij += b

        eij = K.tanh(eij)
        a = K.exp(eij)

        if mask is not None:
            a *= K.cast(mask, K.floatx())

        a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())
        return a 
Example #4
Source File: attention_with_context.py    From DeepResearch with MIT License
def call(self, x, mask=None):
        uit = dot_product(x, self.W)

        if self.bias:
            uit += self.b

        uit = K.tanh(uit)
        ait = dot_product(uit, self.u)

        a = K.exp(ait)

        # apply mask after the exp. will be re-normalized next
        if mask is not None:
            # Cast the mask to floatX to avoid float64 upcasting in theano
            a *= K.cast(mask, K.floatx())

        # In some cases, especially in the early stages of training, the sum may be almost zero
        # and this results in NaNs. A workaround is to add a very small positive number ε to the sum.
        # a /= K.cast(K.sum(a, axis=1, keepdims=True), K.floatx())
        a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())

        a = K.expand_dims(a)
        weighted_input = x * a
        return K.sum(weighted_input, axis=1) 
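The epsilon workaround described in the comments above is easy to check in isolation. A minimal sketch, with invented array values; only the division pattern matches the layer code:

import numpy as np
from keras import backend as K

# A fully masked row: every exp-score has been zeroed out by the mask.
a = np.zeros((1, 3), dtype='float32')

unsafe = a / a.sum(axis=1, keepdims=True)                # 0/0 -> NaN
safe = a / (a.sum(axis=1, keepdims=True) + K.epsilon())  # stays finite

print(unsafe)  # [[nan nan nan]]
print(safe)    # [[0. 0. 0.]]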
Example #5
Source File: attention.py    From deephlapan with GNU General Public License v2.0
def call(self, x, mask=None):
        eij = dot_product(x, self.W)

        if self.bias:
            eij += self.b

        eij = K.tanh(eij)

        a = K.exp(eij)

        if mask is not None:
            a *= K.cast(mask, K.floatx())

        a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())

        weighted_input = x * K.expand_dims(a)

        result = K.sum(weighted_input, axis=1)

        if self.return_attention:
            return [result, a]
        return result 
Example #6
Source File: my_layers.py    From Unsupervised-Aspect-Extraction with Apache License 2.0
def call(self, input_tensor, mask=None):
        x = input_tensor[0]
        y = input_tensor[1]
        mask = mask[0]

        y = K.transpose(K.dot(self.W, K.transpose(y)))
        y = K.expand_dims(y, axis=-2)
        y = K.repeat_elements(y, self.steps, axis=1)
        eij = K.sum(x*y, axis=-1)

        if self.bias:
            b = K.repeat_elements(self.b, self.steps, axis=0)
            eij += b

        eij = K.tanh(eij)
        a = K.exp(eij)

        if mask is not None:
            a *= K.cast(mask, K.floatx())

        a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())
        return a 
Example #7
Source File: textClassifierHATT.py    From textClassifier with Apache License 2.0
def call(self, x, mask=None):
        # size of x: [batch_size, seq_len, attention_dim]
        # size of u: [attention_dim, 1]
        # uit = tanh(xW+b)
        uit = K.tanh(K.bias_add(K.dot(x, self.W), self.b))
        ait = K.dot(uit, self.u)
        ait = K.squeeze(ait, -1)

        ait = K.exp(ait)

        if mask is not None:
            # Cast the mask to floatX to avoid float64 upcasting in theano
            ait *= K.cast(mask, K.floatx())
        ait /= K.cast(K.sum(ait, axis=1, keepdims=True) + K.epsilon(), K.floatx())
        ait = K.expand_dims(ait)
        weighted_input = x * ait
        output = K.sum(weighted_input, axis=1)

        return output 
Example #8
Source File: models.py    From DigiX_HuaWei_Population_Age_Attribution_Predict with MIT License
def call(self, x, mask=None):
        uit = dot_product(x, self.W)

        if self.bias:
            uit += self.b

        uit = K.tanh(uit)
        ait = dot_product(uit, self.u)

        a = K.exp(ait)

        # apply mask after the exp. will be re-normalized next
        if mask is not None:
            # Cast the mask to floatX to avoid float64 upcasting in theano
            a *= K.cast(mask, K.floatx())

        # In some cases, especially in the early stages of training, the sum may be almost zero
        # and this results in NaNs. A workaround is to add a very small positive number ε to the sum.
        # a /= K.cast(K.sum(a, axis=1, keepdims=True), K.floatx())
        a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())

        a = K.expand_dims(a)
        weighted_input = x * a
        return K.sum(weighted_input, axis=1) 
Example #9
Source File: layers.py    From keras-text with MIT License
def call(self, x, mask=None):
        # x: [..., time_steps, features]
        # ut = [..., time_steps, attention_dims]
        ut = K.dot(x, self.kernel)
        if self.use_bias:
            ut = K.bias_add(ut, self.bias)

        ut = K.tanh(ut)
        if self.use_context:
            ut = ut * self.context_kernel

        # Collapse `attention_dims` to 1. This indicates the weight for each time_step.
        ut = K.sum(ut, axis=-1, keepdims=True)

        # Convert those weights into a distribution but along time axis.
        # i.e., sum of alphas along `time_steps` axis should be 1.
        self.at = _softmax(ut, dim=1)
        if mask is not None:
            self.at *= K.cast(K.expand_dims(mask, -1), K.floatx())

        # Weighted sum along `time_steps` axis.
        return K.sum(x * self.at, axis=-2) 
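The comments above describe a softmax taken along the time axis rather than the feature axis. A toy check of that idea, assuming the plain K.softmax with an axis argument (Keras 2.1.6+) in place of the layer's private _softmax helper:

from keras import backend as K

# Scores for 4 time steps of one sample, feature dim already collapsed to 1.
ut = K.constant([[[1.0], [2.0], [3.0], [4.0]]])  # (batch=1, time_steps=4, 1)
at = K.softmax(ut, axis=1)                       # normalize along the time axis

print(K.eval(K.sum(at, axis=1)))  # [[1.]] -- the alphas sum to 1 over time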
Example #10
Source File: attentive_convlstm.py    From sam with MIT License
def __init__(self, nb_filters_in, nb_filters_out, nb_filters_att, nb_rows, nb_cols,
                 init='normal', inner_init='orthogonal', attentive_init='zero',
                 activation='tanh', inner_activation='sigmoid',
                 W_regularizer=None, U_regularizer=None,
                 weights=None, go_backwards=False,
                 **kwargs):
        self.nb_filters_in = nb_filters_in
        self.nb_filters_out = nb_filters_out
        self.nb_filters_att = nb_filters_att
        self.nb_rows = nb_rows
        self.nb_cols = nb_cols
        self.init = initializations.get(init)
        self.inner_init = initializations.get(inner_init)
        self.attentive_init = initializations.get(attentive_init)
        self.activation = activations.get(activation)
        self.inner_activation = activations.get(inner_activation)
        self.initial_weights = weights
        self.go_backwards = go_backwards

        self.W_regularizer = W_regularizer
        self.U_regularizer = U_regularizer
        self.input_spec = [InputSpec(ndim=5)]

        super(AttentiveConvLSTM, self).__init__(**kwargs) 
Example #11
Source File: attentive_convlstm.py    From sam with MIT License
def step(self, x, states):
        x_shape = K.shape(x)
        h_tm1 = states[0]
        c_tm1 = states[1]

        e = self.V_a(K.tanh(self.W_a(h_tm1) + self.U_a(x)))
        a = K.reshape(K.softmax(K.batch_flatten(e)), (x_shape[0], 1, x_shape[2], x_shape[3]))
        x_tilde = x * K.repeat_elements(a, x_shape[1], 1)

        x_i = self.W_i(x_tilde)
        x_f = self.W_f(x_tilde)
        x_c = self.W_c(x_tilde)
        x_o = self.W_o(x_tilde)

        i = self.inner_activation(x_i + self.U_i(h_tm1))
        f = self.inner_activation(x_f + self.U_f(h_tm1))
        c = f * c_tm1 + i * self.activation(x_c + self.U_c(h_tm1))
        o = self.inner_activation(x_o + self.U_o(h_tm1))

        h = o * self.activation(c)
        return h, [h, c] 
Example #12
Source File: Attention.py    From delft with Apache License 2.0
def call(self, x, mask=None):
        features_dim = self.features_dim
        step_dim = self.step_dim

        eij = K.reshape(K.dot(K.reshape(x, (-1, features_dim)),
                        K.reshape(self.W, (features_dim, 1))), (-1, step_dim))

        if self.bias:
            eij += self.b

        eij = K.tanh(eij)

        a = K.exp(eij)

        if mask is not None:
            a *= K.cast(mask, K.floatx())

        a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())

        a = K.expand_dims(a)
        weighted_input = x * a
        return K.sum(weighted_input, axis=1) 
Example #13
Source File: neural_networks.py    From Quora with MIT License
def call(self, x, mask=None):
        features_dim = self.features_dim
        step_dim = self.step_dim

        eij = K.reshape(K.dot(K.reshape(x, (-1, features_dim)),
                        K.reshape(self.W, (features_dim, 1))), (-1, step_dim))

        if self.bias:
            eij += self.b

        eij = K.tanh(eij)

        a = K.exp(eij)

        if mask is not None:
            a *= K.cast(mask, K.floatx())

        a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())

        a = K.expand_dims(a)
        weighted_input = x * a
        return K.sum(weighted_input, axis=1) 
Example #14
Source File: submission_v40.py    From Quora with MIT License
def call(self, x, mask=None):
        features_dim = self.features_dim
        step_dim = self.step_dim

        eij = K.reshape(K.dot(K.reshape(x, (-1, features_dim)),
                        K.reshape(self.W, (features_dim, 1))), (-1, step_dim))

        if self.bias:
            eij += self.b

        eij = K.tanh(eij)

        a = K.exp(eij)

        if mask is not None:
            a *= K.cast(mask, K.floatx())

        a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())

        a = K.expand_dims(a)
        weighted_input = x * a
        return K.sum(weighted_input, axis=1) 
Example #15
Source File: insample.py    From Quora with MIT License
def call(self, x, mask=None):
        features_dim = self.features_dim
        step_dim = self.step_dim

        eij = K.reshape(K.dot(K.reshape(x, (-1, features_dim)),
                        K.reshape(self.W, (features_dim, 1))), (-1, step_dim))

        if self.bias:
            eij += self.b

        eij = K.tanh(eij)

        a = K.exp(eij)

        if mask is not None:
            a *= K.cast(mask, K.floatx())

        a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())

        a = K.expand_dims(a)
        weighted_input = x * a
        return K.sum(weighted_input, axis=1) 
Example #16
Source File: multi_perspective_layer.py    From MatchZoo with Apache License 2.0
def _cosine_distance(v1, v2, cosine_norm=True, eps=1e-6):
    """
    Only requires `tf.reduce_sum(v1 * v2, axis=-1)`.

    :param v1: [batch, time_steps(v1), 1, m, d]
    :param v2: [batch, 1, time_steps(v2), m, d]
    :param cosine_norm: True
    :param eps: 1e-6
    :return: [batch, time_steps(v1), time_steps(v2), m]
    """
    cosine_numerator = tf.reduce_sum(v1 * v2, axis=-1)
    if not cosine_norm:
        return K.tanh(cosine_numerator)
    v1_norm = K.sqrt(tf.maximum(tf.reduce_sum(tf.square(v1), axis=-1), eps))
    v2_norm = K.sqrt(tf.maximum(tf.reduce_sum(tf.square(v2), axis=-1), eps))
    return cosine_numerator / v1_norm / v2_norm 
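A quick sanity check of the computation above on tiny vectors; the shapes are simplified from the docstring's 5-D case and the values are illustrative:

import tensorflow as tf
from keras import backend as K

v1 = tf.constant([[3.0, 4.0]])
v2 = tf.constant([[3.0, 4.0]])

numerator = tf.reduce_sum(v1 * v2, axis=-1)                                # 25.0
v1_norm = K.sqrt(tf.maximum(tf.reduce_sum(tf.square(v1), axis=-1), 1e-6))  # 5.0
v2_norm = K.sqrt(tf.maximum(tf.reduce_sum(tf.square(v2), axis=-1), 1e-6))  # 5.0

print(K.eval(numerator / v1_norm / v2_norm))  # [1.] for identical vectors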
Example #17
Source File: my_layers.py    From Aspect-level-sentiment with Apache License 2.0
def call(self, input_tensor, mask=None):
        x = input_tensor[0]
        aspect = input_tensor[1]
        mask = mask[0]

        aspect = K.transpose(K.dot(self.W, K.transpose(aspect)))
        aspect = K.expand_dims(aspect, axis=-2)
        aspect = K.repeat_elements(aspect, self.steps, axis=1)
        eij = K.sum(x*aspect, axis=-1)

        if self.bias:
            b = K.repeat_elements(self.b, self.steps, axis=0)
            eij += b

        eij = K.tanh(eij)

        a = K.exp(eij)

        if mask is not None:
            a *= K.cast(mask, K.floatx())

        a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())

        return a 
Example #18
Source File: models.py    From keras_attention with MIT License
def call(self, h, mask=None):
        h_shape = K.shape(h)
        d_w, T = h_shape[0], h_shape[1]
        
        logits = K.dot(h, self.w)  # w^T h
        logits = K.reshape(logits, (d_w, T))
        alpha = K.exp(logits - K.max(logits, axis=-1, keepdims=True))  # exp
        
        # masked timesteps have zero weight
        if mask is not None:
            mask = K.cast(mask, K.floatx())
            alpha = alpha * mask
        alpha = alpha / K.sum(alpha, axis=1, keepdims=True) # softmax
        r = K.sum(h * K.expand_dims(alpha), axis=1)  # r = h*alpha^T
        h_star = K.tanh(r)  # h^* = tanh(r)
        if self.return_attention:
            return [h_star, alpha]
        return h_star 
Example #19
Source File: multi_dim_attention.py    From nlp_toolkit with MIT License
def call(self, x, mask=None):
        uit = K.tanh(K.dot(x, self.Ws1))
        ait = K.dot(uit, self.Ws2)
        ait = K.permute_dimensions(ait, (0, 2, 1))
        A = K.softmax(ait, axis=1)
        M = K.batch_dot(A, x)
        if self.punish:
            A_T = K.permute_dimensions(A, (0, 2, 1))
            tile_eye = K.tile(K.eye(self.weight_ws2), [self.batch_size, 1])
            tile_eye = K.reshape(
                tile_eye, shape=[-1, self.weight_ws2, self.weight_ws2])
            AA_T = K.batch_dot(A, A_T) - tile_eye
            P = K.l2_normalize(AA_T, axis=(1, 2))
            return M, P
        else:
            return M 
Example #20
Source File: attention.py    From nlp_toolkit with MIT License
def call(self, x, mask=None):
        # MLP
        ut = K.dot(x, self.kernel)
        if self.use_bias:
            ut = K.bias_add(ut, self.bias)
        if self.activation:
            ut = K.tanh(ut)
        if self.context_kernel:
            ut = K.dot(ut, self.context_kernel)
        ut = K.squeeze(ut, axis=-1)
        # softmax
        at = K.exp(ut - K.max(ut, axis=-1, keepdims=True))
        if mask is not None:
            at *= K.cast(mask, K.floatx())
        att_weights = at / (K.sum(at, axis=1, keepdims=True) + K.epsilon())
        # output
        atx = x * K.expand_dims(att_weights, axis=-1)
        output = K.sum(atx, axis=1)
        if self.return_attention:
            return [output, att_weights]
        return output 
Example #21
Source File: QuestionAttnGRU.py    From R-NET-in-Keras with MIT License
def step(self, inputs, states):
        uP_t = inputs
        vP_tm1 = states[0]
        _ = states[1:3] # ignore internal dropout/masks
        uQ, WQ_u, WP_v, WP_u, v, W_g1 = states[3:9]
        uQ_mask, = states[9:10]

        WQ_u_Dot = K.dot(uQ, WQ_u) #WQ_u
        WP_v_Dot = K.dot(K.expand_dims(vP_tm1, axis=1), WP_v) #WP_v
        WP_u_Dot = K.dot(K.expand_dims(uP_t, axis=1), WP_u) # WP_u

        s_t_hat = K.tanh(WQ_u_Dot + WP_v_Dot + WP_u_Dot)

        s_t = K.dot(s_t_hat, v) # v
        s_t = K.batch_flatten(s_t)
        a_t = softmax(s_t, mask=uQ_mask, axis=1)
        c_t = K.batch_dot(a_t, uQ, axes=[1, 1])

        GRU_inputs = K.concatenate([uP_t, c_t])
        g = K.sigmoid(K.dot(GRU_inputs, W_g1))  # W_g1
        GRU_inputs = g * GRU_inputs
        vP_t, s = super(QuestionAttnGRU, self).step(GRU_inputs, states)

        return vP_t, s 
Example #22
Source File: QuestionPooling.py    From R-NET-in-Keras with MIT License
def call(self, inputs, mask=None):
        assert(isinstance(inputs, list) and len(inputs) == 5)
        uQ, WQ_u, WQ_v, v, VQ_r = inputs
        uQ_mask = mask[0] if mask is not None else None

        ones = K.ones_like(K.sum(uQ, axis=1, keepdims=True)) # (B, 1, 2H)
        s_hat = K.dot(uQ, WQ_u)
        s_hat += K.dot(ones, K.dot(WQ_v, VQ_r))
        s_hat = K.tanh(s_hat)
        s = K.dot(s_hat, v)
        s = K.batch_flatten(s)

        a = softmax(s, mask=uQ_mask, axis=1)

        rQ = K.batch_dot(uQ, a, axes=[1, 1])

        return rQ 
Example #23
Source File: PointerGRU.py    From R-NET-in-Keras with MIT License
def step(self, inputs, states):
        # input
        ha_tm1 = states[0] # (B, 2H)
        _ = states[1:3] # ignore internal dropout/masks
        hP, WP_h, Wa_h, v = states[3:7] # (B, P, 2H)
        hP_mask, = states[7:8]

        WP_h_Dot = K.dot(hP, WP_h) # (B, P, H)
        Wa_h_Dot = K.dot(K.expand_dims(ha_tm1, axis=1), Wa_h) # (B, 1, H)

        s_t_hat = K.tanh(WP_h_Dot + Wa_h_Dot) # (B, P, H)
        s_t = K.dot(s_t_hat, v) # (B, P, 1)
        s_t = K.batch_flatten(s_t) # (B, P)
        a_t = softmax(s_t, mask=hP_mask, axis=1) # (B, P)
        c_t = K.batch_dot(hP, a_t, axes=[1, 1]) # (B, 2H)

        GRU_inputs = c_t
        ha_t, (ha_t_,) = super(PointerGRU, self).step(GRU_inputs, states)
        
        return a_t, [ha_t] 
Example #24
Source File: SelfAttnGRU.py    From R-NET-in-Keras with MIT License
def step(self, inputs, states):
        vP_t = inputs
        hP_tm1 = states[0]
        _ = states[1:3] # ignore internal dropout/masks 
        vP, WP_v, WPP_v, v, W_g2 = states[3:8]
        vP_mask, = states[8:]

        WP_v_Dot = K.dot(vP, WP_v)
        WPP_v_Dot = K.dot(K.expand_dims(vP_t, axis=1), WPP_v)

        s_t_hat = K.tanh(WPP_v_Dot + WP_v_Dot)
        s_t = K.dot(s_t_hat, v)
        s_t = K.batch_flatten(s_t)

        a_t = softmax(s_t, mask=vP_mask, axis=1)

        c_t = K.batch_dot(a_t, vP, axes=[1, 1])
        
        GRU_inputs = K.concatenate([vP_t, c_t])
        g = K.sigmoid(K.dot(GRU_inputs, W_g2))
        GRU_inputs = g * GRU_inputs
        
        hP_t, s = super(SelfAttnGRU, self).step(GRU_inputs, states)

        return hP_t, s 
Example #25
Source File: pooling.py    From onto-lstm with Apache License 2.0
def call(self, x, mask=None):
        mean = super(IntraAttention, self).call(x, mask)
        # x: (batch_size, input_length, input_dim)
        # mean: (batch_size, input_dim)
        ones = K.expand_dims(K.mean(K.ones_like(x), axis=(0, 2)), axis=0)  # (1, input_length)
        # (batch_size, input_length, input_dim)
        tiled_mean = K.permute_dimensions(K.dot(K.expand_dims(mean), ones), (0, 2, 1))
        if mask is not None:
            if K.ndim(mask) > K.ndim(x):
                # Assuming this is because of the bug in Bidirectional. Temporary fix follows.
                # TODO: Fix Bidirectional.
                mask = K.any(mask, axis=(-2, -1))
            if K.ndim(mask) < K.ndim(x):
                mask = K.expand_dims(mask)
            x = switch(mask, x, K.zeros_like(x))
        # (batch_size, input_length, proj_dim)
        projected_combination = K.tanh(K.dot(x, self.vector_projector) + K.dot(tiled_mean, self.mean_projector))
        scores = K.dot(projected_combination, self.scorer)  # (batch_size, input_length)
        weights = K.softmax(scores)  # (batch_size, input_length)
        attended_x = K.sum(K.expand_dims(weights) * x, axis=1)  # (batch_size, input_dim)
        return attended_x 
Example #26
Source File: layers.py    From keras-han-for-docla with MIT License
def _get_attention_weights(self, X):
        """
        Computes the attention weights for each timestep in X
        :param X: 3d-tensor (batch_size, time_steps, input_dim)
        :return: 2d-tensor (batch_size, time_steps) of attention weights
        """
        # Compute a time-wise stimulus, i.e. a stimulus for each
        # time step. To do this, first compute a hidden layer of
        # dimension self.context_vector_length, then take the
        # similarity of this layer with self.u as the stimulus
        u_tw = K.tanh(K.dot(X, self.W))
        tw_stimulus = K.dot(u_tw, self.u)

        # Remove the last axis and apply softmax to the stimulus to
        # get a probability distribution.
        tw_stimulus = K.reshape(tw_stimulus, (-1, tw_stimulus.shape[1]))
        att_weights = K.softmax(tw_stimulus)

        return att_weights 
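To make the shapes in the docstring concrete, here is a hypothetical walk-through with batch_size=2, time_steps=3, input_dim=4 and a context vector of length 5; all sizes are invented for illustration:

import numpy as np
from keras import backend as K

X = K.constant(np.random.rand(2, 3, 4))  # (batch_size, time_steps, input_dim)
W = K.constant(np.random.rand(4, 5))     # projection to the context dimension
u = K.constant(np.random.rand(5, 1))     # context vector

u_tw = K.tanh(K.dot(X, W))                                # (2, 3, 5)
tw_stimulus = K.dot(u_tw, u)                              # (2, 3, 1)
att_weights = K.softmax(K.reshape(tw_stimulus, (-1, 3)))  # (2, 3)

print(K.eval(att_weights).shape)  # (2, 3); each row sums to 1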
Example #27
Source File: rnnlayer.py    From recurrent-attention-for-QA-SQUAD-based-on-keras with MIT License
def step(self, inputs, states):
        h_tm1 = states[0]  # previous memory
        #B_U = states[1]  # dropout matrices for recurrent units
        #B_W = states[2]
        h_tm1a = K.dot(h_tm1, self.Wa)
        eij = K.dot(K.tanh(h_tm1a + K.dot(inputs[:, :self.h_dim], self.Ua)), self.Va)
        eijs = K.repeat_elements(eij, self.h_dim, axis=1)

        #alphaij = K.softmax(eijs) # batchsize * lenh       h batchsize * lenh * ndim
        #ci = K.permute_dimensions(K.permute_dimensions(self.h, [2,0,1]) * alphaij, [1,2,0])
        #cisum = K.sum(ci, axis=1)
        cisum = eijs*inputs[:, :self.h_dim]
        #print(K.shape(cisum), cisum.shape, ci.shape, self.h.shape, alphaij.shape, x.shape)

        zr = K.sigmoid(K.dot(inputs[:, self.h_dim:], self.Wzr) + K.dot(h_tm1, self.Uzr) + K.dot(cisum, self.Czr))
        zi = zr[:, :self.units]
        ri = zr[:, self.units: 2 * self.units]
        si_ = K.tanh(K.dot(inputs[:, self.h_dim:], self.W) + K.dot(ri*h_tm1, self.U) + K.dot(cisum, self.C))
        si = (1-zi) * h_tm1 + zi * si_
        return si, [si] #h_tm1, [h_tm1] 
Example #28
Source File: rnnlayer.py    From recurrent-attention-for-QA-SQUAD-based-on-keras with MIT License
def __init__(self, units, h, h_dim,
                 kernel_initializer='glorot_uniform', recurrent_initializer='orthogonal',
                 #activation='tanh', inner_activation='hard_sigmoid',
                 #W_regularizer=None, U_regularizer=None, b_regularizer=None,
                 #dropout_W=0., dropout_U=0., 
                 **kwargs):
        self.units = units
        self.h = h[:,-1,:]
        self.h_dim = h_dim
        self.kernel_initializer = initializers.get(kernel_initializer)
        self.recurrent_initializer = initializers.get(recurrent_initializer)
        #self.activation = activations.get(activation)
        #self.inner_activation = activations.get(inner_activation)
        #self.W_regularizer = regularizers.get(W_regularizer)
        #self.U_regularizer = regularizers.get(U_regularizer)
        #self.b_regularizer = regularizers.get(b_regularizer)
        #self.dropout_W = dropout_W
        #self.dropout_U = dropout_U

        #if self.dropout_W or self.dropout_U:
        #    self.uses_learning_phase = True
        super(Attention, self).__init__(**kwargs) 
Example #29
Source File: rnnlayer.py    From recurrent-attention-for-QA-SQUAD-based-on-keras with MIT License
def __init__(self, units, h, h_dim,
                 kernel_initializer='glorot_uniform', recurrent_initializer='orthogonal',
                 #activation='tanh', inner_activation='hard_sigmoid',
                 #W_regularizer=None, U_regularizer=None, b_regularizer=None,
                 #dropout_W=0., dropout_U=0., 
                 **kwargs):
        self.units = units
        self.h = h[:,-1,:]
        self.h_dim = h_dim
        self.kernel_initializer = initializers.get(kernel_initializer)
        self.recurrent_initializer = initializers.get(recurrent_initializer)
        #self.activation = activations.get(activation)
        #self.inner_activation = activations.get(inner_activation)
        #self.W_regularizer = regularizers.get(W_regularizer)
        #self.U_regularizer = regularizers.get(U_regularizer)
        #self.b_regularizer = regularizers.get(b_regularizer)
        #self.dropout_W = dropout_W
        #self.dropout_U = dropout_U

        #if self.dropout_W or self.dropout_U:
        #    self.uses_learning_phase = True
        super(SSimpleAttention, self).__init__(**kwargs) 
Example #30
Source File: rnnlayer.py    From recurrent-attention-for-QA-SQUAD-based-on-keras with MIT License
def __init__(self, units, h, h_dim,
                 kernel_initializer='glorot_uniform', recurrent_initializer='orthogonal',
                 #activation='tanh', inner_activation='hard_sigmoid',
                 #W_regularizer=None, U_regularizer=None, b_regularizer=None,
                 #dropout_W=0., dropout_U=0., 
                 **kwargs):
        self.units = units
        self.h = h
        self.h_dim = h_dim
        self.kernel_initializer = initializers.get(kernel_initializer)
        self.recurrent_initializer = initializers.get(recurrent_initializer)
        #self.activation = activations.get(activation)
        #self.inner_activation = activations.get(inner_activation)
        #self.W_regularizer = regularizers.get(W_regularizer)
        #self.U_regularizer = regularizers.get(U_regularizer)
        #self.b_regularizer = regularizers.get(b_regularizer)
        #self.dropout_W = dropout_W
        #self.dropout_U = dropout_U

        #if self.dropout_W or self.dropout_U:
        #    self.uses_learning_phase = True
        super(SimpleAttention, self).__init__(**kwargs)