Python keras.backend.repeat_elements() Examples

The following code examples show how to use keras.backend.repeat_elements(). They are taken from open-source Python projects.
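
Before the project examples, here is a minimal sketch of the function's semantics (assuming a standard Keras install with a working backend): K.repeat_elements(x, rep, axis) behaves like np.repeat, repeating each element of x rep times along the given axis.

import numpy as np
from keras import backend as K

x = K.variable(np.array([[1, 2], [3, 4]]))
y = K.repeat_elements(x, rep=3, axis=1)  # shape (2, 2) -> (2, 6)
print(K.eval(y))
# [[1. 1. 1. 2. 2. 2.]
#  [3. 3. 3. 4. 4. 4.]]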

Example 1
Project: 360_aware_saliency   Author: MikhailStartsev   File: models.py    GNU General Public License v3.0 7 votes
def nss(y_true, y_pred):
    max_y_pred = K.repeat_elements(K.expand_dims(K.repeat_elements(K.expand_dims(K.max(K.max(y_pred, axis=2), axis=2)), 
                                                                   shape_r_out, axis=-1)), shape_c_out, axis=-1)
    y_pred /= max_y_pred
    y_pred_flatten = K.batch_flatten(y_pred)

    y_mean = K.mean(y_pred_flatten, axis=-1)
    y_mean = K.repeat_elements(K.expand_dims(K.repeat_elements(K.expand_dims(K.expand_dims(y_mean)), 
                                                               shape_r_out, axis=-1)), shape_c_out, axis=-1)

    y_std = K.std(y_pred_flatten, axis=-1)
    y_std = K.repeat_elements(K.expand_dims(K.repeat_elements(K.expand_dims(K.expand_dims(y_std)), 
                                                              shape_r_out, axis=-1)), shape_c_out, axis=-1)

    y_pred = (y_pred - y_mean) / (y_std + K.epsilon())

    return -(K.sum(K.sum(y_true * y_pred, axis=2), axis=2) / K.sum(K.sum(y_true, axis=2), axis=2))


# Gaussian priors initialization 
Example 2
Project: keras-utility-layer-collection   Author: zimmerrol   File: layer_normalization.py    MIT License 6 votes
def call(self, x):
        mean = K.mean(x, axis=-1)
        std = K.std(x, axis=-1)

        if len(x.shape) == 3:
            mean = K.permute_dimensions(
                K.repeat(mean, x.shape.as_list()[-1]),
                [0,2,1]
            )
            std = K.permute_dimensions(
                K.repeat(std, x.shape.as_list()[-1]),
                [0,2,1] 
            )
            
        elif len(x.shape) == 2:
            mean = K.reshape(
                K.repeat_elements(mean, x.shape.as_list()[-1], 0),
                (-1, x.shape.as_list()[-1])
            )
            std = K.reshape(
                K.repeat_elements(std, x.shape.as_list()[-1], 0),
                (-1, x.shape.as_list()[-1])
            )
        
        return self._g * (x - mean) / (std + self._epsilon) + self._b 
Example 3
Project: IJCAI_Keras_Defense   Author: gujingxiao   File: denseMoE.py    Apache License 2.0 6 votes
def call(self, inputs):

        expert_outputs = tf.tensordot(inputs, self.expert_kernel, axes=1)
        if self.use_expert_bias:
            expert_outputs = K.bias_add(expert_outputs, self.expert_bias)
        if self.expert_activation is not None:
            expert_outputs = self.expert_activation(expert_outputs)

        gating_outputs = K.dot(inputs, self.gating_kernel)
        if self.use_gating_bias:
            gating_outputs = K.bias_add(gating_outputs, self.gating_bias)
        if self.gating_activation is not None:
            gating_outputs = self.gating_activation(gating_outputs)

        output = K.sum(expert_outputs * K.repeat_elements(K.expand_dims(gating_outputs, axis=1), self.units, axis=1), axis=2)

        return output 
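
A NumPy shape check of the gating combination above (sizes and values are illustrative, not from the IJCAI_Keras_Defense code): expert_outputs has shape (batch, units, n_experts) and gating_outputs has shape (batch, n_experts), so the gates must be repeated across the units axis before the weighted sum over experts.

import numpy as np

batch, units, n_experts = 2, 3, 4
expert_outputs = np.random.rand(batch, units, n_experts)
gating_outputs = np.random.rand(batch, n_experts)
gates = np.repeat(gating_outputs[:, None, :], units, axis=1)  # (batch, units, n_experts)
output = (expert_outputs * gates).sum(axis=2)                 # (batch, units)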
Example 4
Project: IJCAI_Keras_Defense   Author: gujingxiao   File: denseMoE.py    Apache License 2.0 6 votes
def call(self, inputs):

        expert_outputs = tf.tensordot(inputs, self.expert_kernel, axes=1)
        if self.use_expert_bias:
            expert_outputs = K.bias_add(expert_outputs, self.expert_bias)
        if self.expert_activation is not None:
            expert_outputs = self.expert_activation(expert_outputs)

        gating_outputs = K.dot(inputs, self.gating_kernel)
        if self.use_gating_bias:
            gating_outputs = K.bias_add(gating_outputs, self.gating_bias)
        if self.gating_activation is not None:
            gating_outputs = self.gating_activation(gating_outputs)

        output = K.sum(expert_outputs * K.repeat_elements(K.expand_dims(gating_outputs, axis=1), self.units, axis=1), axis=2)

        return output 
Example 5
Project: 360_aware_saliency   Author: MikhailStartsev   File: models.py    GNU General Public License v3.0 6 votes
def kl_divergence(y_true, y_pred):
    max_y_pred = K.repeat_elements(K.expand_dims(K.repeat_elements(K.expand_dims(K.max(K.max(y_pred, axis=2), axis=2)), 
                                                                   shape_r_out, axis=-1)), shape_c_out, axis=-1)
    y_pred /= max_y_pred

    sum_y_true = K.repeat_elements(K.expand_dims(K.repeat_elements(K.expand_dims(K.sum(K.sum(y_true, axis=2), axis=2)), 
                                                                   shape_r_out, axis=-1)), shape_c_out, axis=-1)
    sum_y_pred = K.repeat_elements(K.expand_dims(K.repeat_elements(K.expand_dims(K.sum(K.sum(y_pred, axis=2), axis=2)), 
                                                                   shape_r_out, axis=-1)), shape_c_out, axis=-1)
    y_true /= (sum_y_true + K.epsilon())
    y_pred /= (sum_y_pred + K.epsilon())

    return 10 * K.sum(K.sum(y_true * K.log((y_true / (y_pred + K.epsilon())) + K.epsilon()), axis=-1), axis=-1)


# Correlation Coefficient Loss 
Example 6
Project: 360_aware_saliency   Author: MikhailStartsev   File: attentive_convlstm.py    GNU General Public License v3.0 6 votes
def step(self, x, states):
        x_shape = K.shape(x)
        h_tm1 = states[0]
        c_tm1 = states[1]

        e = self.V_a(K.tanh(self.W_a(h_tm1) + self.U_a(x)))
        a = K.reshape(K.softmax(K.batch_flatten(e)), (x_shape[0], 1, x_shape[2], x_shape[3]))
        x_tilde = x * K.repeat_elements(a, x_shape[1], 1)

        x_i = self.W_i(x_tilde)
        x_f = self.W_f(x_tilde)
        x_c = self.W_c(x_tilde)
        x_o = self.W_o(x_tilde)

        i = self.inner_activation(x_i + self.U_i(h_tm1))
        f = self.inner_activation(x_f + self.U_f(h_tm1))
        c = f * c_tm1 + i * self.activation(x_c + self.U_c(h_tm1))
        o = self.inner_activation(x_o + self.U_o(h_tm1))

        h = o * self.activation(c)
        return h, [h, c] 
Example 7
Project: yoctol-keras-layer-zoo   Author: Yoctol   File: mask_to_seq.py    GNU General Public License v3.0 6 votes
def call(self, inputs, mask=None):
        input_shape = K.int_shape(inputs)
        outputs = self.layer.call(inputs)
        outputs = K.permute_dimensions(
            outputs,
            self.permute_pattern + [len(input_shape) - 1]
        )
        outputs_shape = self.compute_output_shape(input_shape)
        outputs = K.reshape(
            outputs,
            (-1, outputs_shape[1], outputs_shape[2])
        )

        mask_tensor = self.compute_mask(
            inputs,
            mask
        )
        mask_tensor = K.cast(mask_tensor, K.floatx())
        mask_tensor = K.expand_dims(mask_tensor)
        mask_output = K.repeat_elements(
            mask_tensor,
            outputs_shape[2],
            2
        )
        return outputs * mask_output 
Example 8
Project: cor-asv-ann   Author: ASVLeipzig   File: seq2seq.py    Apache License 2.0 6 votes
def _regularise_chars(self, embedding_matrix):
        '''Calculate L2 loss of the char embedding weights
        to control for underspecification at zero
        (by interpolating between other embedding vectors).
        '''
        from keras import backend as K
        
        em_dims = embedding_matrix.shape.as_list()
        if em_dims[0] == 0: # voc_size starts with 0 before first training
            return 0
        vec0 = K.slice(embedding_matrix, [0, 0], [1, em_dims[1]])            # zero vector only,
        #vec0 = K.repeat_elements(vec0, em_dims[0]-1, axis=0)                 # repeated
        vecs = K.slice(embedding_matrix, [1, 0], [em_dims[0]-1, em_dims[1]]) # all vectors except zero
        # make sure only vec0 is affected, i.e. vecs change only via global loss:
        vecs = K.stop_gradient(K.mean(vecs, axis=0))
        # scale to make gradients benign:
        underspecification = 1 * K.sum(K.square(vec0 - vecs)) # c='\0' ~ mean of others

        #lowrank = K.sum(0.01 * K.square(embedding_matrix)) # generalization/sparsity
        norms = K.sum(K.square(embedding_matrix), axis=1)
        norm0 = K.ones_like(norms) # square of target (non-zero) weight norm
        lowrank = 0.01 * K.sum(K.square(norm0 - norms))
        
        return K.in_train_phase(lowrank + underspecification, 0.) 
Example 9
Project: textcaps   Author: vinojjayasundara   File: capsulelayers.py    MIT License 6 votes
def call(self, inputs, training=None):
        inputs_expand = K.expand_dims(inputs, 1)
        
        inputs_tiled = K.tile(inputs_expand, [1, self.num_capsule, 1, 1])
        
        if self.channels != 0:
            W2 = K.repeat_elements(self.W, int(self.input_num_capsule / self.channels), 1)
        else:
            W2 = self.W

        inputs_hat = K.map_fn(lambda x: K.batch_dot(x, W2, [2, 3]), elems=inputs_tiled)

        b = tf.zeros(shape=[K.shape(inputs_hat)[0], self.num_capsule, self.input_num_capsule])

        assert self.routings > 0, 'The routings should be > 0.'
        for i in range(self.routings):

            c = tf.nn.softmax(b, dim=1)
            outputs = squash(K.batch_dot(c, inputs_hat, [2, 2])+ self.B)

            if i < self.routings - 1:
                b += K.batch_dot(outputs, inputs_hat, [2, 3])

        return outputs 
Example 10
Project: RPGOne   Author: RTHMaK   File: bigru_index_selector.py    Apache License 2.0 6 votes
def call(self, inputs, mask=None):
        """
        Extract the GRU output for the target document index for the forward
        and backwards GRU outputs, and then concatenate them. If the target word index
        is at index l, and there are T total document words, the desired output
        in the forward pass is at GRU_f[l] (ignoring the batched case) and the
        desired output of the backwards pass is at GRU_b[T-l].

        We need to get these two vectors and concatenate them. To do so, we'll
        reverse the backwards GRU, which allows us to use the same index/mask for both.
        """
        # TODO(nelson): deal with case where cloze token appears multiple times
        # in a question.
        word_indices, gru_f, gru_b = inputs
        index_mask = K.cast(K.equal((K.ones_like(word_indices) * self.target_index),
                                    word_indices), "float32")
        gru_mask = K.repeat_elements(K.expand_dims(index_mask, -1), K.int_shape(gru_f)[-1], K.ndim(gru_f) - 1)
        masked_gru_f = switch(gru_mask, gru_f, K.zeros_like(gru_f))
        selected_gru_f = K.sum(masked_gru_f, axis=1)
        masked_gru_b = switch(gru_mask, gru_b, K.zeros_like(gru_b))
        selected_gru_b = K.sum(masked_gru_b, axis=1)
        selected_bigru = K.concatenate([selected_gru_f, selected_gru_b], axis=-1)
        return selected_bigru 
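
An illustrative NumPy analogue of the selection trick above (numbers are hypothetical, not from RPGOne): the one-hot index mask, repeated across the hidden dimension, zeroes out every timestep except the target, so summing over time selects the target row.

import numpy as np

word_indices = np.array([[7, 42, 9]])                    # (batch, T); target_index = 42
gru_f = np.arange(12, dtype=float).reshape(1, 3, 4)      # (batch, T, dim)
index_mask = (word_indices == 42).astype(float)          # (1, 3)
gru_mask = np.repeat(index_mask[..., None], 4, axis=2)   # (1, 3, 4)
selected = (gru_mask * gru_f).sum(axis=1)                # equals gru_f[:, 1, :]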
Example 11
Project: RPGOne   Author: RTHMaK   File: matrix_attention.py    Apache License 2.0 6 votes
def call(self, inputs, mask=None):
        """
        NOTE: This does not work if ``num_rows_1`` or ``num_rows_2`` is ``None``!  I tried to get
        it to work, but ``K.dot()`` breaks.
        """
        matrix_1, matrix_2 = inputs
        matrix_1_shape = K.int_shape(matrix_1)
        matrix_2_shape = K.int_shape(matrix_2)
        num_rows_1 = matrix_1_shape[1]
        num_rows_2 = matrix_2_shape[1]
        tiled_matrix_1 = K.repeat_elements(K.expand_dims(matrix_1, axis=2), num_rows_2, axis=2)
        tiled_matrix_2 = K.repeat_elements(K.expand_dims(matrix_2, axis=1), num_rows_1, axis=1)

        # We need to be able to access K.int_shape() in compute_similarity() below, but in theano,
        # calling a backend function makes it so you can't use K.int_shape() anymore.  Setting
        # tensor._keras_shape here fixes that.
        # pylint: disable=protected-access
        tiled_matrix_1._keras_shape = matrix_1_shape[:2] + (num_rows_2,) + matrix_1_shape[2:]
        tiled_matrix_2._keras_shape = matrix_2_shape[:1] + (num_rows_1,) + matrix_2_shape[1:]
        return self.similarity_function.compute_similarity(tiled_matrix_1, tiled_matrix_2) 
Example 12
Project: RPGOne   Author: RTHMaK   File: weighted_sum.py    Apache License 2.0 6 votes
def _expand_matrix_if_necessary(matrix, matrix_shape, attention_vector):
        """
        This function tiles the matrix to have the same shape as the attention vector,
        ignoring the embedding dimension.  We take the shape as input (where the shape already has
        the embedding dimension removed) so we can call this on the mask as well as the input
        matrix.
        """
        attention_shape = K.int_shape(attention_vector)
        if matrix_shape != attention_shape:
            # We'll take care of the batch size first.  After this, the matrix_shape should match
            # the end of the attention_shape exactly.
            assert matrix_shape[0] == attention_shape[0], "somehow batch sizes don't match"
            matrix_shape = matrix_shape[1:]
            attention_shape = attention_shape[1:]
            assert attention_shape[-len(matrix_shape):] == matrix_shape, ("matrix_shape must be "
                                                                          "subset of attention_shape")
            for i in range(len(attention_shape) - len(matrix_shape)):
                matrix = K.expand_dims(matrix, axis=i+1)  # +1 to account for batch_size
                matrix = K.repeat_elements(matrix, attention_shape[i], axis=i+1)
        return matrix 
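
A hypothetical shape walk-through of the expansion loop above: broadcasting a (batch, num_rows) mask up to a (batch, num_queries, num_rows) attention shape takes one iteration, inserting the missing axis just after the batch dimension and then repeating along it.

import numpy as np

matrix = np.ones((2, 5))                                # matrix_shape = (2, 5)
attention_shape = (2, 3, 5)
matrix = np.expand_dims(matrix, axis=1)                 # i = 0: (2, 1, 5)
matrix = np.repeat(matrix, attention_shape[1], axis=1)  # (2, 3, 5)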
Example 13
Project: MetaPix   Author: imjal   File: networks.py    MIT License 6 votes
def make_warped_stack(args):
    mask = args[0]
    src_in = args[1]
    trans_in = args[2]

    for i in range(11):
        mask_i = K.repeat_elements(tf.expand_dims(mask[:, :, :, i], 3), 3, 3)
        src_masked = tf.multiply(mask_i, src_in)

        if i == 0:
            warps = src_masked
        else:
            warp_i = affine_warp(src_masked, trans_in[:, :, :, i])
            warps = tf.concat([warps, warp_i], 3)

    return warps 
Example 14
Project: applications   Author: geomstats   File: backend_test.py    MIT License 6 votes
def test_repeat_elements(self):
        reps = 3
        for ndims in [1, 2, 3]:
            shape = np.arange(2, 2 + ndims)
            arr = np.arange(np.prod(shape)).reshape(shape)

            for rep_axis in range(ndims):
                np_rep = np.repeat(arr, reps, axis=rep_axis)
                check_single_tensor_operation('repeat_elements', arr, BACKENDS,
                                              rep=reps, axis=rep_axis,
                                              assert_value_with_ref=np_rep)

                if K.backend() != 'cntk':
                    shape = list(shape)
                    shape[rep_axis] = None
                    x = K.placeholder(shape=shape)
                    y = K.repeat_elements(x, reps, axis=rep_axis)
                    assert y._keras_shape == tuple(shape)
                    assert y._keras_shape == K.int_shape(y) 
Example 15
Project: recurrentshop   Author: farizrahman4u   File: query_reduction_network.py    MIT License 6 votes
def QRNcell():
    xq = Input(batch_shape=(batch_size, embedding_dim * 2))
    # Split into context and query
    xt = Lambda(lambda x, dim: x[:, :dim], arguments={'dim': embedding_dim},
                output_shape=lambda s: (s[0], s[1] // 2))(xq)
    qt = Lambda(lambda x, dim: x[:, dim:], arguments={'dim': embedding_dim},
                output_shape=lambda s: (s[0], s[1] // 2))(xq)

    h_tm1 = Input(batch_shape=(batch_size, embedding_dim))

    zt = Dense(1, activation='sigmoid', bias_initializer=Constant(2.5))(multiply([xt, qt]))
    zt = Lambda(lambda x, dim: K.repeat_elements(x, dim, axis=1), arguments={'dim': embedding_dim})(zt)
    ch = Dense(embedding_dim, activation='tanh')(concatenate([xt, qt], axis=-1))
    rt = Dense(1, activation='sigmoid')(multiply([xt, qt]))
    rt = Lambda(lambda x, dim: K.repeat_elements(x, dim, axis=1), arguments={'dim': embedding_dim})(rt)
    ht = add([multiply([zt, ch, rt]), multiply([Lambda(lambda x: 1 - x, output_shape=lambda s: s)(zt), h_tm1])])
    return RecurrentModel(input=xq, output=ht, initial_states=[h_tm1], final_states=[ht], return_sequences=True)


#
# Load data
# 
Example 16
Project: ASD_classification   Author: MikhailStartsev   File: models.py    GNU General Public License v3.0 6 votes
def kl_divergence(y_true, y_pred):
    max_y_pred = K.repeat_elements(K.expand_dims(K.repeat_elements(K.expand_dims(K.max(K.max(y_pred, axis=2), axis=2)), 
                                                                   shape_r_out, axis=-1)), shape_c_out, axis=-1)
    y_pred /= max_y_pred

    sum_y_true = K.repeat_elements(K.expand_dims(K.repeat_elements(K.expand_dims(K.sum(K.sum(y_true, axis=2), axis=2)), 
                                                                   shape_r_out, axis=-1)), shape_c_out, axis=-1)
    sum_y_pred = K.repeat_elements(K.expand_dims(K.repeat_elements(K.expand_dims(K.sum(K.sum(y_pred, axis=2), axis=2)), 
                                                                   shape_r_out, axis=-1)), shape_c_out, axis=-1)
    y_true /= (sum_y_true + K.epsilon())
    y_pred /= (sum_y_pred + K.epsilon())

    return 10 * K.sum(K.sum(y_true * K.log((y_true / (y_pred + K.epsilon())) + K.epsilon()), axis=-1), axis=-1)


# Correlation Coefficient Loss 
Example 17
Project: ASD_classification   Author: MikhailStartsev   File: models.py    GNU General Public License v3.0 6 votes
def nss(y_true, y_pred):
    max_y_pred = K.repeat_elements(K.expand_dims(K.repeat_elements(K.expand_dims(K.max(K.max(y_pred, axis=2), axis=2)), 
                                                                   shape_r_out, axis=-1)), shape_c_out, axis=-1)
    y_pred /= max_y_pred
    y_pred_flatten = K.batch_flatten(y_pred)

    y_mean = K.mean(y_pred_flatten, axis=-1)
    y_mean = K.repeat_elements(K.expand_dims(K.repeat_elements(K.expand_dims(K.expand_dims(y_mean)), 
                                                               shape_r_out, axis=-1)), shape_c_out, axis=-1)

    y_std = K.std(y_pred_flatten, axis=-1)
    y_std = K.repeat_elements(K.expand_dims(K.repeat_elements(K.expand_dims(K.expand_dims(y_std)), 
                                                              shape_r_out, axis=-1)), shape_c_out, axis=-1)

    y_pred = (y_pred - y_mean) / (y_std + K.epsilon())

    return -(K.sum(K.sum(y_true * y_pred, axis=2), axis=2) / K.sum(K.sum(y_true, axis=2), axis=2))


# Gaussian priors initialization 
Example 18
Project: ASD_classification   Author: MikhailStartsev   File: attentive_convlstm.py    GNU General Public License v3.0 6 votes
def step(self, x, states):
        x_shape = K.shape(x)
        h_tm1 = states[0]
        c_tm1 = states[1]

        e = self.V_a(K.tanh(self.W_a(h_tm1) + self.U_a(x)))
        a = K.reshape(K.softmax(K.batch_flatten(e)), (x_shape[0], 1, x_shape[2], x_shape[3]))
        x_tilde = x * K.repeat_elements(a, x_shape[1], 1)

        x_i = self.W_i(x_tilde)
        x_f = self.W_f(x_tilde)
        x_c = self.W_c(x_tilde)
        x_o = self.W_o(x_tilde)

        i = self.inner_activation(x_i + self.U_i(h_tm1))
        f = self.inner_activation(x_f + self.U_f(h_tm1))
        c = f * c_tm1 + i * self.activation(x_c + self.U_c(h_tm1))
        o = self.inner_activation(x_o + self.U_o(h_tm1))

        h = o * self.activation(c)
        return h, [h, c] 
Example 19
Project: TemporalActionParsing-FineGrained   Author: yz-cnsdqz   File: tf_models.py    MIT License 6 votes
def call(self, x, **kwargs):
        x_lift = K.expand_dims(x, axis=1) # from [batch, time, channel] to [batch, 1, time, channel]
        x_lift = K.repeat_elements(x_lift, 2, axis=1) # to [batch, 2, time, channel]
        # x_lift_shape = tf.shape(x_lift)
        # x_lift = K.reshape(x_lift, x_lift_shape)
        # print(x_lift_shape)

        # pad zeros along the horizontal direction
        offset = K.permute_dimensions(self.offset, (0, 2, 1) ) #[batch, time, 1] to [batch, 1, time]
        offset = ZeroPadding1D((1, 0))(offset) # [batch, 1, time] to [batch, 2, time]
        offset = K.permute_dimensions(offset, (0, 2, 1) ) #[batch, 2, time] to [batch, time, 2]
        offset_lift = K.expand_dims(offset, axis=1) # from [batch, time, 2] to [batch, 1, time, 2]
        offset_lift = K.repeat_elements(offset_lift, 2, axis=1) # to [batch, 2, time, 2]    

        # apply image warping
        x_warp_list = tf.contrib.image.dense_image_warp(x_lift, offset_lift)
        x_warp = x_warp_list[:,-1,:,:]

        return x_warp 
Example 20
Project: recurrent-attention-for-QA-SQUAD-based-on-keras   Author: wentaozhu   File: rnnlayer.py    MIT License 6 votes
def step(self, inputs, states):
        h_tm1 = states[0]  # previous memory
        #B_U = states[1]  # dropout matrices for recurrent units
        #B_W = states[2]
        h_tm1a = K.dot(h_tm1, self.Wa)
        eij = K.dot(K.tanh(h_tm1a + K.dot(inputs[:, :self.h_dim], self.Ua)), self.Va)
        eijs = K.repeat_elements(eij, self.h_dim, axis=1)

        #alphaij = K.softmax(eijs) # batchsize * lenh       h batchsize * lenh * ndim
        #ci = K.permute_dimensions(K.permute_dimensions(self.h, [2,0,1]) * alphaij, [1,2,0])
        #cisum = K.sum(ci, axis=1)
        cisum = eijs*inputs[:, :self.h_dim]
        #print(K.shape(cisum), cisum.shape, ci.shape, self.h.shape, alphaij.shape, x.shape)

        zr = K.sigmoid(K.dot(inputs[:, self.h_dim:], self.Wzr) + K.dot(h_tm1, self.Uzr) + K.dot(cisum, self.Czr))
        zi = zr[:, :self.units]
        ri = zr[:, self.units: 2 * self.units]
        si_ = K.tanh(K.dot(inputs[:, self.h_dim:], self.W) + K.dot(ri*h_tm1, self.U) + K.dot(cisum, self.C))
        si = (1-zi) * h_tm1 + zi * si_
        return si, [si] #h_tm1, [h_tm1] 
Example 21
Project: deep_qa   Author: allenai   File: bigru_index_selector.py    Apache License 2.0 6 votes
def call(self, inputs, mask=None):
        """
        Extract the GRU output for the target document index for the forward
        and backwards GRU outputs, and then concatenate them. If the target word index
        is at index l, and there are T total document words, the desired output
        in the forward pass is at GRU_f[l] (ignoring the batched case) and the
        desired output of the backwards pass is at GRU_b[T-l].

        We need to get these two vectors and concatenate them. To do so, we'll
        reverse the backwards GRU, which allows us to use the same index/mask for both.
        """
        # TODO(nelson): deal with case where cloze token appears multiple times
        # in a question.
        word_indices, gru_f, gru_b = inputs
        index_mask = K.cast(K.equal((K.ones_like(word_indices) * self.target_index),
                                    word_indices), "float32")
        gru_mask = K.repeat_elements(K.expand_dims(index_mask, -1), K.int_shape(gru_f)[-1], K.ndim(gru_f) - 1)
        masked_gru_f = switch(gru_mask, gru_f, K.zeros_like(gru_f))
        selected_gru_f = K.sum(masked_gru_f, axis=1)
        masked_gru_b = switch(gru_mask, gru_b, K.zeros_like(gru_b))
        selected_gru_b = K.sum(masked_gru_b, axis=1)
        selected_bigru = K.concatenate([selected_gru_f, selected_gru_b], axis=-1)
        return selected_bigru 
Example 22
Project: 360_aware_saliency   Author: MikhailStartsev   File: gaussian_prior.py    GNU General Public License v3.0 5 votes
def call(self, x, mask=None):
        mu_x = self.W[:self.nb_gaussian]
        mu_y = self.W[self.nb_gaussian:self.nb_gaussian*2]
        sigma_x = self.W[self.nb_gaussian*2:self.nb_gaussian*3]
        sigma_y = self.W[self.nb_gaussian*3:]

        self.b_s = x.shape[0]
        self.height = x.shape[2]
        self.width = x.shape[3]

        e = self.height / self.width
        e1 = (1 - e) / 2
        e2 = e1 + e

        mu_x = K.clip(mu_x, 0.25, 0.75)
        mu_y = K.clip(mu_y, 0.35, 0.65)

        sigma_x = K.clip(sigma_x, 0.1, 0.9)
        sigma_y = K.clip(sigma_y, 0.2, 0.8)

        x_t = T.dot(T.ones((self.height, 1)), self._linspace(0, 1.0, self.width).dimshuffle('x', 0))
        y_t = T.dot(self._linspace(e1, e2, self.height).dimshuffle(0, 'x'), T.ones((1, self.width)))

        x_t = K.repeat_elements(K.expand_dims(x_t, dim=-1), self.nb_gaussian, axis=-1)
        y_t = K.repeat_elements(K.expand_dims(y_t, dim=-1), self.nb_gaussian, axis=-1)

        gaussian = 1 / (2 * np.pi * sigma_x * sigma_y + K.epsilon()) * \
                   T.exp(-((x_t - mu_x) ** 2 / (2 * sigma_x ** 2 + K.epsilon()) +
                           (y_t - mu_y) ** 2 / (2 * sigma_y ** 2 + K.epsilon())))

        gaussian = K.permute_dimensions(gaussian, (2, 0, 1))
        max_gauss = K.repeat_elements(K.expand_dims(K.repeat_elements(K.expand_dims(K.max(K.max(gaussian, axis=1), axis=1)), self.height, axis=-1)), self.width, axis=-1)
        gaussian = gaussian / max_gauss

        output = K.repeat_elements(K.expand_dims(gaussian, dim=0), self.b_s, axis=0)

        return output 
Example 23
Project: 360_aware_saliency   Author: MikhailStartsev   File: models.py    GNU General Public License v3.0 5 votes
def correlation_coefficient(y_true, y_pred):
    max_y_pred = K.repeat_elements(K.expand_dims(K.repeat_elements(K.expand_dims(K.max(K.max(y_pred, axis=2), axis=2)), 
                                                                   shape_r_out, axis=-1)), shape_c_out, axis=-1)
    y_pred /= max_y_pred

    sum_y_true = K.repeat_elements(K.expand_dims(K.repeat_elements(K.expand_dims(K.sum(K.sum(y_true, axis=2), axis=2)), 
                                                                   shape_r_out, axis=-1)), shape_c_out, axis=-1)
    sum_y_pred = K.repeat_elements(K.expand_dims(K.repeat_elements(K.expand_dims(K.sum(K.sum(y_pred, axis=2), axis=2)), 
                                                                   shape_r_out, axis=-1)), shape_c_out, axis=-1)

    y_true /= (sum_y_true + K.epsilon())
    y_pred /= (sum_y_pred + K.epsilon())

    N = shape_r_out * shape_c_out
    sum_prod = K.sum(K.sum(y_true * y_pred, axis=2), axis=2)
    sum_x = K.sum(K.sum(y_true, axis=2), axis=2)
    sum_y = K.sum(K.sum(y_pred, axis=2), axis=2)
    sum_x_square = K.sum(K.sum(K.square(y_true), axis=2), axis=2)
    sum_y_square = K.sum(K.sum(K.square(y_pred), axis=2), axis=2)

    num = sum_prod - ((sum_x * sum_y) / N)
    den = K.sqrt((sum_x_square - K.square(sum_x) / N) * (sum_y_square - K.square(sum_y) / N))

    return -2 * num / den


# Normalized Scanpath Saliency Loss 
Example 24
Project: deepGroupv2   Author: albu5   File: networks.py    MIT License 5 votes
def group_activity():
    pairwise_activity_input = Input(shape=[None, None, len(group_act_info)], name='pairwise_activity_input')
    pairwise_distance_input = Input(shape=[None, None], name='pairwise_distance_input')
    pairwise_distance_input_repeat = Lambda(lambda x: kb.repeat_elements(kb.expand_dims(x, 3),
                                                                         len(group_act_info),
                                                                         axis=3))(pairwise_distance_input)
    pairwise_activity_histogram = Lambda(lambda x: kb.mean(x[1]*x[0],
                                                           axis=2),
                                         name='weighted_mean_histogram')([pairwise_activity_input,
                                                                          pairwise_distance_input_repeat])
    dense1 = Dense(units=32, activation='sigmoid')(pairwise_activity_histogram)
    # dense2 = Dense(units=32, activation='sigmoid')(dense1)
    group_activity_output = Dense(units=len(group_act_info), activation='softmax', name='group_activity_output')(dense1)
    return Model(inputs=[pairwise_activity_input, pairwise_distance_input], outputs=[group_activity_output]) 
Example 25
Project: BERT   Author: yyht   File: funcs.py    Apache License 2.0 5 votes
def scaled_dot_product_attention_th(q, k, v, attn_mask, attention_dropout: float):
    w = theano_matmul(q, k)
    w = w / K.sqrt(K.cast(shape_list(v)[-1], K.floatx()))
    if attn_mask is not None:
        attn_mask = K.repeat_elements(attn_mask, shape_list(v)[1], 1)
        w = attn_mask * w + (1.0 - attn_mask) * -1e9
    w = K.T.exp(w - w.max()) / K.T.exp(w - w.max()).sum(axis=-1, keepdims=True)
    w = Dropout(attention_dropout)(w)
    return theano_matmul(w, v) 
Example 26
Project: yoctol-keras-layer-zoo   Author: Yoctol   File: maskConv.py    GNU General Public License v3.0 5 votes
def call(self, inputs):
        inputs_shape = K.int_shape(inputs)
        channel_axis = len(inputs_shape) - 1
        masked_tensor = self.compute_mask(inputs)
        masked_tensor = K.expand_dims(masked_tensor)
        masked_tensor = K.repeat_elements(
            masked_tensor,
            inputs_shape[channel_axis],
            channel_axis
        )
        return inputs * K.cast(masked_tensor, K.floatx()) 
Example 27
Project: yoctol-keras-layer-zoo   Author: Yoctol   File: mask_ConvNet.py    GNU General Public License v3.0 5 votes
def call(self, inputs, mask=None):
        outputs = self.layer.call(inputs)
        channel_axis = K.ndim(inputs) - 1
        mask_tensor = K.cast(mask, K.floatx())
        mask_tensor = K.expand_dims(mask_tensor)

        mask_output = self._compute_mask_output(mask_tensor)
        mask_output = K.repeat_elements(
            mask_output,
            self.layer.filters,
            channel_axis
        )
        return outputs * mask_output 
Example 28
Project: yoctol-keras-layer-zoo   Author: Yoctol   File: mask_pooling.py    GNU General Public License v3.0 5 votes
def call(self, inputs, mask=None):
        inputs_tensor = inputs
        mask_inputs = K.expand_dims(mask)

        inputs_shape = K.int_shape(inputs)
        channel_axis = len(inputs_shape) - 1

        if self.pool_mode == 'max':
            mask_inv = tf.logical_not(mask_inputs)
            negative_mask = K.cast(mask_inv, K.floatx()) * -1e20
            negative_mask = K.repeat_elements(
                negative_mask,
                inputs_shape[channel_axis],
                channel_axis
            )
            inputs_tensor = inputs + negative_mask

        output = self.layer._pooling_function(
            inputs_tensor,
            self.layer.pool_size,
            self.layer.strides,
            self.layer.padding,
            self.layer.data_format,
        )
        mask_inputs = K.cast(mask_inputs, K.floatx())

        mask_output = self.layer._pooling_function(
            mask_inputs,
            self.layer.pool_size,
            self.layer.strides,
            self.layer.padding,
            self.layer.data_format,
        )
        mask_output = K.repeat_elements(
            mask_output,
            inputs_shape[channel_axis],
            channel_axis
        )
        return output * mask_output 
Example 29
Project: VoxelNet-Keras   Author: baudm   File: model.py    MIT License 5 votes
def call(self, inputs, **kwargs):
        return K.repeat_elements(inputs, self.rep, self.axis) 
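
The example above shows only the layer's call(); a minimal self-contained version of such a wrapper layer might look like the sketch below. The constructor arguments rep and axis are assumptions inferred from call(), not taken from the VoxelNet-Keras source, and Keras 2.x is assumed.

from keras import backend as K
from keras.layers import Layer

class RepeatElements(Layer):
    """Hypothetical minimal wrapper around K.repeat_elements."""
    def __init__(self, rep, axis=-1, **kwargs):
        super(RepeatElements, self).__init__(**kwargs)
        self.rep = rep
        self.axis = axis

    def call(self, inputs, **kwargs):
        return K.repeat_elements(inputs, self.rep, self.axis)

    def compute_output_shape(self, input_shape):
        # Static shape grows by a factor of rep along the repeated axis.
        shape = list(input_shape)
        if shape[self.axis] is not None:
            shape[self.axis] *= self.rep
        return tuple(shape)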
Example 30
Project: RPGOne   Author: RTHMaK   File: linear.py    Apache License 2.0 5 votes
def compute_similarity(self, tensor_1, tensor_2):
        combined_tensors = self._combine_tensors(tensor_1, tensor_2)
        dot_product = K.squeeze(K.dot(combined_tensors, self.weight_vector), axis=-1)
        if K.backend() == 'theano':
            # For some reason theano is having a hard time broadcasting the elementwise addition,
            # so we need to do this repeat.
            bias = K.repeat_elements(self.bias, K.int_shape(tensor_2)[-2], 0)
        else:
            bias = self.bias
        return self.activation(dot_product + bias) 
Example 31
Project: RPGOne   Author: RTHMaK   File: bilinear.py    Apache License 2.0 5 votes
def compute_similarity(self, tensor_1, tensor_2):
        dot_product = K.sum(K.dot(tensor_1, self.weight_matrix) * tensor_2, axis=-1)
        if K.backend() == 'theano':
            # For some reason theano is having a hard time broadcasting the elementwise addition,
            # so we need to do this repeat.
            bias = K.repeat_elements(self.bias, K.int_shape(tensor_1)[-2], 0)
        else:
            bias = self.bias
        return self.activation(dot_product + bias) 
Example 32
Project: RPGOne   Author: RTHMaK   File: backend.py    Apache License 2.0 5 votes
def tile_vector(vector, matrix):
    """
    NOTE: If your matrix has known shape (i.e., the relevant dimension from `K.int_shape(matrix)` is
    not None), you should just use `K.repeat_elements` on the vector instead of this.  This method
    works, however, when the number of rows in your matrix is unknown at graph compilation time.

    This method takes a (collection of) vector(s) (shape: (batch_size, vector_dim)), and tiles that
    vector a number of times, giving a matrix of shape (batch_size, tile_length, vector_dim).  (I
    say "vector" and "matrix" here because I'm ignoring the batch_size).  We need the matrix as
    input so we know what the tile_length is - the matrix is otherwise ignored.

    This is necessary in a number of places in the code.  For instance, if you want to do a dot
    product of a vector with all of the vectors in a matrix, the most efficient way to do that is
    to tile the vector first, then do an element-wise product with the matrix, then sum out the
    last mode.  So, we capture this functionality here.

    This is not done as a Keras Layer, however; if you want to use this function, you'll need to do
    it _inside_ of a Layer somehow, either in a Lambda or in the call() method of a Layer you're
    writing.
    """
    # Tensorflow can't use unknown sizes at runtime, so we have to make use of the broadcasting
    # ability of TF and Theano instead to create the tiled sentence encoding.

    # Shape: (tile_length, batch_size, vector_dim)
    k_ones = K.permute_dimensions(K.ones_like(matrix), [1, 0, 2])

    # Now we have a (tile_length, batch_size, vector_dim)*(batch_size, vector_dim)
    # elementwise multiplication which is broadcast. We then reshape back.
    tiled_vector = K.permute_dimensions(k_ones * vector, [1, 0, 2])
    return tiled_vector 
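
A hedged usage sketch for the helper above (shapes and names are illustrative, not from the RPGOne codebase): since the docstring says it must be used inside a Layer, one option is a Lambda.

from keras.layers import Input, Lambda

vector = Input(shape=(10,))        # (batch_size, vector_dim)
matrix = Input(shape=(None, 10))   # (batch_size, tile_length, vector_dim); tile_length unknown
tiled = Lambda(lambda inputs: tile_vector(inputs[0], inputs[1]),
               output_shape=(None, 10))([vector, matrix])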
Example 33
Project: RPGOne   Author: RTHMaK   File: backend.py    Apache License 2.0 5 votes
def tile_scalar(scalar, vector):
    """
    NOTE: If your vector has known shape (i.e., the relevant dimension from `K.int_shape(vector)` is
    not None), you should just use `K.repeat_elements` on the scalar instead of this.  This method
    works, however, when the number of entries in your vector is unknown at graph compilation time.

    This method takes a (collection of) scalar(s) (shape: (batch_size, 1)), and tiles that
    scalar a number of times, giving a vector of shape (batch_size, tile_length).  (I say "scalar"
    and "vector" here because I'm ignoring the batch_size).  We need the vector as input so we know
    what the tile_length is - the vector is otherwise ignored.

    This is not done as a Keras Layer, however; if you want to use this function, you'll need to do
    it _inside_ of a Layer somehow, either in a Lambda or in the call() method of a Layer you're
    writing.

    TODO(matt): we could probably make a more general `tile_tensor` method, which can do this for
    any dimensionality.  There is another place in the code where we do this with a matrix and a
    tensor; all three of these can probably be one function.
    """
    # Tensorflow can't use unknown sizes at runtime, so we have to make use of the broadcasting
    # ability of TF and Theano instead to create the tiled sentence encoding.

    # Shape: (tile_length, batch_size)
    k_ones = K.permute_dimensions(K.ones_like(vector), [1, 0])

    # Now we have a (tile_length, batch_size) * (batch_size, 1) elementwise multiplication which is
    # broadcast. We then reshape back.
    tiled_scalar = K.permute_dimensions(k_ones * K.squeeze(scalar, axis=1), [1, 0])
    return tiled_scalar 
Example 34
Project: RPGOne   Author: RTHMaK   File: attention.py    Apache License 2.0 5 votes
def call(self, inputs, mask=None):
        vector, matrix = inputs
        if mask is None:
            matrix_mask = None
        else:
            matrix_mask = mask[1]
        num_rows = K.int_shape(matrix)[1]
        tiled_vector = K.repeat_elements(K.expand_dims(vector, axis=1), num_rows, axis=1)
        similarities = self.similarity_function.compute_similarity(tiled_vector, matrix)
        return masked_softmax(similarities, matrix_mask) 
Example 35
Project: RPGOne   Author: RTHMaK   File: overlap.py    Apache License 2.0 5 votes
def call(self, inputs, mask=None):
        # tensor_a, mask_a are of shape (batch size, length_a)
        # tensor_b, mask_b are of shape (batch size, length_b)
        tensor_a, tensor_b = inputs
        if mask is None:
            mask_b = K.ones_like(tensor_b)
        else:
            mask_b = mask[1]
        length_a = K.int_shape(tensor_a)[1]
        length_b = K.int_shape(tensor_b)[1]
        # change the indices that are masked in b to -1, since no indices
        # in the document will ever be -1.
        tensor_b = K.cast(switch(mask_b, tensor_b, -1*K.ones_like(tensor_b)), "int32")

        # reshape tensor_a to shape (batch_size, length_a, length_b)
        tensor_a_tiled = K.repeat_elements(K.expand_dims(tensor_a, 2),
                                           length_b,
                                           axis=2)
        # reshape tensor_b to shape (batch_size, length_a, length_b)
        tensor_b_tiled = K.repeat_elements(K.expand_dims(tensor_b, 1),
                                           length_a,
                                           axis=1)
        overlap_mask = K.cast(K.equal(tensor_a_tiled, tensor_b_tiled), "float32")
        indices_overlap = K.sum(overlap_mask, axis=-1)
        binary_indices_overlap = K.cast(K.not_equal(indices_overlap,
                                                    K.zeros_like(indices_overlap)),
                                        "int32")
        one_hot_overlap = K.cast(K.one_hot(binary_indices_overlap, 2), "float32")
        return one_hot_overlap 
Example 36
Project: RPGOne   Author: RTHMaK   File: repeat.py    Apache License 2.0 5 votes
def compute_mask(self, inputs, mask=None):
        # pylint: disable=unused-argument
        if mask is None:
            return None
        return K.repeat_elements(K.expand_dims(mask, self.axis), self.repetitions, self.axis) 
Example 37
Project: stochastic_depth_keras   Author: dblN   File: train.py    MIT License 5 votes
def residual_drop(x, input_shape, output_shape, strides=(1, 1)):
    global add_tables

    nb_filter = output_shape[0]
    conv = Convolution2D(nb_filter, 3, 3, subsample=strides,
                         border_mode="same", W_regularizer=l2(weight_decay))(x)
    conv = BatchNormalization(axis=1)(conv)
    conv = Activation("relu")(conv)
    conv = Convolution2D(nb_filter, 3, 3,
                         border_mode="same", W_regularizer=l2(weight_decay))(conv)
    conv = BatchNormalization(axis=1)(conv)

    if strides[0] >= 2:
        x = AveragePooling2D(strides)(x)

    if (output_shape[0] - input_shape[0]) > 0:
        pad_shape = (1,
                     output_shape[0] - input_shape[0],
                     output_shape[1],
                     output_shape[2])
        padding = K.zeros(pad_shape)
        padding = K.repeat_elements(padding, K.shape(x)[0], axis=0)
        x = Lambda(lambda y: K.concatenate([y, padding], axis=1),
                   output_shape=output_shape)(x)

    _death_rate = K.variable(death_rate)
    scale = K.ones_like(conv) - _death_rate
    conv = Lambda(lambda c: K.in_test_phase(scale * c, c),
                  output_shape=output_shape)(conv)

    out = merge([conv, x], mode="sum")
    out = Activation("relu")(out)

    gate = K.variable(1, dtype="uint8")
    add_tables += [{"death_rate": _death_rate, "gate": gate}]
    return Lambda(lambda tensors: K.switch(gate, tensors[0], tensors[1]),
                  output_shape=output_shape)([out, x]) 
Example 38
Project: gcnet_stereo   Author: hmarechal   File: cost_volume.py    MIT License 5 votes
def cost_volume(inputs, dmax):
    left_feature = inputs[0]
    right_feature = inputs[1]
    left_feature = K.expand_dims(left_feature, axis=1)
    left_feature = K.repeat_elements(left_feature, dmax, axis=1)
    l,o,n = K.rnn(_concat_features, inputs=left_feature, initial_states=[right_feature], unroll=True)
    return K.permute_dimensions(o, (0, 2, 1, 3, 4))

# class CostVolumeBuilder():
#     @classmethod
#     def get_layer(cls, D):
#         return Lambda(cost_volume, arguments = {'d':D/2}) 
Example 39
Project: BERT-keras   Author: Separius   File: funcs.py    GNU General Public License v3.0 5 votes
def scaled_dot_product_attention_th(q, k, v, attn_mask, attention_dropout: float, neg_inf: float):
    w = theano_matmul(q, k)
    w = w / K.sqrt(K.cast(shape_list(v)[-1], K.floatx()))
    if attn_mask is not None:
        attn_mask = K.repeat_elements(attn_mask, shape_list(v)[1], 1)
        w = attn_mask * w + (1.0 - attn_mask) * neg_inf
    w = K.T.exp(w - w.max()) / K.T.exp(w - w.max()).sum(axis=-1, keepdims=True)
    w = Dropout(attention_dropout)(w)
    return theano_matmul(w, v) 
Example 40
Project: Neural-Chatbot   Author: saurabhmathur96   File: sequence_blocks.py    GNU General Public License v3.0 5 votes
def step(self, x, states):
        h, params = self.layer.step(x, states)
        attention = states[-1]

        m = self.attn_activation(K.dot(h, self.U_a) * attention + self.b_a)
        s = K.sigmoid(K.dot(m, self.U_s) + self.b_s)

        if self.single_attention_param:
            h = h * K.repeat_elements(s, self.layer.units, axis=1)
        else:
            h = h * s

        return h, params 
Example 41
Project: kpi2017   Author: deepmipt   File: model.py    Apache License 2.0 5 votes
def weighted_with_attention(self, inputs):
        """Define a function for a lambda layer of a model."""

        inp, inp_cont = inputs
        val = np.eye(self.max_sequence_length)
        kcon = K.constant(value=val, dtype='float32')
        diag = K.repeat_elements(inp_cont, self.max_sequence_length, 2) * kcon
        return K.batch_dot(diag, K.permute_dimensions(inp, (0,2,1)), axes=[1,2]) 
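
A small NumPy sanity check of the trick above (values are illustrative): repeating a (batch, length, 1) tensor length times along the last axis and multiplying by the identity places the values on the diagonal of a (length, length) matrix.

import numpy as np

length = 3
inp_cont = np.array([[[0.2], [0.5], [0.3]]])        # (1, length, 1)
kcon = np.eye(length)                               # (length, length)
diag = np.repeat(inp_cont, length, axis=2) * kcon   # (1, length, length)
# diag[0] == [[0.2, 0, 0], [0, 0.5, 0], [0, 0, 0.3]]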
Example 42
Project: experiments   Author: Octavian-ai   File: ntm.py    MIT License 5 votes
def patch_extract(self, address, patch, slice_begin):
		extract_width = self.patch_width - (slice_begin % self.patch_width)

		address_repeated = Lambda(lambda x:K.repeat_elements(K.expand_dims(x, -1), extract_width, -1))(address)
		patch_slices = Lambda(lambda x: x[:,:,slice_begin::])(patch)
		assert_shape(patch_slices, [self.patch_size, extract_width])

		rows = multiply([patch_slices, address_repeated])
		row = Lambda(lambda x: K.sum(x,-2))(rows)
		assert_shape(row, [extract_width])

		return row 
Example 43
Project: experiments   Author: Octavian-ai   File: ntm.py    MIT License 5 votes
def read(self, memory, address):
		address_repeated = Lambda(lambda x:K.repeat_elements(K.expand_dims(x, -1), self.word_size, -1))(address)
		read_rows = multiply([memory, address_repeated])
		read = Lambda(lambda x: K.sum(x,-2))(read_rows)

		assert_shape(read, [self.word_size])

		return read 
Example 44
Project: TF_PG_GANS   Author: naykun   File: layers.py    MIT License 5 votes
def call(self, v, **kwargs):
        assert len(self.si) == len(self.so) and self.si[0] == self.so[0]

        # Decrease feature maps.  Note: channels last
        if self.si[-1] > self.so[-1]:
            v = v[...,:self.so[-1]]

        # Increase feature maps.  Note: channels last
        if self.si[-1] < self.so[-1]:
            z = K.zeros((self.so[:-1] + (self.so[-1] - self.si[-1])),dtype=v.dtype)
            v = K.concatenate([v,z])
        
        # Shrink spatial axis
        if len(self.si) == 4 and (self.si[1] > self.so[1] or self.si[2] > self.so[2]):
            assert self.si[1] % self.so[1] == 0 and self.si[2] % self.so[2] == 0
            pool_size = (self.si[1] // self.so[1], self.si[2] // self.so[2])
            strides = pool_size
            v = K.pool2d(v,pool_size=pool_size,strides=strides,padding='same',data_format='channels_last',pool_mode='avg')

        # Extend spatial axis
        for i in range(1,len(self.si) - 1):
            if self.si[i] < self.so[i]:
                assert self.so[i] % self.si[i] == 0
                v = K.repeat_elements(v,rep=int(self.so[i] / self.si[i]),axis=i)

        return v 
Example 45
Project: TF_PG_GANS   Author: naykun   File: layers.py    MIT License 5 votes
def call(self, input, **kwargs):
        s = list(K.int_shape(input))
        s[0] = tf.shape(input)[0]
        vals = self.adjusted_std(input,axis=0,keepdims=True)                # per activation, over minibatch dim
        if self.averaging == 'all':                                 # average everything --> 1 value per minibatch
            vals = K.mean(vals,keepdims=True)
            reps = s; reps[-1]=1;reps[0] = tf.shape(input)[0]
            vals = K.tile(vals,reps)
        elif self.averaging == 'spatial':                           # average spatial locations
            if len(s) == 4:
                vals = K.mean(vals,axis=(1,2),keepdims=True)
            reps = s; reps[-1]=1
            vals = K.tile(vals,reps)
        elif self.averaging == 'none':                              # no averaging, pass on all information
            vals = K.repeat_elements(vals,rep=s[0],axis=0)
        elif self.averaging == 'gpool':                             # EXPERIMENTAL: compute variance (func) over minibatch AND spatial locations.
            if len(s) == 4:
                vals = self.adjusted_std(input,axis=(0,1,2),keepdims=True)
            reps = s; reps[-1]=1
            vals = K.tile(vals,reps)
        elif self.averaging == 'flat':
            vals = self.adjusted_std(input,keepdims=True)                   # variance of ALL activations --> 1 value per minibatch
            reps = s; reps[-1]=1
            vals = K.tile(vals,reps)
        elif self.averaging.startswith('group'):                    # average everything over n groups of feature maps --> n values per minibatch
            n = int(self.averaging[len('group'):])
            vals = K.reshape(vals, (1, s[1], s[2], n, s[3] // n))
            vals = K.mean(vals, axis=(1,2,4), keepdims=True)
            vals = K.reshape(vals, (1, 1, 1, n))
            reps = s; reps[-1] = 1
            vals = K.tile(vals, reps)
        else:
            raise ValueError('Invalid averaging mode', self.averaging)
        return K.concatenate([input, vals], axis=-1) 
Example 46
Project: deep_intent   Author: AutonomyLab   File: custom_layers.py    BSD 3-Clause "New" or "Revised" License 5 votes
def broadcast_channels(x):
    return K.repeat_elements(x, 128, axis=-1) 
Example 47
Project: ASD_classification   Author: MikhailStartsev   File: gaussian_prior.py    GNU General Public License v3.0 5 votes
def call(self, x, mask=None):
        mu_x = self.W[:self.nb_gaussian]
        mu_y = self.W[self.nb_gaussian:self.nb_gaussian*2]
        sigma_x = self.W[self.nb_gaussian*2:self.nb_gaussian*3]
        sigma_y = self.W[self.nb_gaussian*3:]

        self.b_s = x.shape[0]
        self.height = x.shape[2]
        self.width = x.shape[3]

        e = self.height / self.width
        e1 = (1 - e) / 2
        e2 = e1 + e

        mu_x = K.clip(mu_x, 0.25, 0.75)
        mu_y = K.clip(mu_y, 0.35, 0.65)

        sigma_x = K.clip(sigma_x, 0.1, 0.9)
        sigma_y = K.clip(sigma_y, 0.2, 0.8)

        x_t = T.dot(T.ones((self.height, 1)), self._linspace(0, 1.0, self.width).dimshuffle('x', 0))
        y_t = T.dot(self._linspace(e1, e2, self.height).dimshuffle(0, 'x'), T.ones((1, self.width)))

        x_t = K.repeat_elements(K.expand_dims(x_t, dim=-1), self.nb_gaussian, axis=-1)
        y_t = K.repeat_elements(K.expand_dims(y_t, dim=-1), self.nb_gaussian, axis=-1)

        gaussian = 1 / (2 * np.pi * sigma_x * sigma_y + K.epsilon()) * \
                   T.exp(-((x_t - mu_x) ** 2 / (2 * sigma_x ** 2 + K.epsilon()) +
                           (y_t - mu_y) ** 2 / (2 * sigma_y ** 2 + K.epsilon())))

        gaussian = K.permute_dimensions(gaussian, (2, 0, 1))
        max_gauss = K.repeat_elements(K.expand_dims(K.repeat_elements(K.expand_dims(K.max(K.max(gaussian, axis=1), axis=1)), self.height, axis=-1)), self.width, axis=-1)
        gaussian = gaussian / max_gauss

        output = K.repeat_elements(K.expand_dims(gaussian, dim=0), self.b_s, axis=0)

        return output 
Example 48
Project: ASD_classification   Author: MikhailStartsev   File: models.py    GNU General Public License v3.0 5 votes
def correlation_coefficient(y_true, y_pred):
    max_y_pred = K.repeat_elements(K.expand_dims(K.repeat_elements(K.expand_dims(K.max(K.max(y_pred, axis=2), axis=2)), 
                                                                   shape_r_out, axis=-1)), shape_c_out, axis=-1)
    y_pred /= max_y_pred

    sum_y_true = K.repeat_elements(K.expand_dims(K.repeat_elements(K.expand_dims(K.sum(K.sum(y_true, axis=2), axis=2)), 
                                                                   shape_r_out, axis=-1)), shape_c_out, axis=-1)
    sum_y_pred = K.repeat_elements(K.expand_dims(K.repeat_elements(K.expand_dims(K.sum(K.sum(y_pred, axis=2), axis=2)), 
                                                                   shape_r_out, axis=-1)), shape_c_out, axis=-1)

    y_true /= (sum_y_true + K.epsilon())
    y_pred /= (sum_y_pred + K.epsilon())

    N = shape_r_out * shape_c_out
    sum_prod = K.sum(K.sum(y_true * y_pred, axis=2), axis=2)
    sum_x = K.sum(K.sum(y_true, axis=2), axis=2)
    sum_y = K.sum(K.sum(y_pred, axis=2), axis=2)
    sum_x_square = K.sum(K.sum(K.square(y_true), axis=2), axis=2)
    sum_y_square = K.sum(K.sum(K.square(y_pred), axis=2), axis=2)

    num = sum_prod - ((sum_x * sum_y) / N)
    den = K.sqrt((sum_x_square - K.square(sum_x) / N) * (sum_y_square - K.square(sum_y) / N))

    return -2 * num / den


# Normalized Scanpath Saliency Loss 
Example 49
Project: DLWP   Author: jweyn   File: custom.py    MIT License 5 votes
def init_weights(self, shape):
        if shape[-1] is None:
            return
        # Repeat the weights tensor to match the last dimensions of the batch
        if self.data_format == 'channels_last':
            self.weights = K.expand_dims(self.weights, axis=1)
            self.weights = K.repeat_elements(self.weights, shape[-1], axis=1)
        else:
            self.weights = K.expand_dims(self.weights, axis=1)
            self.weights = K.repeat_elements(self.weights, shape[-2], axis=1)
            self.weights = K.expand_dims(self.weights, axis=2)
            self.weights = K.repeat_elements(self.weights, shape[-1], axis=2)
        self.is_init = True 
Example 50
Project: DLWP   Author: jweyn   File: custom.py    MIT License 5 votes
def latitude_weighted_loss(loss_function=mean_squared_error, lats=None, output_shape=(), axis=-2, weighting='cosine'):
    """
    Create a loss function that weights inputs by a function of latitude before calculating the loss.

    :param loss_function: method: Keras loss function to apply after the weighting
    :param lats: ndarray: 1-dimensional array of latitude coordinates
    :param output_shape: tuple: shape of expected model output
    :param axis: int: latitude axis in model output shape
    :param weighting: str: type of weighting to apply. Options are:
            cosine: weight by the cosine of the latitude (default)
            midlatitude: weight by the cosine of the latitude but also apply a 25% reduction to the equator and boost
                to the mid-latitudes
    :return: callable loss function
    """
    if weighting not in ['cosine', 'midlatitude']:
        raise ValueError("'weighting' must be one of 'cosine' or 'midlatitude'")
    if lats is not None:
        lat_tensor = K.zeros(lats.shape)
        lat_tensor.assign(K.cast_to_floatx(lats[:]))

        weights = K.cos(lat_tensor * np.pi / 180.)
        if weighting == 'midlatitude':
            weights = weights + 0.5 * K.pow(K.sin(lat_tensor * 2 * np.pi / 180.), 2.)

        weight_shape = output_shape[axis:]
        for d in weight_shape[1:]:
            weights = K.expand_dims(weights, axis=-1)
            weights = K.repeat_elements(weights, d, axis=-1)

    else:
        weights = K.ones(output_shape)

    def lat_loss(y_true, y_pred):
        return loss_function(y_true * weights, y_pred * weights)

    return lat_loss 
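
A hedged usage sketch for the factory above (the latitude grid, output shape, and model are hypothetical, not from DLWP):

import numpy as np

lats = np.linspace(-90., 90., 73)   # hypothetical 2.5-degree latitude grid
loss = latitude_weighted_loss(lats=lats, output_shape=(73, 144), axis=-2)
# model.compile(optimizer='adam', loss=loss)   # `model` is a hypothetical Keras model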
Example 51
Project: TemporalActionParsing-FineGrained   Author: yz-cnsdqz   File: tf_models.py    MIT License 5 votes
def weighted_average_local(X,w):
    W = K.expand_dims(w,axis=-1)
    W = K.repeat_elements(W, X.shape[-1], axis=-1)
    y = X*W

    return K.sum(y, axis=1, keepdims=False) 
Example 52
Project: TemporalActionParsing-FineGrained   Author: yz-cnsdqz   File: tf_models.py    MIT License 5 votes
def call(self, x):

        input_shape = x.get_shape().as_list()
        
        # first compute xx'
        x_r = K.expand_dims(x, axis=-1) * K.expand_dims(x, axis=-2)
        x_r = K.reshape(x_r, [-1,input_shape[1],input_shape[2]**2])



        if not self.trainable:
            x = AveragePooling1D(pool_size=self.time_conv_size, strides=self.stride)(x_r)

        else:
            # convfilter = K.expand_dims(self.st_conv_filter_one, axis=-1)
            # convfilter = K.repeat_elements(self.st_conv_filter_one, input_shape[2]**2, axis=-1) # [1,timeconvsize, D2]
            # convfilter = K.expand_dims(convfilter, axis=0)    

            x_r_padding = keras.layers.ZeroPadding1D( self.time_conv_size//2 )(x_r)

            x_r_list = [   K.sum(self.st_conv_filter_one * x_r_padding[:,i:i+self.time_conv_size, :], axis=1) for i in range(0,input_shape[1],self.stride)]
            x_r = K.stack(x_r_list, axis=1)
            print(x_r.shape)
            # x = AveragePooling1D(pool_size=self.time_conv_size, strides=self.stride)(x_r)            

        return x_r 
Example 53
Project: recurrent-attention-for-QA-SQUAD-based-on-keras   Author: wentaozhu   File: QnA.py    MIT License 5 votes
def step(self, x, states):
        h, [h, c] = super(AttentionLSTM, self).step(x, states)
        attention = states[4]

        m = self.attn_activation(K.dot(h, self.U_a) * attention + self.b_a)
        # Intuitively it makes more sense to use a sigmoid (was getting some NaN problems
        # which I think might have been caused by the exponential function -> gradients blow up)
        s = K.sigmoid(K.dot(m, self.U_s) + self.b_s)

        if self.single_attention_param:
            h = h * K.repeat_elements(s, self.output_dim, axis=1)
        else:
            h = h * s

        return h, [h, c] 
Example 54
Project: recurrent-attention-for-QA-SQUAD-based-on-keras   Author: wentaozhu   File: QnA.py    MIT License 5 votes
def step(self, x, states):
        h, [h, c] = self.layer.step(x, states)
        attention = states[4]

        m = self.attn_activation(K.dot(h, self.U_a) * attention + self.b_a)
        s = K.sigmoid(K.dot(m, self.U_s) + self.b_s)

        if self.single_attention_param:
            h = h * K.repeat_elements(s, self.layer.output_dim, axis=1)
        else:
            h = h * s

        return h, [h, c] 
Example 55
Project: recurrent-attention-for-QA-SQUAD-based-on-keras   Author: wentaozhu   File: layers.py    MIT License 5 votes
def step(self, x, states):
        h, [h, c] = super(AttentionLSTM, self).step(x, states)
        attention = states[4]

        m = self.attn_activation(K.dot(h, self.U_a) * attention + self.b_a)
        # Intuitively it makes more sense to use a sigmoid (was getting some NaN problems
        # which I think might have been caused by the exponential function -> gradients blow up)
        s = K.sigmoid(K.dot(m, self.U_s) + self.b_s)

        if self.single_attention_param:
            h = h * K.repeat_elements(s, self.output_dim, axis=1)
        else:
            h = h * s

        return h, [h, c] 
Example 56
Project: recurrent-attention-for-QA-SQUAD-based-on-keras   Author: wentaozhu   File: layers.py    MIT License
def step(self, x, states):
        h, [h, c] = self.layer.step(x, states)
        attention = states[4]

        m = self.attn_activation(K.dot(h, self.U_a) * attention + self.b_a)
        s = K.sigmoid(K.dot(m, self.U_s) + self.b_s)

        if self.single_attention_param:
            h = h * K.repeat_elements(s, self.layer.output_dim, axis=1)
        else:
            h = h * s

        return h, [h, c] 
Example 57
Project: keras-han-for-docla   Author: FlorisHoogenboom   File: layers.py    MIT License
def call(self, X):
        att_weights = self._get_attention_weights(X)

        # Reshape the attention weights to match the dimensions of X
        att_weights = K.reshape(att_weights, (-1, att_weights.shape[1], 1))
        att_weights = K.repeat_elements(att_weights, X.shape[-1], -1)

        # Multiply each input by its attention weights
        weighted_input = keras.layers.Multiply()([X, att_weights])

        # Sum in the direction of the time-axis.
        return K.sum(weighted_input, axis=1) 
Example 58
Project: onto-lstm   Author: pdasigi   File: preposition_predictors.py    Apache License 2.0
def get_split_averages(input_tensor, input_mask, indices):
        # Splits input tensor into three parts based on the indices and
        # returns average of values prior to index, values at the index and
        # average of values after the index.
        # input_tensor: (batch_size, input_length, input_dim)
        # input_mask: (batch_size, input_length)
        # indices: (batch_size, 1)
        # (1, input_length)
        length_range = K.expand_dims(K.arange(K.shape(input_tensor)[1]), dim=0)
        # (batch_size, input_length)
        batched_range = K.repeat_elements(length_range, K.shape(input_tensor)[0], 0)
        tiled_indices = K.repeat_elements(indices, K.shape(input_tensor)[1], 1)  # (batch_size, input_length)
        greater_mask = K.greater(batched_range, tiled_indices)  # (batch_size, input_length)
        lesser_mask = K.lesser(batched_range, tiled_indices)  # (batch_size, input_length)
        equal_mask = K.equal(batched_range, tiled_indices)  # (batch_size, input_length)

        # We also need to mask these masks using the input mask.
        # (batch_size, input_length)
        if input_mask is not None:
            greater_mask = switch(input_mask, greater_mask, K.zeros_like(greater_mask))
            lesser_mask = switch(input_mask, lesser_mask, K.zeros_like(lesser_mask))

        post_sum = K.sum(switch(K.expand_dims(greater_mask), input_tensor, K.zeros_like(input_tensor)), axis=1)  # (batch_size, input_dim)
        pre_sum = K.sum(switch(K.expand_dims(lesser_mask), input_tensor, K.zeros_like(input_tensor)), axis=1)  # (batch_size, input_dim)
        values_at_indices = K.sum(switch(K.expand_dims(equal_mask), input_tensor, K.zeros_like(input_tensor)), axis=1)  # (batch_size, input_dim)

        post_normalizer = K.expand_dims(K.sum(greater_mask, axis=1) + K.epsilon(), dim=1)  # (batch_size, 1)
        pre_normalizer = K.expand_dims(K.sum(lesser_mask, axis=1) + K.epsilon(), dim=1)  # (batch_size, 1)

        return K.cast(pre_sum / pre_normalizer, 'float32'), values_at_indices, K.cast(post_sum / post_normalizer, 'float32') 
Example 59
Project: paraphrase_identification   Author: CubasMike   File: grn_model.py    MIT License
def call(self, x):
        e1 = x[0]
        e2 = x[1]

        batch_size = K.shape(e1)[0]
        # Usually len1 = len2 = max_seq_length
        _, len1, emb_dim = K.int_shape(e1)
        _, len2, _ = K.int_shape(e2)

        # Repeating the matrices to generate all the combinations
        ne1 = K.reshape(K.repeat_elements(K.expand_dims(e1, axis=2), len2, axis=2),
                       (batch_size, len1*len2, emb_dim))
        ne2 = K.reshape(K.repeat_elements(K.expand_dims(e2, axis=1), len1, axis=1),
                       (batch_size, len1*len2, emb_dim))

        # Repeating the second matrix to use in Bilinear Tensor Product
        ne2_k = K.repeat_elements(K.expand_dims(ne2, axis=-1), self.output_dim, axis=-1)

        # Bilinear tensor product
        btp = K.sum(ne2_k * K.permute_dimensions(K.dot(ne1, self.Wb), (0,1,3,2)), axis=2)
        btp = K.reshape(btp, (batch_size, len1, len2, self.output_dim))

        # Concatenating inputs to apply Single Layer Network
        e = K.concatenate([ne1, ne2], axis=-1)

        # Single Layer Network
        #sln = K.relu(K.dot(e, self.Wd))
        sln = K.tanh(K.dot(e, self.Wd))
        sln = K.reshape(sln, (batch_size, len1, len2, self.output_dim))

        # Gate
        g = K.sigmoid(K.dot(e, self.Wg) + self.bg)
        g = K.reshape(g, (batch_size, len1, len2, self.output_dim))

        # Gated Relevance Network
        #s = K.reshape(K.dot(g*btp + (1-g)*sln + self.b, self.u), (batch_size, len1, len2))
        s = K.dot(g*btp + (1-g)*sln + self.b, self.u)

        return s 
Example 60
Project: mlnet   Author: marcellacornia   File: model.py    MIT License
def loss(y_true, y_pred):
    max_y = K.repeat_elements(K.expand_dims(K.repeat_elements(K.expand_dims(K.max(K.max(y_pred, axis=2), axis=2)), shape_r_gt, axis=-1)), shape_c_gt, axis=-1)
    return K.mean(K.square((y_pred / max_y) - y_true) / (1 - y_true + 0.1)) 
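The nested expand/repeat chain above only serves to divide each predicted map by its per-sample maximum. With keepdims broadcasting, the same normalization can be sketched more directly (hypothetical shapes, channels first; assumes a TensorFlow backend):

import numpy as np
from keras import backend as K

y_pred = K.constant(np.random.rand(2, 1, 5, 7).astype('float32'))  # (batch, 1, rows, cols)
max_y = K.max(y_pred, axis=[2, 3], keepdims=True)  # (batch, 1, 1, 1); broadcasts over rows/cols
normalized = y_pred / (max_y + K.epsilon())
print(K.eval(K.max(normalized, axis=[2, 3])))  # each map now peaks at ~1.0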
Example 61
Project: ikelos   Author: braingineer   File: attention.py    MIT License
def call(self, x, mask=None):
        # b,n,f -> b,f via b,n broadcasted
        p_vectors = K.expand_dims(super(SoftAttention, self).call(x, mask), 2)
        expanded_p = K.repeat_elements(p_vectors, K.shape(x)[2], axis=2)
        attended = K.sum(expanded_p * x, axis=1)
        if self.return_probabilities:
            return [attended, p_vectors]
        return attended 
Example 62
Project: ikelos   Author: braingineer   File: attention.py    MIT License
def call(self, target_tensor, mask=None):
        last_dim = K.ndim(self.p_tensor)
        expanded_p = K.repeat_elements(K.expand_dims(self.p_tensor, last_dim), 
                                       K.shape(target_tensor)[last_dim], 
                                       axis=last_dim)
        return K.sum(expanded_p * target_tensor, axis=last_dim-1) 
Example 63
Project: Keras-progressive_growing_of_gans   Author: MSC-BUAA   File: layers.py    MIT License
def call(self, v, **kwargs):
        assert len(self.si) == len(self.so) and self.si[0] == self.so[0]

        # Decrease feature maps.  Note: channels last.
        if self.si[-1] > self.so[-1]:
            v = v[...,:self.so[-1]]

        # Increase feature maps.  Note: channels last.
        if self.si[-1] < self.so[-1]:
            z = K.zeros(self.so[:-1] + (self.so[-1] - self.si[-1],), dtype=v.dtype)
            v = K.concatenate([v,z])
        
        # Shrink spatial axis
        if len(self.si) == 4 and (self.si[1] > self.so[1] or self.si[2] > self.so[2]):
            assert self.si[1] % self.so[1] == 0 and self.si[2] % self.so[2] == 0
            pool_size = (self.si[1] // self.so[1], self.si[2] // self.so[2])
            strides = pool_size
            v = K.pool2d(v,pool_size=pool_size,strides=strides,padding='same',data_format='channels_last',pool_mode='avg')

        # Extend spatial axis
        for i in range(1,len(self.si) - 1):
            if self.si[i] < self.so[i]:
                assert self.so[i] % self.si[i] == 0
                v = K.repeat_elements(v,rep=int(self.so[i] / self.si[i]),axis=i)

        return v 
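Repeating along each spatial axis like this is nearest-neighbour upsampling, i.e. the same operation `UpSampling2D` performs. A minimal sketch of the 2x case (hypothetical shapes, channels last):

import numpy as np
from keras import backend as K

v = K.constant(np.random.rand(1, 4, 4, 3).astype('float32'))
up = K.repeat_elements(K.repeat_elements(v, rep=2, axis=1), rep=2, axis=2)
print(K.int_shape(up))  # (1, 8, 8, 3), matching UpSampling2D(size=2)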
Example 64
Project: Keras-progressive_growing_of_gans   Author: MSC-BUAA   File: layers.py    MIT License
def call(self, input, **kwargs):
        s = list(K.int_shape(input))
        s[0] = tf.shape(input)[0]
        vals = self.adjusted_std(input,axis=0,keepdims=True)                # per activation, over minibatch dim
        if self.averaging == 'all':                                 # average everything --> 1 value per minibatch
            vals = K.mean(vals,keepdims=True)
            reps = s; reps[-1]=1;reps[0] = tf.shape(input)[0]
            vals = K.tile(vals,reps)
        elif self.averaging == 'spatial':                           # average spatial locations
            if len(s) == 4:
                vals = K.mean(vals,axis=(1,2),keepdims=True)
            reps = s; reps[-1]=1
            vals = K.tile(vals,reps)
        elif self.averaging == 'none':                              # no averaging, pass on all information
            vals = K.repeat_elements(vals,rep=s[0],axis=0)
        elif self.averaging == 'gpool':                             # EXPERIMENTAL: compute variance (func) over minibatch AND spatial locations.
            if len(s) == 4:
                vals = self.adjusted_std(input,axis=(0,1,2),keepdims=True)
            reps = s; reps[-1]=1
            vals = K.tile(vals,reps)
        elif self.averaging == 'flat':
            vals = self.adjusted_std(input,keepdims=True)                   # variance of ALL activations --> 1 value per minibatch
            reps = s; reps[-1]=1
            vals = K.tile(vals,reps)
        elif self.averaging.startswith('group'):                    # average everything over n groups of feature maps --> n values per minibatch
            n = int(self.averaging[len('group'):])
            vals = K.reshape(vals, (1, s[1], s[2], n, s[3] // n))
            vals = K.mean(vals, axis=(1,2,4), keepdims=True)
            vals = K.reshape(vals, (1, 1, 1, n))
            reps = s; reps[-1] = 1
            vals = K.tile(vals, reps)
        else:
            raise ValueError('Invalid averaging mode', self.averaging)
        return K.concatenate([input, vals], axis=-1) 
Example 65
Project: very-deep-convnets-raw-waveforms   Author: philipperemy   File: model_resnet.py    Apache License 2.0
def identity_block(input_tensor, kernel_size, filters, stage, block):
    conv_name_base = 'res' + str(stage) + str(block) + '_branch'
    bn_name_base = 'bn' + str(stage) + str(block) + '_branch'

    x = Conv1D(filters,
               kernel_size=kernel_size,
               strides=1,
               padding='same',
               kernel_initializer='glorot_uniform',
               kernel_regularizer=regularizers.l2(l=0.0001),
               name=conv_name_base + '2a')(input_tensor)
    x = BatchNormalization(name=bn_name_base + '2a')(x)
    x = Activation('relu')(x)

    x = Conv1D(filters,
               kernel_size=kernel_size,
               strides=1,
               padding='same',
               kernel_initializer='glorot_uniform',
               kernel_regularizer=regularizers.l2(l=0.0001),
               name=conv_name_base + '2b')(x)
    x = BatchNormalization(name=bn_name_base + '2b')(x)

    # Up-sample the shortcut from the activation maps, otherwise the shapes
    # mismatch (recommendation of the authors): here we double the number of
    # filters, which amounts to duplicating every channel and concatenating them.
    if input_tensor.shape[2] != x.shape[2]:
        x = layers.add([x, Lambda(lambda y: K.repeat_elements(y, rep=2, axis=2))(input_tensor)])
    else:
        x = layers.add([x, input_tensor])

    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    return x 
Example 66
Project: deep_qa   Author: allenai   File: backend.py    Apache License 2.0
def tile_vector(vector, matrix):
    """
    NOTE: If your matrix has known shape (i.e., the relevant dimension from `K.int_shape(matrix)`
    is not `None`), you should just use `K.repeat_elements(vector)` instead of this.  This method
    works, however, when the number of rows in your matrix is unknown at graph compilation time.

    This method takes a (collection of) vector(s) (shape: (batch_size, vector_dim)), and tiles that
    vector a number of times, giving a matrix of shape (batch_size, tile_length, vector_dim).  (I
    say "vector" and "matrix" here because I'm ignoring the batch_size).  We need the matrix as
    input so we know what the tile_length is - the matrix is otherwise ignored.

    This is necessary in a number of places in the code.  For instance, if you want to do a dot
    product of a vector with all of the vectors in a matrix, the most efficient way to do that is
    to tile the vector first, then do an element-wise product with the matrix, then sum out the
    last mode.  So, we capture this functionality here.

    This is not done as a Keras Layer, however; if you want to use this function, you'll need to do
    it _inside_ of a Layer somehow, either in a Lambda or in the call() method of a Layer you're
    writing.
    """
    # Tensorflow can't use unknown sizes at runtime, so we have to make use of the broadcasting
    # ability of TF and Theano instead to create the tiled sentence encoding.

    # Shape: (tile_length, batch_size, vector_dim)
    k_ones = K.permute_dimensions(K.ones_like(matrix), [1, 0, 2])

    # Now we have a (tile_length, batch_size, vector_dim)*(batch_size, vector_dim)
    # elementwise multiplication which is broadcast. We then reshape back.
    tiled_vector = K.permute_dimensions(k_ones * vector, [1, 0, 2])
    return tiled_vector 
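A minimal usage sketch (hypothetical values), assuming `tile_vector` above is in scope; only the shape of `matrix` matters, its contents are ignored:

import numpy as np
from keras import backend as K

vector = K.constant(np.array([[1., 2.], [3., 4.]]))        # (batch_size=2, vector_dim=2)
matrix = K.constant(np.zeros((2, 5, 2), dtype='float32'))  # tile_length=5; values unused
tiled = tile_vector(vector, matrix)
print(K.int_shape(tiled))  # (2, 5, 2): every row equals the corresponding input vector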
Example 67
Project: deep_qa   Author: allenai   File: backend.py    Apache License 2.0
def tile_scalar(scalar, vector):
    """
    NOTE: If your vector has known shape (i.e., the relevant dimension from `K.int_shape(vector)`
    is not `None`), you should just use `K.repeat_elements(scalar)` instead of this.  This method
    works, however, when the number of entries in your vector is unknown at graph compilation time.

    This method takes a (collection of) scalar(s) (shape: (batch_size, 1)), and tiles that
    scalar a number of times, giving a vector of shape (batch_size, tile_length).  (I say "scalar"
    and "vector" here because I'm ignoring the batch_size).  We need the vector as input so we know
    what the tile_length is - the vector is otherwise ignored.

    This is not done as a Keras Layer, however; if you want to use this function, you'll need to do
    it _inside_ of a Layer somehow, either in a Lambda or in the call() method of a Layer you're
    writing.

    TODO(matt): we could probably make a more general `tile_tensor` method, which can do this for
    any dimensionality.  There is another place in the code where we do this with a matrix and a
    tensor; all three of these can probably be one function.
    """
    # Tensorflow can't use unknown sizes at runtime, so we have to make use of the broadcasting
    # ability of TF and Theano instead to create the tiled sentence encoding.

    # Shape: (tile_length, batch_size)
    k_ones = K.permute_dimensions(K.ones_like(vector), [1, 0])

    # Now we have a (tile_length, batch_size) * (batch_size, 1) elementwise multiplication which is
    # broadcast. We then reshape back.
    tiled_scalar = K.permute_dimensions(k_ones * K.squeeze(scalar, axis=1), [1, 0])
    return tiled_scalar 
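The analogous usage sketch for `tile_scalar` (hypothetical values; again only the shape of `vector` is used):

import numpy as np
from keras import backend as K

scalar = K.constant(np.array([[2.], [3.]]))             # (batch_size=2, 1)
vector = K.constant(np.zeros((2, 4), dtype='float32'))  # tile_length=4; values unused
print(K.eval(tile_scalar(scalar, vector)))  # [[2. 2. 2. 2.] [3. 3. 3. 3.]]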
Example 68
Project: deep_qa   Author: allenai   File: attention.py    Apache License 2.0
def call(self, inputs, mask=None):
        vector, matrix = inputs
        if mask is None:
            matrix_mask = None
        else:
            matrix_mask = mask[1]
        num_rows = K.int_shape(matrix)[1]
        tiled_vector = K.repeat_elements(K.expand_dims(vector, axis=1), num_rows, axis=1)
        similarities = self.similarity_function.compute_similarity(tiled_vector, matrix)
        if self.normalize:
            return masked_softmax(similarities, matrix_mask)
        else:
            return similarities 
Example 69
Project: deep_qa   Author: allenai   File: repeat.py    Apache License 2.0
def __repeat_tensor(self, tensor):
        return K.repeat_elements(K.expand_dims(tensor, self.axis), self.repetitions, self.axis) 
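A minimal sketch of what this helper produces (hypothetical values, with `self.axis=1` and `self.repetitions=3`): `expand_dims` inserts the new axis and `repeat_elements` fills it:

import numpy as np
from keras import backend as K

t = K.constant(np.array([[1., 2.]]))              # (1, 2)
r = K.repeat_elements(K.expand_dims(t, 1), 3, 1)  # insert axis 1, then repeat 3 times
print(K.int_shape(r))  # (1, 3, 2)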
Example 70
Project: Inpainting   Author: NaturezzZ   File: test.py    GNU General Public License v3.0
def call(self, inputs):
        normalization = K.mean(inputs, axis=[1, 2], keepdims=True)  # mean over the spatial axes
        normalization = K.repeat_elements(normalization, inputs.shape[1], axis=1)
        normalization = K.repeat_elements(normalization, inputs.shape[2], axis=2)
        return normalization 
Example 71
Project: KDDCup2019_admin   Author: DominickZhang   File: fm_keras.py    MIT License
def call(self, inputs, **kwargs):
        one_hot_feature_index = K.cast(K.slice(inputs, (0, 0), (-1, self.feature_num)), "int32")
        numeric_feature = K.slice(inputs, (0, self.feature_num), (-1, -1))

        ## first order
        first_order_index = K.reshape(one_hot_feature_index, (-1,))
        get_first_order_weights = K.gather(self.w_one_hot, first_order_index)
        first_order_weights = K.reshape(get_first_order_weights, (-1, self.feature_num))

        first_order = K.sum(first_order_weights, 1) + K.sum(K.dot(numeric_feature, self.w_numeric), 1)

        ## second order
        get_second_order_weights = K.gather(self.v_one_hot, first_order_index)
        second_order_weights = K.reshape(get_second_order_weights, (-1, self.feature_num, self.embedding_size))
        numeric_weights = K.expand_dims(self.v_numeric, 0) * K.expand_dims(numeric_feature, -1)

        all_weights = K.concatenate([second_order_weights, numeric_weights], axis=1)
        weights_sum_square = K.sum(K.square(all_weights), 1)
        weights_square_sum = K.square(K.sum(all_weights, 1))
        second_order = 0.5*K.sum(weights_square_sum - weights_sum_square, 1)

        output = first_order + second_order + self.b

        if self.activation is not None:
            output = self.activation(output)
        output = K.expand_dims(output, -1)
        return output



        '''X_square = K.square(inputs)

        xv = K.square(K.dot(inputs, self.v))
        xw = K.dot(inputs, self.w)

        p = 0.5 * K.sum(xv - K.dot(X_square, K.square(self.v)), 1)
        rp = K.repeat_elements(K.reshape(p, (-1, 1)), self.output_dim, axis=-1)

        f = xw + rp + self.b

        output = K.reshape(f, (-1, self.output_dim))
        
        if self.activation is not None:
            output = self.activation(output)

        return output''' 
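The commented-out block above relies on the classic factorization-machine identity: the pairwise interactions sum_{i<j} <v_i, v_j> x_i x_j collapse to 0.5 * sum_k ((sum_i v_ik x_i)^2 - sum_i v_ik^2 x_i^2). A minimal NumPy sketch of that identity (hypothetical shapes):

import numpy as np

x = np.random.rand(8)     # one sample with 8 raw features
V = np.random.rand(8, 4)  # factor matrix, embedding size 4

pairwise = sum(V[i] @ V[j] * x[i] * x[j] for i in range(8) for j in range(i + 1, 8))
trick = 0.5 * np.sum((x @ V) ** 2 - (x ** 2) @ (V ** 2))
print(np.allclose(pairwise, trick))  # True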
Example 72
Project: RPGOne   Author: RTHMaK   File: word_overlap_tuple_matcher.py    Apache License 2.0
def call(self, inputs, mask=None):
        tuple1_input, tuple2_input = inputs  # tuple1 shape: (batch size, num_slots, num_slot_words_t1)
                                             # tuple2 shape: (batch size, num_slots, num_slot_words_t2)
        # Check that the tuples have the same number of slots.
        assert K.int_shape(tuple1_input)[1] == K.int_shape(tuple2_input)[1]

        # Expand tuple1 to shape: (batch size, num_slots, num_slot_words_t1, num_slot_words_t2)
        expanded_tuple1 = K.expand_dims(tuple1_input, 3)    # now (b, num_slots, num_slot_words_tuple1, 1)
        tiled_tuple1 = K.repeat_elements(expanded_tuple1, K.int_shape(tuple2_input)[2], axis=3)

        # Expand tuple2 to shape: (batch size, num_slots, num_slot_words_t1, num_slot_words_t2)
        expanded_tuple2 = K.expand_dims(tuple2_input, 2)    # now (b, num_slots, 1, num_slot_words_tuple2)
        tiled_tuple2 = K.repeat_elements(expanded_tuple2, K.int_shape(tuple1_input)[2], axis=2)

        # This generates a binary tensor of the same shape as tiled_tuple1 /
        # tiled_tuple2 that indicates if given word matches between tuple1 and tuple2 in a particular slot.
        # Currently, we only consider S_t1 <--> S_t2 etc overlap, not across slot types.
        # shape: (batch size, num_slots, num_slot_words_tuple1, num_slot_words_tuple2)
        tuple_words_overlap = K.cast(K.equal(tiled_tuple1, tiled_tuple2), "float32")

        # Exclude zeros (i.e. padded elements) from matching each other.
        # tuple1_mask is 1 if tuple1 has a real element, 0 if it's a padding element.
        tiled_tuple1_mask = K.cast(K.not_equal(tiled_tuple1, K.zeros_like(tiled_tuple1, dtype='int32')),
                                   dtype='float32')
        zeros_excluded_overlap = tuple_words_overlap * tiled_tuple1_mask

        # Find non-padding elements in tuple1.
        # shape: (batch size, num_slots, num_slot_words_tuple1)
        non_padded_tuple1 = K.cast(K.not_equal(tuple1_input, K.zeros_like(tuple1_input)), 'float32')
        # Count these non-padded elements to know how many words were in each slot of tuple1.
        # shape: (batch size, num_slots)
        num_tuple1_words_in_each_slot = K.sum(non_padded_tuple1, axis=2)

        # Find the number of words that overlap in each of the slots.
        # shape: (batch size, num_slots)
        slot_overlap_sums = K.sum(K.sum(zeros_excluded_overlap, axis=3), axis=2)

        # Normalize by the number of words in tuple1.
        # TODO(becky): should this be fixed to tuple1 or allowed to vary? Does switching input order work
        # for varying?
        # This block of code prevents dividing by zero during normalization:
        divisor = num_tuple1_words_in_each_slot
        # If the divisor is zero at a position, we add epsilon to it.
        is_zero_divisor = K.equal(divisor, K.zeros_like(divisor))
        divisor = switch(is_zero_divisor, K.ones_like(divisor) * K.epsilon(), divisor)

        # shape: (batch size, num_slots)
        normalized_slot_overlap = slot_overlap_sums / divisor
        # shape: (batch size, hidden_layer_width)
        raw_entailment = apply_feed_forward(normalized_slot_overlap, self.hidden_layer_weights,
                                            activations.get(self.hidden_layer_activation))
        # shape: (batch size, 1)
        final_score = activations.get(self.final_activation)(K.dot(raw_entailment, self.score_layer))

        return final_score 
Example 73
Project: MatchZoo   Author: NTMC-Community   File: match_lstm.py    Apache License 2.0
def build(self):
        """Build model."""
        input_left, input_right = self._make_inputs()
        len_left = input_left.shape[1]
        len_right = input_right.shape[1]
        embedding = self._make_embedding_layer()
        embed_left = embedding(input_left)
        embed_right = embedding(input_right)

        lstm_left = keras.layers.LSTM(self._params['lstm_num_units'],
                                      return_sequences=True,
                                      name='lstm_left')
        lstm_right = keras.layers.LSTM(self._params['lstm_num_units'],
                                       return_sequences=True,
                                       name='lstm_right')
        encoded_left = lstm_left(embed_left)
        encoded_right = lstm_right(embed_right)

        def attention(tensors):
            """Attention layer."""
            left, right = tensors
            tensor_left = tf.expand_dims(left, axis=2)
            tensor_right = tf.expand_dims(right, axis=1)
            tensor_left = K.repeat_elements(tensor_left, len_right, 2)
            tensor_right = K.repeat_elements(tensor_right, len_left, 1)
            tensor_merged = tf.concat([tensor_left, tensor_right], axis=-1)
            middle_output = keras.layers.Dense(self._params['fc_num_units'],
                                               activation='tanh')(
                tensor_merged)
            attn_scores = keras.layers.Dense(1)(middle_output)
            attn_scores = tf.squeeze(attn_scores, axis=3)
            exp_attn_scores = tf.math.exp(
                attn_scores - tf.reduce_max(attn_scores, axis=-1, keepdims=True))
            exp_sum = tf.reduce_sum(exp_attn_scores, axis=-1, keepdims=True)
            attention_weights = exp_attn_scores / exp_sum
            return K.batch_dot(attention_weights, right)

        attn_layer = keras.layers.Lambda(attention)
        left_attn_vec = attn_layer([encoded_left, encoded_right])
        concat = keras.layers.Concatenate(axis=1)(
            [left_attn_vec, encoded_right])
        lstm_merge = keras.layers.LSTM(self._params['lstm_num_units'] * 2,
                                       return_sequences=False,
                                       name='lstm_merge')
        merged = lstm_merge(concat)
        dropout = keras.layers.Dropout(
            rate=self._params['dropout_rate'])(merged)

        phi = keras.layers.Dense(self._params['fc_num_units'],
                                 activation='tanh')(dropout)
        inputs = [input_left, input_right]
        out = self._make_output_layer()(phi)
        self._backend = keras.Model(inputs=inputs, outputs=[out]) 
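The exp-minus-max/sum arithmetic inside `attention` is a numerically stable softmax over the last axis. A minimal sketch of the equivalence (hypothetical tensor):

import numpy as np
from keras import backend as K

scores = K.constant(np.random.rand(2, 3, 4).astype('float32'))
manual = K.exp(scores - K.max(scores, axis=-1, keepdims=True))
manual = manual / K.sum(manual, axis=-1, keepdims=True)
print(np.allclose(K.eval(manual), K.eval(K.softmax(scores))))  # True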
Example 74
Project: TemporalActionParsing-FineGrained   Author: yz-cnsdqz   File: tf_models.py    MIT License
def convolution_residual_module(model, n_nodes, conv_len, 
                                dropout_ratio=0.3,
                                activation='norm_relu',
                                shortcut_processing='padding'):
    model0 = model

    # if shortcut has different dimensions, perform 1x1 convolution
    model0_dim = model0.get_shape().as_list()[-1]

    
    if model0_dim != n_nodes:
        if shortcut_processing == '1x1conv':
            model0 = convolution_module(model0, n_nodes, 1, dropout_ratio=0.3,
                           activation='norm_relu')
        elif shortcut_processing == 'padding':
            model0 = Lambda(lambda x: zero_padding_feature_dim(x, n_nodes))(model0)
            
    model = convolution_module(model, n_nodes, conv_len, dropout_ratio=dropout_ratio,
                               activation=activation)
    model = keras.layers.Add()([model, model0])

    return model





# def time_sequence_warping_linear(x, offset):
#     x_lift = K.expand_dims(x, axis=1) # from [batch, time, channel] to [batch, 1, time, channel]
#     x_lift = K.repeat_elements(x_lift, 2, axis=1) # to [batch, 2, time, channel]

#     # padding 0 to horizental direction
#     offset = K.permute_dimensions(offset, (0, 2, 1) ) #[batch, time, 1] to [batch, 1, time]
#     offset = ZeroPadding1D((1, 0))(offset) # [batch, 1, time] to [batch, 2, time]
#     offset = K.permute_dimensions(offset, (0, 2, 1) ) #[batch, 2, time] to [batch, time, 2]
#     offset_lift = K.expand_dims(offset, axis=1) # from [batch, time, 2] to [batch, 1, time, 2]
#     offset_lift = K.repeat_elements(offset_lift, 2, axis=1) # to [batch, 2, time, 2]    

#     # apply image warping
#     x_warp_list = K.tf.contrib.image.dense_image_warp(x_lift, offset_lift)
#     x_warp = x_warp_list[:,0,:,:]

#     return x_warp 
Example 75
Project: DeepPhotoshop   Author: KhyatiGanatra   File: pconv_layer.py    MIT License
def call(self, inputs, mask=None):
        '''
        We will be using the Keras conv2d method; essentially, all we have to
        do here is multiply the mask with the input X before we apply the
        convolutions. For the mask itself, we apply convolutions with all
        weights set to 1. Subsequently, we set all mask values > 0 to 1,
        and all others to 0.
        ''' 
        
        # Both image and mask must be supplied
        if type(inputs) is not list or len(inputs) != 2:
            raise Exception('PartialConvolution2D must be called on a list of two tensors [img, mask]. Instead got: ' + str(inputs))
            
        # Create normalization. Slight change here compared to paper, using mean mask value instead of sum
        normalization = K.mean(inputs[1], axis=[1,2], keepdims=True)
        normalization = K.repeat_elements(normalization, inputs[1].shape[1], axis=1)
        normalization = K.repeat_elements(normalization, inputs[1].shape[2], axis=2)

        # Apply convolutions to image
        img_output = K.conv2d(
            (inputs[0]*inputs[1]) / normalization, self.kernel, 
            strides=self.strides,
            padding=self.padding,
            data_format=self.data_format,
            dilation_rate=self.dilation_rate
        )
        
        # Apply convolutions to mask
        mask_output = K.conv2d(
            inputs[1], self.kernel_mask, 
            strides=self.strides,
            padding=self.padding,            
            data_format=self.data_format,
            dilation_rate=self.dilation_rate
        )
        
        # Where something happened, set 1, otherwise 0        
        mask_output = K.cast(K.greater(mask_output, 0), 'float32')
        
        # Apply bias only to the image (if chosen to do so)
        if self.use_bias:
            img_output = K.bias_add(
                img_output,
                self.bias,
                data_format=self.data_format)
                
        # Apply activations on the image
        if self.activation is not None:
            img_output = self.activation(img_output)
            
        return [img_output, mask_output] 
Example 76
Project: recurrent-attention-for-QA-SQUAD-based-on-keras   Author: wentaozhu   File: rnnlayer.py    MIT License
def step(self, inputs, states):
        h_tm1 = states[0]  # previous memory
        #B_U = states[1]  # dropout matrices for recurrent units
        #B_W = states[2]
        h_tm1a = K.dot(h_tm1, self.Wa)
        eij = K.dot(K.tanh(h_tm1a + self.ha), self.Va)
        eijs = K.repeat_elements(eij, self.h_dim, axis=1)

        #alphaij = K.softmax(eijs) # batchsize * lenh       h batchsize * lenh * ndim
        #ci = K.permute_dimensions(K.permute_dimensions(self.h, [2,0,1]) * alphaij, [1,2,0])
        #cisum = K.sum(ci, axis=1)
        cisum = eijs*self.h
        #print(K.shape(cisum), cisum.shape, ci.shape, self.h.shape, alphaij.shape, x.shape)

        zr = K.sigmoid(K.dot(inputs, self.Wzr) + K.dot(h_tm1, self.Uzr) + K.dot(cisum, self.Czr))
        zi = zr[:, :self.units]
        ri = zr[:, self.units: 2 * self.units]
        si_ = K.tanh(K.dot(inputs, self.W) + K.dot(ri*h_tm1, self.U) + K.dot(cisum, self.C))
        si = (1-zi) * h_tm1 + zi * si_
        return si, [si] #h_tm1, [h_tm1]
        '''if self.consume_less == 'gpu':

            matrix_x = K.dot(x * B_W[0], self.W) + self.b
            matrix_inner = K.dot(h_tm1 * B_U[0], self.U[:, :2 * self.units])

            x_z = matrix_x[:, :self.units]
            x_r = matrix_x[:, self.units: 2 * self.units]
            inner_z = matrix_inner[:, :self.units]
            inner_r = matrix_inner[:, self.units: 2 * self.units]

            z = self.inner_activation(x_z + inner_z)
            r = self.inner_activation(x_r + inner_r)

            x_h = matrix_x[:, 2 * self.units:]
            inner_h = K.dot(r * h_tm1 * B_U[0], self.U[:, 2 * self.units:])
            hh = self.activation(x_h + inner_h)
        else:
            if self.consume_less == 'cpu':
                x_z = x[:, :self.units]
                x_r = x[:, self.units: 2 * self.units]
                x_h = x[:, 2 * self.units:]
            elif self.consume_less == 'mem':
                x_z = K.dot(x * B_W[0], self.W_z) + self.b_z
                x_r = K.dot(x * B_W[1], self.W_r) + self.b_r
                x_h = K.dot(x * B_W[2], self.W_h) + self.b_h
            else:
                raise ValueError('Unknown `consume_less` mode.')
            z = self.inner_activation(x_z + K.dot(h_tm1 * B_U[0], self.U_z))
            r = self.inner_activation(x_r + K.dot(h_tm1 * B_U[1], self.U_r))

            hh = self.activation(x_h + K.dot(r * h_tm1 * B_U[2], self.U_h))
        h = z * h_tm1 + (1 - z) * hh
        return h, [h]''' 
Example 77
Project: onto-lstm   Author: pdasigi   File: preposition_predictors.py    Apache License 2.0
def build(self, input_shape):
        # The composition types are taken from Belinkov et al.'s TACL 2014 paper:
        # HC: Head-Child; HPC: Head-Prep-Child; HPCT: Head-Prep-Child-Ternary.
        assert self.composition_type in self.allowed_compositions, "Unknown composition type: %s" % self.composition_type
        if isinstance(input_shape[0], tuple):
            # This layer has multiple inputs (RelationPredictor).
            input_dim = input_shape[0][-1]
            input_length = input_shape[0][1]
        else:
            input_dim = input_shape[-1]
            input_length = input_shape[1]
        if self.proj_dim is None:
            self.proj_dim = int(input_dim / 2)
        if self.composition_type == 'HPCD':
            max_num_heads = input_length - 2
            # Clipping number of distance based projection matrices to 5.
            num_head_projectors = min(max_num_heads, 5)
            self.proj_head = self.init((num_head_projectors, input_dim, self.proj_dim))
            if max_num_heads > num_head_projectors:
                diff = max_num_heads - num_head_projectors
                farthest_head_proj = K.expand_dims(self.proj_head[0, :, :], dim=0)  # (1, input_dim, proj_dim)
                # (diff, input_dim, proj_dim)
                tiled_farthest_head_proj = K.repeat_elements(farthest_head_proj, diff, 0)
                # (head_size, input_dim, proj_dim)
                self.dist_proj_head = K.concatenate([tiled_farthest_head_proj, self.proj_head], axis=0)
            else:
                self.dist_proj_head = self.proj_head
        else:
            self.proj_head = self.init((input_dim, self.proj_dim), name='{}_proj_head'.format(self.name))
        self.proj_prep = self.init((input_dim, self.proj_dim), name='{}_proj_prep'.format(self.name))
        self.proj_child = self.init((input_dim, self.proj_dim), name='{}_proj_child'.format(self.name))
        self.trainable_weights = [self.proj_head, self.proj_prep, self.proj_child]
        self.hidden_layers = []
        if self.num_hidden_layers > 0:
            # This means we have to pass the composed representation through an MLP instead of directly computing
            # scores.
            for i in range(self.num_hidden_layers):
                hidden_layer = self.init((self.proj_dim, self.proj_dim), name='%s_hidden_layer_%d' % (self.name, i))
                self.hidden_layers.append(hidden_layer)
            self.trainable_weights.extend(self.hidden_layers)
        self.scorer = self.init((self.proj_dim, self.score_dim), name='{}_scorer'.format(self.name))
        self.trainable_weights.append(self.scorer) 
Example 78
Project: CIKM-AnalytiCup-2018   Author: zake7749   File: utils.py    Apache License 2.0
def __call__(self, q, k, v, mask=None):
        d_k, d_v = self.d_k, self.d_v
        n_head = self.n_head

        if self.mode == 0:
            qs = self.qs_layer(q)  # [batch_size, len_q, n_head*d_k]
            ks = self.ks_layer(k)
            vs = self.vs_layer(v)

            def reshape1(x):
                s = tf.shape(x)   # [batch_size, len_q, n_head * d_k]
                x = tf.reshape(x, [s[0], s[1], n_head, d_k])
                x = tf.transpose(x, [2, 0, 1, 3])  
                x = tf.reshape(x, [-1, s[1], d_k])  # [n_head * batch_size, len_q, d_k]
                return x
            qs = Lambda(reshape1)(qs)
            ks = Lambda(reshape1)(ks)
            vs = Lambda(reshape1)(vs)

            mask = Lambda(lambda x:K.repeat_elements(x, n_head, 0))(mask)
            head, attn = self.attention(qs, ks, vs, mask=mask)  

            def reshape2(x):
                s = tf.shape(x)   # [n_head * batch_size, len_v, d_v]
                x = tf.reshape(x, [n_head, -1, s[1], s[2]]) 
                x = tf.transpose(x, [1, 2, 0, 3])
                x = tf.reshape(x, [-1, s[1], n_head*d_v])  # [batch_size, len_v, n_head * d_v]
                return x
            head = Lambda(reshape2)(head)
        elif self.mode == 1:
            heads = []; attns = []
            for i in range(n_head):
                qs = self.qs_layers[i](q)   
                ks = self.ks_layers[i](k) 
                vs = self.vs_layers[i](v) 
                head, attn = self.attention(qs, ks, vs, mask)
                heads.append(head); attns.append(attn)
            head = Concatenate()(heads)
            attn = Concatenate()(attns)
        outputs = self.w_o(head)
        outputs = Dropout(self.dropout)(outputs)
        outputs = Add()([outputs, q])
        return self.layer_norm(outputs), attn 
Example 79
Project: deepcaps   Author: brjathu   File: capslayers.py    MIT License
def call(self, inputs, training=None):
        # inputs.shape=[None, input_num_capsule, input_dim_capsule]
        # inputs_expand.shape=[None, 1, input_num_capsule, input_dim_capsule]
        inputs_expand = K.expand_dims(inputs, 1)

        # Replicate num_capsule dimension to prepare being multiplied by W
        # inputs_tiled.shape=[None, num_capsule, input_num_capsule, input_dim_capsule]
        inputs_tiled = K.tile(inputs_expand, [1, self.num_capsule, 1, 1])

        if(self.channels != 0):
            W2 = K.repeat_elements(self.W, int(self.input_num_capsule / self.channels), 1)
        else:
            W2 = self.W
        # Compute `inputs * W` by scanning inputs_tiled on dimension 0.
        # x.shape=[num_capsule, input_num_capsule, input_dim_capsule]
        # W.shape=[num_capsule, input_num_capsule, dim_capsule, input_dim_capsule]
        # Regard the first two dimensions as `batch` dimension,
        # then matmul: [input_dim_capsule] x [dim_capsule, input_dim_capsule]^T -> [dim_capsule].
        # inputs_hat.shape = [None, num_capsule, input_num_capsule, dim_capsule]
        inputs_hat = K.map_fn(lambda x: K.batch_dot(x, W2, [2, 3]), elems=inputs_tiled)

        # Begin: Routing algorithm ---------------------------------------------------------------------#
        # The prior for coupling coefficient, initialized as zeros.
        # b.shape = [None, self.num_capsule, self.input_num_capsule].
        b = tf.zeros(shape=[K.shape(inputs_hat)[0], self.num_capsule, self.input_num_capsule])

        assert self.routings > 0, 'The routings should be > 0.'
        for i in range(self.routings):
            # c.shape=[batch_size, num_capsule, input_num_capsule]
            c = tf.nn.softmax(b, dim=1)

            # c.shape =  [batch_size, num_capsule, input_num_capsule]
            # inputs_hat.shape=[None, num_capsule, input_num_capsule, dim_capsule]
            # The first two dimensions as `batch` dimension,
            # then matmul: [input_num_capsule] x [input_num_capsule, dim_capsule] -> [dim_capsule].
            # outputs.shape=[None, num_capsule, dim_capsule]
            outputs = squash(K.batch_dot(c, inputs_hat, [2, 2]) + self.B)  # [None, 10, 16]

            if i < self.routings - 1:
                # outputs.shape =  [None, num_capsule, dim_capsule]
                # inputs_hat.shape=[None, num_capsule, input_num_capsule, dim_capsule]
                # The first two dimensions as `batch` dimension,
                # then matmul: [dim_capsule] x [input_num_capsule, dim_capsule]^T -> [input_num_capsule].
                # b.shape=[batch_size, num_capsule, input_num_capsule]
                b += K.batch_dot(outputs, inputs_hat, [2, 3])
        # End: Routing algorithm -----------------------------------------------------------------------#

        return outputs 
Example 80
Project: deep_qa   Author: allenai   File: gated_attention.py    Apache License 2.0
def call(self, inputs, mask=None):
        # document_matrix is of shape (batch, document length, biGRU hidden length).
        # question_matrix is of shape (batch, question length, biGRU hidden length).
        # normalized_qd_attention is of shape (batch, document length, question length).
        document_matrix, question_matrix, normalized_qd_attention = inputs
        if mask is None:
            document_mask = K.ones_like(document_matrix)[:, :, 0]
        else:
            document_mask = mask[0]

        # question_update is of shape (batch, document length, bigru hidden).
        question_update = K.batch_dot(normalized_qd_attention, question_matrix, axes=[2, 1])

        # We use the gating function to calculate the new document representation
        # which is of shape (batch, document length, biGRU hidden length).
        masked_representation = None
        if self.gating_function == "||":
            # shape (batch, document length, biGRU hidden length*2)
            unmasked_representation = K.concatenate([question_update, document_matrix])
            # Apply the mask from the document to zero out things that should be masked.
            # The mask is of shape (batch, document length), so we tile it to
            # shape (batch, document length, biGRU hidden length*2)
            tiled_mask = K.repeat_elements(K.expand_dims(document_mask, axis=2),
                                           (2*K.int_shape(document_matrix)[2]), 2)
            masked_representation = switch(tiled_mask, unmasked_representation,
                                           K.zeros_like(unmasked_representation))
            return masked_representation

        if self.gating_function == "*":
            unmasked_representation = question_update * document_matrix

        if self.gating_function == "+":
            # shape (batch, document length, biGRU hidden length)
            unmasked_representation = question_update + document_matrix

        # Apply the mask from the document to zero out things that should be masked.
        # The mask is of shape (batch, document length), so we tile it to
        # shape (batch, document length, biGRU hidden length)
        tiled_mask = K.repeat_elements(K.expand_dims(document_mask, axis=2),
                                       K.int_shape(document_matrix)[2], 2)
        masked_representation = switch(tiled_mask, unmasked_representation, K.zeros_like(unmasked_representation))

        if masked_representation is not None:
            return masked_representation
        else:
            raise ConfigurationError("Invalid gating function "
                                     "{}, expected one of {}".format(self.gating_function,
                                                                     GATING_FUNCTIONS))