Python keras.backend.expand_dims() Examples

The following are 30 code examples of keras.backend.expand_dims(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module keras.backend , or try the search function .
Example #1
Source File: attention.py    From deephlapan with GNU General Public License v2.0 6 votes vote down vote up
def call(self, x, mask=None):
        eij = dot_product(x, self.W)

        if self.bias:
            eij += self.b

        eij = K.tanh(eij)

        a = K.exp(eij)

        if mask is not None:
            a *= K.cast(mask, K.floatx())

        a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())

        weighted_input = x * K.expand_dims(a)

        result = K.sum(weighted_input, axis=1)

        if self.return_attention:
            return [result, a]
        return result 
Example #2
Source File: keras_extensions.py    From visual_turing_test-tutorial with MIT License 6 votes vote down vote up
def time_distributed_masked_max(x, m):
    """
    Computes max along the first (time) dimension.

    In:
        x - input; a 3D tensor
        m - mask
        m_value - value for masking
    """
    # place infinities where mask is off
    m_value = 0.0
    tmp = K.switch(K.equal(m, 0.0), -numpy.inf, 0.0)
    x_with_inf = x + K.expand_dims(tmp)
    x_max = K.max(x_with_inf, axis=1) 
    r = K.switch(K.equal(x_max, -numpy.inf), m_value, x_max)
    return r 


## classes  ##

# Transforms existing layers to masked layers 
Example #3
Source File: attlayer.py    From DeepMoji with MIT License 6 votes vote down vote up
def call(self, x, mask=None):
        # computes a probability distribution over the timesteps
        # uses 'max trick' for numerical stability
        # reshape is done to avoid issue with Tensorflow
        # and 1-dimensional weights
        logits = K.dot(x, self.W)
        x_shape = K.shape(x)
        logits = K.reshape(logits, (x_shape[0], x_shape[1]))
        ai = K.exp(logits - K.max(logits, axis=-1, keepdims=True))

        # masked timesteps have zero weight
        if mask is not None:
            mask = K.cast(mask, K.floatx())
            ai = ai * mask
        att_weights = ai / (K.sum(ai, axis=1, keepdims=True) + K.epsilon())
        weighted_input = x * K.expand_dims(att_weights)
        result = K.sum(weighted_input, axis=1)
        if self.return_attention:
            return [result, att_weights]
        return result 
Example #4
Source File: contrib.py    From steppy-toolkit with MIT License 6 votes vote down vote up
def call(self, x, mask=None):
        # computes a probability distribution over the timesteps
        # uses 'max trick' for numerical stability
        # reshape is done to avoid issue with Tensorflow
        # and 1-dimensional weights
        logits = K.dot(x, self.W)
        x_shape = K.shape(x)
        logits = K.reshape(logits, (x_shape[0], x_shape[1]))
        ai = K.exp(logits - K.max(logits, axis=-1, keepdims=True))

        # masked timesteps have zero weight
        if mask is not None:
            mask = K.cast(mask, K.floatx())
            ai = ai * mask
        att_weights = ai / (K.sum(ai, axis=1, keepdims=True) + K.epsilon())
        weighted_input = x * K.expand_dims(att_weights)
        result = K.sum(weighted_input, axis=1)
        if self.return_attention:
            return [result, att_weights]
        return result 
Example #5
Source File: attention.py    From keras-utility-layer-collection with MIT License 6 votes vote down vote up
def step(self, x, states):  
        h = states[0]
        # states[1] necessary?
        
        # comes from the constants
        X_static = states[-2]
        # equals K.dot(static_x, self._W1) + self._b2 with X.shape=[bs, L, static_input_dim]
        total_x_static_prod = states[-1]

        # expand dims to add the vector which is only valid for this time step
        # to total_x_prod which is valid for all time steps
        hw = K.expand_dims(K.dot(h, self._W2), 1)
        additive_atn = total_x_static_prod + hw
        attention = K.softmax(K.dot(additive_atn, self._V), axis=1)
        static_x_weighted = K.sum(attention * X_static, [1])
        
        x = K.dot(K.concatenate([x, static_x_weighted], 1), self._W3) + self._b3

        h, new_states = self.layer.cell.call(x, states[:-2])
        
        # append attention to the states to "smuggle" it out of the RNN wrapper
        attention = K.squeeze(attention, -1)
        h = K.concatenate([h, attention])

        return h, new_states 
Example #6
Source File: capsule.py    From CapsNet with MIT License 6 votes vote down vote up
def call(self, inputs, **kwargs):
        # (batch_size, 1, input_num_capsule, input_dim_capsule)
        expand_inputs = K.expand_dims(inputs, axis=1)
        # (batch_size, num_capsule, input_num_capsule, input_dim_capsule)
        expand_inputs = K.tile(expand_inputs, (1, self.num_capsule, 1, 1))
        # (batch_size, num_capsule, input_num_capsule, dim_capsule)
        u_hat = K.map_fn(lambda x: K.batch_dot(x, self.W, axes=[2, 3]), expand_inputs)

        if self.num_routing <= 0:
            self.num_routing = 3
        # (batch_size, num_capsule, input_num_capsule)
        b = K.zeros((K.shape(u_hat)[0], self.num_capsule, self.input_num_capsule))
        for i in xrange(self.num_routing):
            # (batch_size, num_capsule, input_num_capsule)
            c = softmax(b, axis=1)
            # (batch_size, num_capsule, dim_capsule)
            s = K.batch_dot(c, u_hat, axes=[2, 2])
            squashed_s = squash(s)
            if i < self.num_routing - 1:
                # (batch_size, num_capsule, input_num_capsule)
                b += K.batch_dot(squashed_s, u_hat, axes=[2, 3])
        return squashed_s 
Example #7
Source File: backend.py    From bert4keras with Apache License 2.0 6 votes vote down vote up
def pool1d(
    x,
    pool_size,
    strides=1,
    padding='valid',
    data_format=None,
    pool_mode='max'
):
    """向量序列的pool函数
    """
    x = K.expand_dims(x, 1)
    x = K.pool2d(
        x,
        pool_size=(1, pool_size),
        strides=(1, strides),
        padding=padding,
        data_format=data_format,
        pool_mode=pool_mode
    )
    return x[:, 0] 
Example #8
Source File: backend.py    From bert4keras with Apache License 2.0 6 votes vote down vote up
def sequence_masking(x, mask, mode=0, axis=None):
    """为序列条件mask的函数
    mask: 形如(batch_size, seq_len)的0-1矩阵;
    mode: 如果是0,则直接乘以mask;
          如果是1,则在padding部分减去一个大正数。
    axis: 序列所在轴,默认为1;
    """
    if mask is None or mode not in [0, 1]:
        return x
    else:
        if axis is None:
            axis = 1
        if axis == -1:
            axis = K.ndim(x) - 1
        assert axis > 0, 'axis muse be greater than 0'
        for _ in range(axis - 1):
            mask = K.expand_dims(mask, 1)
        for _ in range(K.ndim(x) - K.ndim(mask) - axis + 1):
            mask = K.expand_dims(mask, K.ndim(mask))
        if mode == 0:
            return x * mask
        else:
            return x - (1 - mask) * 1e12 
Example #9
Source File: mrf.py    From image-analogies with MIT License 6 votes vote down vote up
def make_patches_grid(x, patch_size, patch_stride):
    '''Break image `x` up into a grid of patches.

    input shape: (channels, rows, cols)
    output shape: (rows, cols, channels, patch_rows, patch_cols)
    '''
    from theano.tensor.nnet.neighbours import images2neibs  # TODO: all K, no T
    x = K.expand_dims(x, 0)
    xs = K.shape(x)
    num_rows = 1 + (xs[-2] - patch_size) // patch_stride
    num_cols = 1 + (xs[-1] - patch_size) // patch_stride
    num_channels = xs[-3]
    patches = images2neibs(x,
        (patch_size, patch_size), (patch_stride, patch_stride),
        mode='valid')
    # neibs are sorted per-channel
    patches = K.reshape(patches, (num_channels, K.shape(patches)[0] // num_channels, patch_size, patch_size))
    patches = K.permute_dimensions(patches, (1, 0, 2, 3))
    # arrange in a 2d-grid (rows, cols, channels, px, py)
    patches = K.reshape(patches, (num_rows, num_cols, num_channels, patch_size, patch_size))
    patches_norm = K.sqrt(K.sum(K.square(patches), axis=(2,3,4), keepdims=True))
    return patches, patches_norm 
Example #10
Source File: models.py    From DigiX_HuaWei_Population_Age_Attribution_Predict with MIT License 6 votes vote down vote up
def call(self, x, mask=None):
        uit = dot_product(x, self.W)

        if self.bias:
            uit += self.b

        uit = K.tanh(uit)
        ait = dot_product(uit, self.u)

        a = K.exp(ait)

        # apply mask after the exp. will be re-normalized next
        if mask is not None:
            # Cast the mask to floatX to avoid float64 upcasting in theano
            a *= K.cast(mask, K.floatx())

        # in some cases especially in the early stages of training the sum may be almost zero
        # and this results in NaN's. A workaround is to add a very small positive number ε to the sum.
        # a /= K.cast(K.sum(a, axis=1, keepdims=True), K.floatx())
        a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())

        a = K.expand_dims(a)
        weighted_input = x * a
        return K.sum(weighted_input, axis=1) 
Example #11
Source File: models.py    From DigiX_HuaWei_Population_Age_Attribution_Predict with MIT License 6 votes vote down vote up
def call(self, x, mask=None):
        uit = dot_product(x, self.W)

        if self.bias:
            uit += self.b

        uit = K.tanh(uit)
        ait = dot_product(uit, self.u)

        a = K.exp(ait)

        # apply mask after the exp. will be re-normalized next
        if mask is not None:
            # Cast the mask to floatX to avoid float64 upcasting in theano
            a *= K.cast(mask, K.floatx())

        # in some cases especially in the early stages of training the sum may be almost zero
        # and this results in NaN's. A workaround is to add a very small positive number ε to the sum.
        # a /= K.cast(K.sum(a, axis=1, keepdims=True), K.floatx())
        a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())

        a = K.expand_dims(a)
        weighted_input = x * a
        return K.sum(weighted_input, axis=1) 
Example #12
Source File: visualizer.py    From backdoor with MIT License 6 votes vote down vote up
def save_tmp_func(self, step):

        cur_mask = K.eval(self.mask_upsample_tensor)
        cur_mask = cur_mask[0, ..., 0]
        img_filename = (
            '%s/%s' % (self.tmp_dir, 'tmp_mask_step_%d.png' % step))
        utils_backdoor.dump_image(np.expand_dims(cur_mask, axis=2) * 255,
                                  img_filename,
                                  'png')

        cur_fusion = K.eval(self.mask_upsample_tensor *
                            self.pattern_raw_tensor)
        cur_fusion = cur_fusion[0, ...]
        img_filename = (
            '%s/%s' % (self.tmp_dir, 'tmp_fusion_step_%d.png' % step))
        utils_backdoor.dump_image(cur_fusion, img_filename, 'png')

        pass 
Example #13
Source File: attention_with_context.py    From DeepResearch with MIT License 6 votes vote down vote up
def call(self, x, mask=None):
        uit = dot_product(x, self.W)

        if self.bias:
            uit += self.b

        uit = K.tanh(uit)
        ait = dot_product(uit, self.u)

        a = K.exp(ait)

        # apply mask after the exp. will be re-normalized next
        if mask is not None:
            # Cast the mask to floatX to avoid float64 upcasting in theano
            a *= K.cast(mask, K.floatx())

        # in some cases especially in the early stages of training the sum may be almost zero
        # and this results in NaN's. A workaround is to add a very small positive number ε to the sum.
        # a /= K.cast(K.sum(a, axis=1, keepdims=True), K.floatx())
        a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())

        a = K.expand_dims(a)
        weighted_input = x * a
        return K.sum(weighted_input, axis=1) 
Example #14
Source File: layers.py    From voxelmorph with GNU General Public License v3.0 6 votes vote down vote up
def call(self, x):
        # previous mean
        pre_mean = self.mean
    
        # compute this batch stats
        this_sum = tf.reduce_sum(x, 0)
        this_bs = tf.cast(K.shape(x)[0], 'float32')  # this batch size
        
        # increase count and compute weights
        new_count = self.count + this_bs
        alpha = this_bs/K.minimum(new_count, self.cap)
        
        # compute new mean. Note that once we reach self.cap (e.g. 1000), the 'previous mean' matters less
        new_mean = pre_mean * (1-alpha) + (this_sum/this_bs) * alpha
        
        updates = [(self.count, new_count), (self.mean, new_mean)]
        self.add_update(updates, x)
        
        # the first few 1000 should not matter that much towards this cost
        return K.minimum(1., new_count/self.cap) * K.expand_dims(new_mean, 0) 
Example #15
Source File: bert.py    From keras-bert-ner with MIT License 6 votes vote down vote up
def compute_attention_mask(self, layer_id, segment_ids):
        """为seq2seq采用特定的attention mask
        """
        if self.attention_mask is None:

            def seq2seq_attention_mask(s, repeats=1):
                seq_len = K.shape(s)[1]
                ones = K.ones((1, repeats, seq_len, seq_len))
                a_mask = tf.linalg.band_part(ones, -1, 0)
                s_ex12 = K.expand_dims(K.expand_dims(s, 1), 2)
                s_ex13 = K.expand_dims(K.expand_dims(s, 1), 3)
                a_mask = (1 - s_ex13) * (1 - s_ex12) + s_ex13 * a_mask
                a_mask = K.reshape(a_mask, (-1, seq_len, seq_len))
                return a_mask

            self.attention_mask = Lambda(
                seq2seq_attention_mask,
                arguments={"repeats": self.num_attention_heads},
                name="Attention-Mask")(segment_ids)

        return self.attention_mask 
Example #16
Source File: ChainCRF.py    From elmo-bilstm-cnn-crf with Apache License 2.0 6 votes vote down vote up
def add_boundary_energy(x, b_start=None, b_end=None, mask=None):
    '''Given the observations x, it adds the start boundary energy b_start (resp.
    end boundary energy b_end on the start (resp. end) elements and multiplies
    the mask.'''
    if mask is None:
        if b_start is not None:
            x = K.concatenate([x[:, :1, :] + b_start, x[:, 1:, :]], axis=1)
        if b_end is not None:
            x = K.concatenate([x[:, :-1, :], x[:, -1:, :] + b_end], axis=1)
    else:
        mask = K.cast(mask, K.floatx())
        mask = K.expand_dims(mask, 2)
        x *= mask
        if b_start is not None:
            mask_r = K.concatenate([K.zeros_like(mask[:, :1]), mask[:, :-1]], axis=1)
            start_mask = K.cast(K.greater(mask, mask_r), K.floatx())
            x = x + start_mask * b_start
        if b_end is not None:
            mask_l = K.concatenate([mask[:, 1:], K.zeros_like(mask[:, -1:])], axis=1)
            end_mask = K.cast(K.greater(mask, mask_l), K.floatx())
            x = x + end_mask * b_end
    return x 
Example #17
Source File: ChainCRF.py    From elmo-bilstm-cnn-crf with Apache License 2.0 6 votes vote down vote up
def _forward(x, reduce_step, initial_states, U, mask=None):
    '''Forward recurrence of the linear chain crf.'''

    def _forward_step(energy_matrix_t, states):
        alpha_tm1 = states[-1]
        new_states = reduce_step(K.expand_dims(alpha_tm1, 2) + energy_matrix_t)
        return new_states[0], new_states

    U_shared = K.expand_dims(K.expand_dims(U, 0), 0)

    if mask is not None:
        mask = K.cast(mask, K.floatx())
        mask_U = K.expand_dims(K.expand_dims(mask[:, :-1] * mask[:, 1:], 2), 3)
        U_shared = U_shared * mask_U

    inputs = K.expand_dims(x[:, 1:, :], 2) + U_shared
    inputs = K.concatenate([inputs, K.zeros_like(inputs[:, -1:, :, :])], axis=1)

    last, values, _ = K.rnn(_forward_step, inputs, initial_states)
    return last, values 
Example #18
Source File: attention.py    From keras-utility-layer-collection with MIT License 6 votes vote down vote up
def step(self, x, states):   
        h = states[0]
        # states[1] necessary?

        # equals K.dot(X, self._W1) + self._b2 with X.shape=[bs, T, input_dim]
        total_x_prod = states[-1]
        # comes from the constants (equals the input sequence)
        X = states[-2]
        
        # expand dims to add the vector which is only valid for this time step
        # to total_x_prod which is valid for all time steps
        hw = K.expand_dims(K.dot(h, self._W2), 1)
        additive_atn = total_x_prod + hw
        attention = K.softmax(K.dot(additive_atn, self._V), axis=1)
        x_weighted = K.sum(attention * X, [1])

        x = K.dot(K.concatenate([x, x_weighted], 1), self._W3) + self._b3
        
        h, new_states = self.layer.cell.call(x, states[:-2])
        
        return h, new_states 
Example #19
Source File: ChainCRF.py    From elmo-bilstm-cnn-crf with Apache License 2.0 6 votes vote down vote up
def _backward(gamma, mask):
    '''Backward recurrence of the linear chain crf.'''
    gamma = K.cast(gamma, 'int32')

    def _backward_step(gamma_t, states):
        y_tm1 = K.squeeze(states[0], 0)
        y_t = batch_gather(gamma_t, y_tm1)
        return y_t, [K.expand_dims(y_t, 0)]

    initial_states = [K.expand_dims(K.zeros_like(gamma[:, 0, 0]), 0)]
    _, y_rev, _ = K.rnn(_backward_step,
                        gamma,
                        initial_states,
                        go_backwards=True)
    y = K.reverse(y_rev, 1)

    if mask is not None:
        mask = K.cast(mask, dtype='int32')
        # mask output
        y *= mask
        # set masked values to -1
        y += -(1 - mask)
    return y 
Example #20
Source File: attention_layer.py    From text-classifier with Apache License 2.0 6 votes vote down vote up
def call(self, x, mask=None):
        # size of x :[batch_size, sel_len, attention_dim]
        # size of u :[batch_size, attention_dim]
        # uit = tanh(xW+b)
        uit = K.tanh(K.bias_add(K.dot(x, self.W), self.b))
        ait = K.dot(uit, self.u)
        ait = K.squeeze(ait, -1)

        ait = K.exp(ait)

        if mask is not None:
            # Cast the mask to floatX to avoid float64 upcasting in theano
            ait *= K.cast(mask, K.floatx())
        ait /= K.cast(K.sum(ait, axis=1, keepdims=True) + K.epsilon(), K.floatx())
        ait = K.expand_dims(ait)
        weighted_input = x * ait
        output = K.sum(weighted_input, axis=1)

        return output 
Example #21
Source File: capsulelayers.py    From Multi-level-DCNet with GNU General Public License v3.0 6 votes vote down vote up
def call(self, inputs, **kwargs):
        if type(inputs) is list:  # true label is provided with shape = [None, n_classes], i.e. one-hot code.
            assert len(inputs) == 2
            inputs, mask = inputs
        else:  # if no true label, mask by the max length of capsules. Mainly used for prediction
            # compute lengths of capsules
            x = K.sqrt(K.sum(K.square(inputs), -1))
            # generate the mask which is a one-hot code.
            # mask.shape=[None, n_classes]=[None, num_capsule]
            mask = K.one_hot(indices=K.argmax(x, 1), num_classes=x.get_shape().as_list()[1])

        # inputs.shape=[None, num_capsule, dim_capsule]
        # mask.shape=[None, num_capsule]
        # masked.shape=[None, num_capsule * dim_capsule]
        masked = K.batch_flatten(inputs * K.expand_dims(mask, -1))
        return masked 
Example #22
Source File: capsulelayers.py    From textcaps with MIT License 6 votes vote down vote up
def call(self, inputs, training=None):
        inputs_expand = K.expand_dims(inputs, 1)
        
        inputs_tiled = K.tile(inputs_expand, [1, self.num_capsule, 1, 1])
        
        if(self.channels!=0):
            W2 = K.repeat_elements(self.W,int(self.input_num_capsule/self.channels),1)
        else:
            W2 = self.W
            
        inputs_hat = K.map_fn(lambda x: K.batch_dot(x, W2, [2, 3]) , elems=inputs_tiled)

        b = tf.zeros(shape=[K.shape(inputs_hat)[0], self.num_capsule, self.input_num_capsule])

        assert self.routings > 0, 'The routings should be > 0.'
        for i in range(self.routings):

            c = tf.nn.softmax(b, dim=1)
            outputs = squash(K.batch_dot(c, inputs_hat, [2, 2])+ self.B)

            if i < self.routings - 1:
                b += K.batch_dot(outputs, inputs_hat, [2, 3])

        return outputs 
Example #23
Source File: model.py    From keras-yolov3-KF-objectTracking with MIT License 5 votes vote down vote up
def box_iou(b1, b2):
    '''Return iou tensor

    Parameters
    ----------
    b1: tensor, shape=(i1,...,iN, 4), xywh
    b2: tensor, shape=(j, 4), xywh

    Returns
    -------
    iou: tensor, shape=(i1,...,iN, j)

    '''

    # Expand dim to apply broadcasting.
    b1 = K.expand_dims(b1, -2)
    b1_xy = b1[..., :2]
    b1_wh = b1[..., 2:4]
    b1_wh_half = b1_wh/2.
    b1_mins = b1_xy - b1_wh_half
    b1_maxes = b1_xy + b1_wh_half

    # Expand dim to apply broadcasting.
    b2 = K.expand_dims(b2, 0)
    b2_xy = b2[..., :2]
    b2_wh = b2[..., 2:4]
    b2_wh_half = b2_wh/2.
    b2_mins = b2_xy - b2_wh_half
    b2_maxes = b2_xy + b2_wh_half

    intersect_mins = K.maximum(b1_mins, b2_mins)
    intersect_maxes = K.minimum(b1_maxes, b2_maxes)
    intersect_wh = K.maximum(intersect_maxes - intersect_mins, 0.)
    intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1]
    b1_area = b1_wh[..., 0] * b1_wh[..., 1]
    b2_area = b2_wh[..., 0] * b2_wh[..., 1]
    iou = intersect_area / (b1_area + b2_area - intersect_area)

    return iou 
Example #24
Source File: attention.py    From deephlapan with GNU General Public License v2.0 5 votes vote down vote up
def dot_product(x, kernel):
    if K.backend() == 'tensorflow':
        return K.squeeze(K.dot(x, K.expand_dims(kernel)), axis=-1)
    else:
        return K.dot(x, kernel) 
Example #25
Source File: tf_normal_sampler.py    From social_lstm_keras_tf with GNU General Public License v3.0 5 votes vote down vote up
def _to_normal2d(output_batch) -> ds.MultivariateNormalTriL:
    """
    :param output_batch: (n_samples, 5)
    :return
    """

    # mean of x and y
    x_mean = Lambda(lambda o: o[:, 0])(output_batch)
    y_mean = Lambda(lambda o: o[:, 1])(output_batch)

    # std of x and y
    # std is must be 0 or positive
    x_std = Lambda(lambda o: K.exp(o[:, 2]))(output_batch)
    y_std = Lambda(lambda o: K.exp(o[:, 3]))(output_batch)

    # correlation coefficient
    # correlation coefficient range is [-1, 1]
    cor = Lambda(lambda o: K.tanh(o[:, 4]))(output_batch)

    loc = Concatenate()([
        Lambda(lambda x_mean: K.expand_dims(x_mean, 1))(x_mean),
        Lambda(lambda y_mean: K.expand_dims(y_mean, 1))(y_mean)
    ])

    x_var = Lambda(lambda x_std: K.square(x_std))(x_std)
    y_var = Lambda(lambda y_std: K.square(y_std))(y_std)
    xy_cor = Multiply()([x_std, y_std, cor])

    cov = Lambda(lambda inputs: K.stack(inputs, axis=0))(
        [x_var, xy_cor, xy_cor, y_var])
    cov = Lambda(lambda cov: K.permute_dimensions(cov, (1, 0)))(cov)
    cov = Reshape((2, 2))(cov)

    scale_tril = Lambda(lambda cov: tf.cholesky(cov))(cov)
    mvn = ds.MultivariateNormalTriL(loc, scale_tril)

    return mvn 
Example #26
Source File: attention.py    From keras-utility-layer-collection with MIT License 5 votes vote down vote up
def _additive_similarity(self, source, query):
        concatenation = K.concatenate([source, query], axis=2)
        nonlinearity = K.tanh(K.dot(concatenation, self._weights["w_a"]))
        
        # tile the weight vector (1, 1, dim) for each time step and each element of the batch -> (bs, T, dim)
        source_shape = K.shape(source)
        vaeff = K.tile(K.expand_dims(self._weights["v_a"], 0), [source_shape[0], source_shape[1], 1])

        similarity = K.batch_dot(K.permute_dimensions(vaeff, [0, 2, 1]), nonlinearity, axes=[1, 2])
        
        return similarity 
Example #27
Source File: layers.py    From keras-utilities with MIT License 5 votes vote down vote up
def dot_product(x, kernel):
    """
    Wrapper for dot product operation, in order to be compatible with both
    Theano and Tensorflow
    Args:
        x (): input
        kernel (): weights
    Returns:
    """
    if K.backend() == 'tensorflow':
        # todo: check that this is correct
        return K.squeeze(K.dot(x, K.expand_dims(kernel)), axis=-1)
    else:
        return K.dot(x, kernel) 
Example #28
Source File: layers.py    From keras-utilities with MIT License 5 votes vote down vote up
def call(self, x, mask=None):
        eij = dot_product(x, self.W)

        if self.bias:
            eij += self.b

        eij = K.tanh(eij)

        a = K.exp(eij)

        # apply mask after the exp. will be re-normalized next
        if mask is not None:
            # Cast the mask to floatX to avoid float64 upcasting in theano
            a *= K.cast(mask, K.floatx())

        # in some cases especially in the early stages of training the sum may be almost zero
        # and this results in NaN's. A workaround is to add a very small positive number ε to the sum.
        # a /= K.cast(K.sum(a, axis=1, keepdims=True), K.floatx())
        a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())

        weighted_input = x * K.expand_dims(a)

        result = K.sum(weighted_input, axis=1)

        if self.return_attention:
            return [result, a]
        return result 
Example #29
Source File: patches.py    From image-analogies with MIT License 5 votes vote down vote up
def make_patches(x, patch_size, patch_stride):
    '''Break image `x` up into a bunch of patches.'''
    from theano.tensor.nnet.neighbours import images2neibs
    x = K.expand_dims(x, 0)
    patches = images2neibs(x,
        (patch_size, patch_size), (patch_stride, patch_stride),
        mode='valid')
    # neibs are sorted per-channel
    patches = K.reshape(patches, (K.shape(x)[1], K.shape(patches)[0] // K.shape(x)[1], patch_size, patch_size))
    patches = K.permute_dimensions(patches, (1, 0, 2, 3))
    patches_norm = K.sqrt(K.sum(K.square(patches), axis=(1,2,3), keepdims=True))
    return patches, patches_norm 
Example #30
Source File: capsulelayers.py    From textcaps with MIT License 5 votes vote down vote up
def call(self, inputs, **kwargs):
        if type(inputs) is list:
            assert len(inputs) == 2
            inputs, mask = inputs
        else: 
            x = K.sqrt(K.sum(K.square(inputs), -1))
            mask = K.one_hot(indices=K.argmax(x, 1), num_classes=x.get_shape().as_list()[1])

        masked = K.batch_flatten(inputs * K.expand_dims(mask, -1))
        return masked