Python keras.backend.dot() Examples

The following are code examples showing how to use keras.backend.dot(). They are collected from open source Python projects.
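
For orientation before the examples, here is a minimal usage sketch (not taken from any of the projects below, and assuming a TensorFlow backend): with two 2D tensors K.dot() is an ordinary matrix product, and when the first argument has more than two dimensions the kernel is contracted against its last axis.

import numpy as np
from keras import backend as K

# 2D x 2D: a plain matrix product -> shape (3, 5)
a = K.constant(np.random.rand(3, 4))
b = K.constant(np.random.rand(4, 5))
print(K.int_shape(K.dot(a, b)))  # (3, 5)

# 3D x 2D: the kernel is applied along the last axis -> shape (2, 3, 5);
# this is the K.dot(x, self.W) pattern most of the layers below rely on
x = K.constant(np.random.rand(2, 3, 4))
w = K.constant(np.random.rand(4, 5))
print(K.int_shape(K.dot(x, w)))  # (2, 3, 5)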

Example 1
Project: deep-models   Author: LaurentMazare   File: rhn.py    Apache License 2.0
def step(self, x, states):
    h_st, B_U, B_W = states

    if self.consume_less == 'cpu':
      x_t = x[:, :self.output_dim]
      x_h = x[:, self.output_dim: 2 * self.output_dim]
    elif self.consume_less == 'mem':
      x_t = K.dot(x * B_W[0], self.W_t) + self.b_t
      x_h = K.dot(x * B_W[1], self.W_h) + self.b_h
    else:
      raise Exception('Unknown `consume_less` mode.')

    for l in xrange(self.L):
      if l == 0:
        t = self.inner_activation(x_t + K.dot(h_st * B_U[0], self.U_ts[l]) + self.b_ts[l])
        h = self.activation(x_h + K.dot(h_st * B_U[1], self.U_hs[l]) + self.b_hs[l])
      else:
        t = self.inner_activation(K.dot(h_st * B_U[0], self.U_ts[l]) + self.b_ts[l])
        h = self.activation(K.dot(h_st * B_U[1], self.U_hs[l]) + self.b_hs[l])
      h_st = h * t + h_st * (1 - t)

    return h_st, [h_st] 
Example 2
Project: keras-utility-layer-collection   Author: zimmerrol   File: attention.py    MIT License
def step(self, x, states):   
        h = states[0]
        # states[1] necessary?

        # equals K.dot(X, self._W1) + self._b2 with X.shape=[bs, T, input_dim]
        total_x_prod = states[-1]
        # comes from the constants (equals the input sequence)
        X = states[-2]
        
        # expand dims to add the vector which is only valid for this time step
        # to total_x_prod which is valid for all time steps
        hw = K.expand_dims(K.dot(h, self._W2), 1)
        additive_atn = total_x_prod + hw
        attention = K.softmax(K.dot(additive_atn, self._V), axis=1)
        x_weighted = K.sum(attention * X, [1])

        x = K.dot(K.concatenate([x, x_weighted], 1), self._W3) + self._b3
        
        h, new_states = self.layer.cell.call(x, states[:-2])
        
        return h, new_states 
Example 3
Project: keras-utility-layer-collection   Author: zimmerrol   File: attention.py    MIT License
def step(self, x, states):  
        h = states[0]
        # states[1] necessary?
        
        # comes from the constants
        X_static = states[-2]
        # equals K.dot(static_x, self._W1) + self._b2 with X.shape=[bs, L, static_input_dim]
        total_x_static_prod = states[-1]

        # expand dims to add the vector which is only valid for this time step
        # to total_x_prod which is valid for all time steps
        hw = K.expand_dims(K.dot(h, self._W2), 1)
        additive_atn = total_x_static_prod + hw
        attention = K.softmax(K.dot(additive_atn, self._V), axis=1)
        static_x_weighted = K.sum(attention * X_static, [1])
        
        x = K.dot(K.concatenate([x, static_x_weighted], 1), self._W3) + self._b3

        h, new_states = self.layer.cell.call(x, states[:-2])
        
        # append attention to the states to "smuggle" it out of the RNN wrapper
        attention = K.squeeze(attention, -1)
        h = K.concatenate([h, attention])

        return h, new_states 
Example 4
Project: IJCAI_Keras_Defense   Author: gujingxiao   File: denseMoE.py    Apache License 2.0
def call(self, inputs):

        expert_outputs = tf.tensordot(inputs, self.expert_kernel, axes=1)
        if self.use_expert_bias:
            expert_outputs = K.bias_add(expert_outputs, self.expert_bias)
        if self.expert_activation is not None:
            expert_outputs = self.expert_activation(expert_outputs)

        gating_outputs = K.dot(inputs, self.gating_kernel)
        if self.use_gating_bias:
            gating_outputs = K.bias_add(gating_outputs, self.gating_bias)
        if self.gating_activation is not None:
            gating_outputs = self.gating_activation(gating_outputs)

        output = K.sum(expert_outputs * K.repeat_elements(K.expand_dims(gating_outputs, axis=1), self.units, axis=1), axis=2)

        return output 
Example 5
Project: IJCAI_Keras_Defense   Author: gujingxiao   File: denseMoE.py    Apache License 2.0
def call(self, inputs):

        expert_outputs = tf.tensordot(inputs, self.expert_kernel, axes=1)
        if self.use_expert_bias:
            expert_outputs = K.bias_add(expert_outputs, self.expert_bias)
        if self.expert_activation is not None:
            expert_outputs = self.expert_activation(expert_outputs)

        gating_outputs = K.dot(inputs, self.gating_kernel)
        if self.use_gating_bias:
            gating_outputs = K.bias_add(gating_outputs, self.gating_bias)
        if self.gating_activation is not None:
            gating_outputs = self.gating_activation(gating_outputs)

        output = K.sum(expert_outputs * K.repeat_elements(K.expand_dims(gating_outputs, axis=1), self.units, axis=1), axis=2)

        return output 
Example 6
Project: cbc_networks   Author: saralajew   File: reasoning_layers.py    BSD 3-Clause "New" or "Revised" License
def call(self, inputs, **kwargs):
        # decode the reasoning probabilities
        positive_kernel = self.reasoning_probabilities[0]
        negative_kernel = (1 - positive_kernel) * \
                          self.reasoning_probabilities[1]

        if self.use_component_probabilities:
            # squash component probabilities
            components_probabilities = softmax(self.component_probabilities)

            positive_kernel = positive_kernel * components_probabilities
            negative_kernel = negative_kernel * components_probabilities

        # stabilize the division with a small epsilon
        probs = (K.dot(inputs, (positive_kernel - negative_kernel)) \
                 + K.sum(negative_kernel, 1)) \
                / (K.sum(positive_kernel + negative_kernel, 1) + K.epsilon())

        # squeeze replica dimension if one.
        if self.n_replicas == 1:
            probs = K.squeeze(probs, axis=1)
        else:
            probs = K.permute_dimensions(probs, (0, 2, 1))

        return probs 
Example 7
Project: lmtc-eurlex57k   Author: iliaschalkidis   File: attention.py    Apache License 2.0
def call(self, x, mask=None):

        a = dot_product(x, self.Wa)

        def label_wise_attention(values):
            doc_repi, ai = values
            ai = K.softmax(K.transpose(ai))
            label_aware_doc_rep = K.dot(ai, doc_repi)
            if self.return_attention:
                return [label_aware_doc_rep, ai]
            else:
                return [label_aware_doc_rep, label_aware_doc_rep]

        label_aware_doc_reprs, attention_scores = K.tf.map_fn(label_wise_attention, [x, a])

        # Compute label-scores
        label_aware_doc_reprs = K.sum(label_aware_doc_reprs * self.Wo, axis=-1) + self.bo
        label_aware_doc_reprs = K.sigmoid(label_aware_doc_reprs)

        if self.return_attention:
            return [label_aware_doc_reprs, attention_scores]

        return label_aware_doc_reprs 
Example 8
Project: DeepLearn   Author: GauravBh1010tt   File: layers.py    MIT License
def call(self , x, mask=None):
        
        e1=x[0].T
        e2=x[1].T
        
        batch_size = K.shape(x[0])[0]
        sim = []
        V_out = K.dot(self.V, K.concatenate([e1,e2],axis=0))     

        for i in range(self.k): 
            temp = K.batch_dot(K.dot(e1.T,self.W[i,:,:]),e2.T,axes=1)
            sim.append(temp)
        sim=K.reshape(sim,(self.k,batch_size))

        tensor_bi_product = self.activation(V_out+sim)
        tensor_bi_product = K.dot(self.U.T, tensor_bi_product).T

        return tensor_bi_product 
Example 9
Project: spektral   Author: danielegrattarola   File: pooling.py    MIT License
def call(self, inputs):
        if self.data_mode == 'graph':
            X, I = inputs
            if K.ndim(I) == 2:
                I = I[:, 0]
        else:
            X = inputs
        inputs_linear = K.dot(X, self.lg_kernel) + self.lg_bias
        attn_map = K.dot(X, self.attn_kernel) + self.attn_bias
        attn_map = K.sigmoid(attn_map)
        masked_inputs = inputs_linear * attn_map
        if self.data_mode in {'single', 'batch'}:
            output = K.sum(masked_inputs, axis=-2, keepdims=self.data_mode=='single')
        else:
            output = tf.segment_sum(masked_inputs, I)

        return output 
Example 10
Project: spektral   Author: danielegrattarola   File: convolutional.py    MIT License
def call(self, inputs):
        features = inputs[0]
        fltr_list = inputs[1:]

        # Convolution
        supports = list()
        for fltr in fltr_list:
            s = filter_dot(fltr, features)
            supports.append(s)
        supports = K.concatenate(supports, axis=-1)
        output = K.dot(supports, self.kernel)

        if self.use_bias:
            output = K.bias_add(output, self.bias)
        if self.activation is not None:
            output = self.activation(output)
        return output 
Example 11
Project: spektral   Author: danielegrattarola   File: convolutional.py    MIT License
def call(self, inputs):
        features = inputs[0]
        fltr = inputs[1]

        if not K.is_sparse(fltr):
            fltr = tf.contrib.layers.dense_to_sparse(fltr)

        features_neigh = self.aggregate_op(
            tf.gather(features, fltr.indices[:, -1]), fltr.indices[:, -2]
        )
        output = K.concatenate([features, features_neigh])
        output = K.dot(output, self.kernel)

        if self.use_bias:
            output = K.bias_add(output, self.bias)
        if self.activation is not None:
            output = self.activation(output)
        output = K.l2_normalize(output, axis=-1)
        return output 
Example 12
Project: spektral   Author: danielegrattarola   File: ops.py    MIT License
def filter_dot(fltr, features):
    """
    Performs the multiplication of a graph filter (N x N) with the node features,
    automatically dealing with single, mixed, and batch modes.
    :param fltr: the graph filter(s) (N x N in single and mixed mode,
    batch x N x N in batch mode).
    :param features: the node features (N x F in single mode, batch x N x F in
    mixed and batch mode).
    :return: the filtered features.
    """
    if len(K.int_shape(features)) == 2:
        # Single mode
        return K.dot(fltr, features)
    else:
        if len(K.int_shape(fltr)) == 3:
            # Batch mode
            return K.batch_dot(fltr, features)
        else:
            # Mixed mode
            return mixed_mode_dot(fltr, features) 
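
As a rough illustration of the three modes described in the docstring above (a sketch only, assuming filter_dot and mixed_mode_dot from spektral's ops module are in scope):

import numpy as np
from keras import backend as K

N, F, batch = 4, 3, 2
fltr_single = K.constant(np.eye(N))                     # (N, N)
feat_single = K.constant(np.random.rand(N, F))          # (N, F)
fltr_batch = K.constant(np.stack([np.eye(N)] * batch))  # (batch, N, N)
feat_batch = K.constant(np.random.rand(batch, N, F))    # (batch, N, F)

out_single = filter_dot(fltr_single, feat_single)  # single mode -> K.dot, (N, F)
out_batch = filter_dot(fltr_batch, feat_batch)     # batch mode -> K.batch_dot, (batch, N, F)
out_mixed = filter_dot(fltr_single, feat_batch)    # mixed mode -> mixed_mode_dot, (batch, N, F)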
Example 13
Project: spektral   Author: danielegrattarola   File: ops.py    MIT License
def matrix_power(x, k):
    """
    Computes the k-th power of a square matrix.
    :param x: a square matrix (Tensor or SparseTensor)
    :param k: exponent
    :return: matrix of same type and dtype as the input
    """
    if K.ndim(x) != 2:
        raise ValueError('x must have rank 2.')
    sparse = K.is_sparse(x)
    if sparse:
        x_dense = tf.sparse.to_dense(x)
    else:
        x_dense = x

    x_k = x_dense
    for _ in range(k - 1):
        x_k = K.dot(x_k, x_dense)

    if sparse:
        return tf.contrib.layers.dense_to_sparse(x_k)
    else:
        return x_k 
Example 14
Project: keras_extension   Author: k1414st   File: graph.py    MIT License
def __graph_gate(self, x, w1, w2=None):
        """
        make a graph gate matrix by crossing each node's latent states.

        Args:
            x: input Tensor of node data after convolution.
               shape: (B(Batch_size), N(N_nodes), F_in(F_inputs))
            w1, w2: weight matrix variable
               (to transform input to gatable hidden states.)
               shape: (F_in, F_out(F_outputs))
        """
        if w2 is None:
            w2 = w1
        h1 = K.dot(x, w1)  # (B, N, F_out)
        h2 = K.dot(x, w2)  # (B, N, F_out)
        hh = K.batch_dot(h1, h2, axes=(2, 2))  # (B, N, N)
        return K.sigmoid(hh)  # (B, N, N) 
Example 15
Project: keras_extension   Author: k1414st   File: mac.py    MIT License
def call(self, inputs, training=None):

        c_cur, m_prev, knowledge = inputs[0], inputs[1], inputs[2]

        Im = K.expand_dims(K.bias_add(K.dot(m_prev, self.im_kernel),
                                      self.im_bias), axis=1)
        Ik = K.bias_add(K.dot(knowledge, self.ik_kernel),
                        self.ik_bias)
        I = Im * Ik
        Id = K.bias_add(K.dot(K.concatenate([I, knowledge], axis=-1),
                              self.id_kernel),
                        self.id_bias)
        cI = K.expand_dims(c_cur, axis=1) * Id
        ra = K.bias_add(K.dot(cI, self.ra_kernel),
                        self.ra_bias)
        rv = self.attention_activation(ra, axis=1)
        r = K.sum(rv * knowledge, axis=1)
        return r 
Example 16
Project: keras_extension   Author: k1414st   File: mac.py    MIT License
def call(self, inputs, training=None):

        c, r, m, msa = inputs[0], inputs[1], inputs[2], inputs[3]

        mi = K.bias_add(K.dot(K.concatenate([r, m], axis=-1),
                              self.mi_kernel),
                        self.mi_bias)

        md = K.bias_add(K.dot(msa, self.mdsa_kernel) +
                        K.dot(mi, self.mdi_kernel),
                        self.md_bias)
        cd = K.bias_add(K.dot(c, self.cd_kernel),
                        self.cd_bias)
        mi = self.forget_activation(cd) * m + \
            self.forget_activation(1-cd) * md
        return mi 
Example 17
Project: keras_extension   Author: k1414st   File: graph.py    MIT License
def __graph_gate(self, x, w1, w2=None):
        """
        make a graph gate matrix by crossing each node's latent states.

        Args:
            x: input Tensor of node data after convolution.
               shape: (B(Batch_size), N(N_nodes), F_in(F_inputs))
            w1, w2: weight matrix variable
               (to transform input to gatable hidden states.)
               shape: (F_in, F_out(F_outputs))
        """
        if w2 is None:
            w2 = w1
        h1 = K.dot(x, w1)  # (B, N, F_out)
        h2 = K.dot(x, w2)  # (B, N, F_out)
        hh = K.batch_dot(h1, h2, axes=(2, 2))  # (B, N, N)
        return K.sigmoid(hh)  # (B, N, N) 
Example 18
Project: deep-learning-note   Author: wdxtub   File: 3_nerual_style_transfer.py    MIT License
def gram_matrix(x):
    features = K.batch_flatten(K.permute_dimensions(x, (2, 0, 1)))
    gram = K.dot(features, K.transpose(features))
    return gram 
Example 19
Project: deep-models   Author: LaurentMazare   File: lstm_ln.py    Apache License 2.0
def step(self, x, states):
    h_tm1 = states[0]
    c_tm1 = states[1]
    B_U = states[2]
    B_W = states[3]

    if self.consume_less == 'gpu':
      z = self.norm(K.dot(x * B_W[0], self.W), 0) + self.norm(K.dot(h_tm1 * B_U[0], self.U), 1) + self.b

      z0 = z[:, :self.output_dim]
      z1 = z[:, self.output_dim: 2 * self.output_dim]
      z2 = z[:, 2 * self.output_dim: 3 * self.output_dim]
      z3 = z[:, 3 * self.output_dim:]

      i = self.inner_activation(z0)
      f = self.inner_activation(z1)
      c = f * c_tm1 + i * self.activation(z2)
      o = self.inner_activation(z3)
    else:
      assert (False)
      if self.consume_less == 'cpu':
        x_i = x[:, :self.output_dim]
        x_f = x[:, self.output_dim: 2 * self.output_dim]
        x_c = x[:, 2 * self.output_dim: 3 * self.output_dim]
        x_o = x[:, 3 * self.output_dim:]
      elif self.consume_less == 'mem':
        x_i = K.dot(x * B_W[0], self.W_i) + self.b_i
        x_f = K.dot(x * B_W[1], self.W_f) + self.b_f
        x_c = K.dot(x * B_W[2], self.W_c) + self.b_c
        x_o = K.dot(x * B_W[3], self.W_o) + self.b_o
      else:
        raise Exception('Unknown `consume_less` mode.')

      i = self.inner_activation(x_i + K.dot(h_tm1 * B_U[0], self.U_i))
      f = self.inner_activation(x_f + K.dot(h_tm1 * B_U[1], self.U_f))
      c = f * c_tm1 + i * self.activation(x_c + K.dot(h_tm1 * B_U[2], self.U_c))
      o = self.inner_activation(x_o + K.dot(h_tm1 * B_U[3], self.U_o))

    h = o * self.activation(self.norm(c, 2))
    return h, [h, c] 
Example 20
Project: keras-utility-layer-collection   Author: zimmerrol   File: attention.py    MIT License
def _additive_similarity(self, source, query):
        concatenation = K.concatenate([source, query], axis=2)
        nonlinearity = K.tanh(K.dot(concatenation, self._weights["w_a"]))
        
        # tile the weight vector (1, 1, dim) for each time step and each element of the batch -> (bs, T, dim)
        source_shape = K.shape(source)
        vaeff = K.tile(K.expand_dims(self._weights["v_a"], 0), [source_shape[0], source_shape[1], 1])

        similarity = K.batch_dot(K.permute_dimensions(vaeff, [0, 2, 1]), nonlinearity, axes=[1, 2])
        
        return similarity 
Example 21
Project: keras-utility-layer-collection   Author: zimmerrol   File: attention.py    MIT License
def _multiplicative_similarity(self, source, query):
        qp = K.dot(query, self._weights["w_a"])
        similarity = K.batch_dot(K.permute_dimensions(qp, [0, 2, 1]), source, axes=[1, 2])
        
        return similarity 
Example 22
Project: keras-utility-layer-collection   Author: zimmerrol   File: attention.py    MIT License
def get_constants(self, x):
        # add constants to speed up calculation
        constants = [x, K.dot(x, self._W1) + self._b2]
        
        return constants 
Example 23
Project: phoneticSimilarity   Author: ronggong   File: attentionWithContext.py    GNU Affero General Public License v3.0
def dot_product(x, kernel):
    """
    Wrapper for dot product operation, in order to be compatible with both
    Theano and Tensorflow
    Args:
        x (): input
        kernel (): weights
    Returns:
    """
    if K.backend() == 'tensorflow':
        return K.squeeze(K.dot(x, K.expand_dims(kernel)), axis=-1)
    else:
        return K.dot(x, kernel) 
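
The expand/squeeze pair in the TensorFlow branch is needed because K.dot() on that backend does not handle a 1D kernel the way Theano's dot does: the kernel is lifted to shape (dim, 1), the product has a trailing axis of size 1, and that axis is squeezed away. A short usage sketch (assuming the dot_product defined above is in scope):

import numpy as np
from keras import backend as K

x = K.constant(np.random.rand(2, 7, 5))  # (batch, timesteps, dim)
u = K.constant(np.random.rand(5))        # attention/context vector, shape (dim,)

scores = dot_product(x, u)               # -> (batch, timesteps)
print(K.int_shape(scores))               # (2, 7)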
Example 24
Project: phoneticSimilarity   Author: ronggong   File: attention.py    GNU Affero General Public License v3.0
def dot_product(x, kernel):
    """
    Wrapper for dot product operation, in order to be compatible with both
    Theano and Tensorflow
    Args:
        x (): input
        kernel (): weights
    Returns:
    """
    if K.backend() == 'tensorflow':
        # todo: check that this is correct
        kernel = K.expand_dims(kernel)
        return K.squeeze(K.dot(x, kernel), axis=-1)
    else:
        return K.dot(x, kernel) 
Example 25
Project: cdc   Author: ckbjimmy   File: Attention.py    MIT License
def dot_product(x, kernel):
    if K.backend() == 'tensorflow':
        return K.squeeze(K.dot(x, K.expand_dims(kernel)), axis=-1)
    else:
        return K.dot(x, kernel) 
Example 26
Project: cdc   Author: ckbjimmy   File: Attention.py    MIT License
def call(self, x, mask=None):
        uit = K.dot(x, self.W)
        
        if self.bias:
            uit += self.b
        
        uit = K.tanh(uit)
        
        mul_a = uit  * self.u # with this
        ait = K.sum(mul_a, axis=2) # and this
        
        a = K.exp(ait)

        # apply mask after the exp. will be re-normalized next
        if mask is not None:
            # Cast the mask to floatX to avoid float64 upcasting in theano
            a *= K.cast(mask, K.floatx())

        # in some cases especially in the early stages of training the sum may be almost zero
        # and this results in NaN's. A workaround is to add a very small positive number ε to the sum.
        # a /= K.cast(K.sum(a, axis=1, keepdims=True), K.floatx())
        a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())

        a = K.expand_dims(a)
        weighted_input = x * a
        return K.sum(weighted_input, axis=1) 
Example 27
Project: gandlf   Author: codekansas   File: losses.py    MIT License
def rbf_moment_matching(y_true, y_pred, sigmas=[2, 5, 10, 20, 40, 80]):
    """Generative moment matching loss with RBF kernel.

    Reference: https://arxiv.org/abs/1502.02761
    """

    warnings.warn('Moment matching loss is still in development.')

    if len(K.int_shape(y_pred)) != 2 or len(K.int_shape(y_true)) != 2:
        raise ValueError('RBF Moment Matching function currently only works '
                         'for outputs with shape (batch_size, num_features).'
                         'Got y_true="%s" and y_pred="%s".' %
                         (str(K.int_shape(y_pred)), str(K.int_shape(y_true))))

    sigmas = list(sigmas) if isinstance(sigmas, (list, tuple)) else [sigmas]

    x = K.concatenate([y_pred, y_true], 0)

    # Performs dot product between all combinations of rows in X.
    xx = K.dot(x, K.transpose(x))  # (batch_size, batch_size)

    # Performs dot product of all rows with themselves.
    x2 = K.sum(x * x, 1, keepdims=True)  # (batch_size, None)

    # Gets exponent entries of the RBF kernel (without sigmas).
    exponent = xx - 0.5 * x2 - 0.5 * K.transpose(x2)

    # Applies all the sigmas.
    total_loss = None
    for sigma in sigmas:
        kernel_val = K.exp(exponent / sigma)
        loss = K.sum(kernel_val)
        total_loss = loss if total_loss is None else loss + total_loss

    return total_loss 
Example 28
Project: gandlf   Author: codekansas   File: attention.py    MIT License
def get_constants(self, x):
        constants = self.layer.get_constants(x)
        constants.append(K.dot(self.attention, self.attn_U_a))
        return constants 
Example 29
Project: gandlf   Author: codekansas   File: attention.py    MIT License
def _compute_attention(self, h, attention):
        m = self.attn_activation(K.dot(h, self.attn_U_m) + attention +
                                 self.attn_b_m)
        s = self.attn_gate_func(K.dot(m, self.attn_U_s) + self.attn_b_s)
        return s 
Example 30
Project: gandlf   Author: codekansas   File: attention.py    MIT License
def _compute_attention(self, h):
        time_weights = K.expand_dims(K.dot(h, self.attn_U_t) + self.attn_b_t,
                                     dim=-1)
        time_weights = self.time_dist_activation(time_weights)
        weighted_sum = K.sum(time_weights * self.attention, axis=1)
        attn_vec = K.dot(weighted_sum, self.attn_U_a) + self.attn_b_a
        return self.attn_gate_func(attn_vec) 
Example 31
Project: AI_Competition   Author: Decalogue   File: attention.py    MIT License
def call(self, x, mask=None):
        input_shape = K.int_shape(x)

        features_dim = self.features_dim
        # step_dim = self.step_dim
        step_dim = input_shape[1]

        eij = K.reshape(K.dot(K.reshape(x, (-1, features_dim)), K.reshape(self.W, (features_dim, 1))), (-1, step_dim))

        if self.bias:
            eij += self.b[:input_shape[1]]

        eij = K.tanh(eij)

        a = K.exp(eij)

        # apply mask after the exp. will be re-normalized next
        if mask is not None:
            # Cast the mask to floatX to avoid float64 upcasting in theano
            a *= K.cast(mask, K.floatx())

        # in some cases especially in the early stages of training the sum may be almost zero
        # and this results in NaN's. A workaround is to add a very small positive number ε to the sum.
        a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())

        a = K.expand_dims(a)
        weighted_input = x * a
        # print(weighted_input.shape)
        return K.sum(weighted_input, axis=1) 
Example 32
Project: AI_Competition   Author: Decalogue   File: attention.py    MIT License
def call(self, inputs, mask=None):
        en = inputs[0]
        de = inputs[1]
        de_shape = K.int_shape(de)
        step_dim = de_shape[1]

        hid_en = K.dot(en, self.W_en1)
        hid_de = K.dot(de, self.W_en2)
        if self.bias:
            hid_en += self.b_en1
            hid_de += self.b_en2
        hid = K.tanh(K.expand_dims(hid_en, axis=1) + hid_de)
        eij = K.reshape(K.dot(hid, K.reshape(self.W_de, (self.hid_size, 1))), (-1, step_dim))
        if self.bias:
            eij += self.b_de[:step_dim]

        a = K.exp(eij - K.max(eij, axis=-1, keepdims=True))

        # apply mask after the exp. will be re-normalized next
        if mask is not None:
            # Cast the mask to floatX to avoid float64 upcasting in theano
            a *= K.cast(mask[1], K.floatx())

        # in some cases especially in the early stages of training the sum may be almost zero
        # and this results in NaN's. A workaround is to add a very small positive number ε to the sum.
        a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())

        a = K.expand_dims(a)
        weighted_input = de * a
        return K.sum(weighted_input, axis=1) 
Example 33
Project: deepflying   Author: dslab-deepflying   File: styleTransfer.py    GNU General Public License v3.0
def gram_matrix(x):
    features = K.batch_flatten(K.permute_dimensions(x,(2,0,1)))
    gram = K.dot(features,K.transpose(features))
    return gram 
Example 34
Project: deepflying   Author: dslab-deepflying   File: neural_style_transfer.py    GNU General Public License v3.0
def gram_matrix(x):
    assert K.ndim(x) == 3
    if K.image_data_format() == 'channels_first':
        features = K.batch_flatten(x)
    else:
        features = K.batch_flatten(K.permute_dimensions(x, (2, 0, 1)))
    gram = K.dot(features, K.transpose(features))
    return gram

# the "style loss" is designed to maintain
# the style of the reference image in the generated image.
# It is based on the gram matrices (which capture style) of
# feature maps from the style reference image
# and from the generated image 
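
The style loss referred to in the comment is not included in this excerpt; a minimal sketch of the usual formulation (not this project's exact code, and assuming the gram_matrix defined above is in scope) compares the Gram matrices of the style and generated feature maps:

from keras import backend as K

def style_loss(style, combination, img_nrows, img_ncols):
    # Squared Frobenius distance between Gram matrices, normalized by
    # the number of channels and spatial positions of the feature map.
    S = gram_matrix(style)
    C = gram_matrix(combination)
    channels = 3
    size = img_nrows * img_ncols
    return K.sum(K.square(S - C)) / (4. * (channels ** 2) * (size ** 2))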
Example 35
Project: deepflying   Author: dslab-deepflying   File: nst.py    GNU General Public License v3.0
def gram_matrix(x):
    assert K.ndim(x) == 3
    if K.image_data_format() == 'channels_first':
        features = K.batch_flatten(x)
    else:
        features = K.batch_flatten(K.permute_dimensions(x, (2, 0, 1)))
    gram = K.dot(features, K.transpose(features))
    return gram 
Example 36
Project: deepflying   Author: dslab-deepflying   File: neural_style_transfer.py    GNU General Public License v3.0
def gram_matrix(x):
    assert K.ndim(x) == 3
    if K.image_data_format() == 'channels_first':
        features = K.batch_flatten(x)
    else:
        features = K.batch_flatten(K.permute_dimensions(x, (2, 0, 1)))
    gram = K.dot(features, K.transpose(features))
    return gram

# the "style loss" is designed to maintain
# the style of the reference image in the generated image.
# It is based on the gram matrices (which capture style) of
# feature maps from the style reference image
# and from the generated image 
Example 37
Project: dense_tensor   Author: bstriner   File: dense_tensor.py    MIT License
def call(self, x, mask=None):
        output = K.dot(x, self.W)
        q = quadratic_batch(x, self.V)
        output += q
        if self.bias:
            output += self.b
        return self.activation(output) 
Example 38
Project: dense_tensor   Author: bstriner   File: theano_backend.py    MIT License
def quadratic_batch(x, V):
    tmp1 = K.dot(x, V)  # n,input_dim + units,input_dim,input_dim = n,units,input_dim
    tmp2 = K.batch_dot(x, tmp1, axes=[[1], [2]])  # n,input_dim + n,units,input_dim = n,units
    return tmp2 
Example 39
Project: dense_tensor   Author: bstriner   File: tensorflow_backend.py    MIT License
def quadratic_batch(x, V):
    tmp1 = K.dot(x, V)  # n,input_dim + units,input_dim,input_dim = n,units,input_dim
    xr = K.expand_dims(x, 2)  # n, 1, input_dim
    tmp2 = K.permute_dimensions(tmp1, (0, 2, 1))  # n, input_dim, units
    tmp3 = K.batch_dot(xr, tmp2, axes=[[1], [1]])  # n,1,input_dim + n,input_dim,units = n,1,units
    tmp4 = tmp3[:, 0, :]
    return tmp4 
Example 40
Project: trVAE   Author: theislab   File: _utils.py    MIT License
def compute_kernel(x, y, kernel='rbf', **kwargs):
    """
        Computes RBF kernel between x and y.
        # Parameters
            x: Tensor
                Tensor with shape [batch_size, z_dim]
            y: Tensor
                Tensor with shape [batch_size, z_dim]
        # Returns
            returns the computed RBF kernel between x and y
    """
    scales = kwargs.get("scales", [])
    if kernel == "rbf":
        x_size = K.shape(x)[0]
        y_size = K.shape(y)[0]
        dim = K.shape(x)[1]
        tiled_x = K.tile(K.reshape(x, K.stack([x_size, 1, dim])), K.stack([1, y_size, 1]))
        tiled_y = K.tile(K.reshape(y, K.stack([1, y_size, dim])), K.stack([x_size, 1, 1]))
        return K.exp(-K.mean(K.square(tiled_x - tiled_y), axis=2) / K.cast(dim, tf.float32))
    elif kernel == 'raphy':
        scales = K.variable(value=np.asarray(scales))
        squared_dist = K.expand_dims(squared_distance(x, y), 0)
        scales = K.expand_dims(K.expand_dims(scales, -1), -1)
        weights = K.eval(K.shape(scales)[0])
        weights = K.variable(value=np.asarray(weights))
        weights = K.expand_dims(K.expand_dims(weights, -1), -1)
        return K.sum(weights * K.exp(-squared_dist / (K.pow(scales, 2))), 0)
    elif kernel == "multi-scale-rbf":
        sigmas = [1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1, 5, 10, 15, 20, 25, 30, 35, 100, 1e3, 1e4, 1e5, 1e6]

        beta = 1. / (2. * (K.expand_dims(sigmas, 1)))
        distances = squared_distance(x, y)
        s = K.dot(beta, K.reshape(distances, (1, -1)))

        return K.reshape(tf.reduce_sum(tf.exp(-s), 0), K.shape(distances)) / len(sigmas) 
Example 41
Project: ICASSP2019_TCN   Author: DSIP-UPatras   File: custom_layers.py    MIT License
def dot_product(x, kernel):
    """
    Wrapper for dot product operation, in order to be compatible with both
    Theano and Tensorflow
    Args:
        x (): input
        kernel (): weights
    Returns:
    """
    if K.backend() == 'tensorflow':
        return K.squeeze(K.dot(x, K.expand_dims(kernel)), axis=-1)
    else:
        return K.dot(x, kernel) 
Example 42
Project: lmtc-eurlex57k   Author: iliaschalkidis   File: attention.py    Apache License 2.0
def dot_product(x, kernel):
    """
    Wrapper for dot product operation, in order to be compatible with both
    Theano and Tensorflow
    Args:
        x (): input
        kernel (): weights
    Returns:
    """
    if K.backend() == 'tensorflow':
        return K.squeeze(K.dot(x, K.expand_dims(kernel)), axis=-1)
    else:
        return K.dot(x, kernel) 
Example 43
Project: lmtc-eurlex57k   Author: iliaschalkidis   File: attention.py    Apache License 2.0
def call(self, x, mask=None):
        # Unfold inputs (document representations, label representations)
        doc_reps, label_reps = x

        doc2_reps = K.tanh(dot_product(doc_reps, self.W_d) + self.b_d)

        # Compute Attention Scores
        doc_a = dot_product(doc2_reps, label_reps)

        def label_wise_attention(values):
            doc_repi, ai = values
            ai = K.softmax(K.transpose(ai))
            label_aware_doc_rep = K.dot(ai, doc_repi)
            if self.return_attention:
                return [label_aware_doc_rep, ai]
            else:
                return [label_aware_doc_rep, label_aware_doc_rep]

        label_aware_doc_reprs, attention_scores = K.tf.map_fn(label_wise_attention, [doc_reps, doc_a])

        label_aware_doc_reprs = K.sum(label_aware_doc_reprs * label_reps, axis=-1)
        label_aware_doc_reprs = K.sigmoid(label_aware_doc_reprs)

        if self.return_attention:
            return [label_aware_doc_reprs, attention_scores]

        return label_aware_doc_reprs 
Example 44
Project: Document-Classifier-LSTM   Author: AlexGidiotis   File: attention.py    MIT License
def dot_product(x, kernel):
    """
    Wrapper for dot product operation, in order to be compatible with both
    Theano and Tensorflow
    Args:
        x (): input
        kernel (): weights
    Returns:
    """
    if K.backend() == 'tensorflow':
        return K.squeeze(K.dot(x, K.expand_dims(kernel)), axis=-1)
    else:
        return K.dot(x, kernel) 
Example 45
Project: AI2-Reasoning-Challenge-ARC   Author: SebiSebi   File: keras_custom_layers.py    GNU General Public License v3.0
def call(self, inputs):
        if not isinstance(inputs, list):
            raise ValueError('Linked Attention layer expects a list '
                             'of tensors as inputs.')
        if len(inputs) != 2:
            raise ValueError('Linked Attention layer expects two tensors as '
                             'input, {} were given'.format(len(inputs)))
        input_states = inputs[0]
        last_state = inputs[1]

        # Each LSTM state is a row vector in "input_states".
        # Apply a linear transformation to each hidden state.
        # The same transformation to all states.
        # hs.shape = (batch_size, timestamps, self.dim)
        hs = K.dot(input_states, self.Wy)

        # Apply a linear function to last_state and expand
        # it to each row vector.
        # aux3.shape = (batch_size, timestamps, size_LSTM_2)
        # aux4.shape = (batch_size, timestamps, self.dim)
        aux1 = K.expand_dims(last_state, -1)
        aux2 = K.dot(aux1, K.ones(shape=(1, self.num_timestamps)))
        aux3 = K.permute_dimensions(aux2, (0, 2, 1))
        aux4 = K.dot(aux3, self.Wh)
        assert(aux3.shape[1] == hs.shape[1])
        assert(aux3.shape[2] == last_state.shape[1])
        assert(aux4.shape[1] == hs.shape[1])
        assert(aux4.shape[2] == hs.shape[2])
        assert(aux4.shape[2] == self.dim)

        m = K.relu(hs + aux4)
        alpha = K.expand_dims(K.softmax(K.squeeze(K.dot(m, self.w), -1)), 1)

        # r.shape = (batch_size, 1, size_LSTM_1)
        r = K.batch_dot(alpha, input_states)

        output_1 = K.dot(r, self.Wp)
        output_2 = K.dot(K.expand_dims(last_state, 1), self.Wx)
        output_3 = K.squeeze(output_1, 1) + K.squeeze(output_2, 1)
        return K.relu(output_3) 
Example 46
Project: AI2-Reasoning-Challenge-ARC   Author: SebiSebi   File: keras_custom_layers.py    GNU General Public License v3.0
def call(self, inputs):
        if not isinstance(inputs, list):
            raise ValueError('Similarity layer expects a list '
                             'of tensors as inputs.')
        if len(inputs) != 2:
            raise ValueError('Similarity layer expects two tensors as '
                             'input, {} were given.'.format(len(inputs)))

        x = inputs[0]
        y = inputs[1]

        # Each line in X should have the form: dataX,       1s,      dataX.
        # Each line in Y should have the form: 1s,       dataY,      dataY.
        #
        #                               =>     dataX,    dataY,  dataX * dataY
        #
        x = K.concatenate([x, K.ones(K.shape(x)), x], axis=-1)
        y = K.concatenate([K.ones(K.shape(y)), y, y], axis=-1)

        # Pair each lines and take elementwise product (without summation).
        # x = K.reshape(x, (-1, x.shape[1], 1, x.shape[2]))
        # y = K.reshape(y, (-1, 1, y.shape[1], y.shape[2]))
        x = K.expand_dims(x, axis=2)
        y = K.expand_dims(y, axis=1)
        rez = x * y

        # Apply dot product with a vector.
        rez = rez * self.WS

        # return K.ones((1, 93)) * self.WS
        return K.sum(rez, axis=-1) 
Example 47
Project: AI2-Reasoning-Challenge-ARC   Author: SebiSebi   File: keras_custom_layers.py    GNU General Public License v3.0
def call(self, inputs):
        if not isinstance(inputs, list):
            raise ValueError('Linked Attention layer expects a list '
                             'of tensors as inputs.')
        if len(inputs) != 2:
            raise ValueError('Linked Attention layer expects two tensors as '
                             'input, {} were given'.format(len(inputs)))
        input_states = inputs[0]
        last_state = inputs[1]

        # Each LSTM state is a row vector in "input_states".
        # Apply a linear transformation to each hidden state.
        # The same transformation to all states.
        # hs.shape = (batch_size, timestamps, self.dim)
        hs = K.dot(input_states, self.Wy)

        # Apply a linear function to last_state and expand
        # it to each row vector.
        # aux3.shape = (batch_size, timestamps, size_LSTM_2)
        # aux4.shape = (batch_size, timestamps, self.dim)
        aux1 = K.expand_dims(last_state, -1)
        aux2 = K.dot(aux1, K.ones(shape=(1, self.num_timestamps)))
        aux3 = K.permute_dimensions(aux2, (0, 2, 1))
        aux4 = K.dot(aux3, self.Wh)
        assert(aux3.shape[1] == hs.shape[1])
        assert(aux3.shape[2] == last_state.shape[1])
        assert(aux4.shape[1] == hs.shape[1])
        assert(aux4.shape[2] == hs.shape[2])
        assert(aux4.shape[2] == self.dim)

        m = K.relu(hs + aux4)
        alpha = K.expand_dims(K.softmax(K.squeeze(K.dot(m, self.w), -1)), 1)

        # r.shape = (batch_size, 1, size_LSTM_1)
        r = K.batch_dot(alpha, input_states)

        output_1 = K.dot(r, self.Wp)
        output_2 = K.dot(K.expand_dims(last_state, 1), self.Wx)
        output_3 = K.squeeze(output_1, 1) + K.squeeze(output_2, 1)
        return K.relu(output_3) 
Example 48
Project: AI2-Reasoning-Challenge-ARC   Author: SebiSebi   File: keras_custom_layers.py    GNU General Public License v3.0
def call(self, inputs):
        if not isinstance(inputs, list):
            raise ValueError('Similarity layer expects a list '
                             'of tensors as inputs.')
        if len(inputs) != 2:
            raise ValueError('Similarity layer expects two tensors as '
                             'input, {} were given.'.format(len(inputs)))

        x = inputs[0]
        y = inputs[1]

        # Each line in X should have the form: dataX,       1s,      dataX.
        # Each line in Y should have the form: 1s,       dataY,      dataY.
        #
        #                               =>     dataX,    dataY,  dataX * dataY
        #
        x = K.concatenate([x, K.ones(K.shape(x)), x], axis=-1)
        y = K.concatenate([K.ones(K.shape(y)), y, y], axis=-1)

        # Pair each lines and take elementwise product (without summation).
        # x = K.reshape(x, (-1, x.shape[1], 1, x.shape[2]))
        # y = K.reshape(y, (-1, 1, y.shape[1], y.shape[2]))
        x = K.expand_dims(x, axis=2)
        y = K.expand_dims(y, axis=1)
        rez = x * y

        # Apply dot product with a vector.
        rez = rez * self.WS

        # return K.ones((1, 93)) * self.WS
        return K.sum(rez, axis=-1) 
Example 49
Project: DeepLearn   Author: GauravBh1010tt   File: layers.py    MIT License
def call(self, data, mask=None):
        v1 = data[0]
        v2 = data[1]
        sim = K.dot(v1,self.W)
        sim = K.batch_dot(sim,v2,axes=1)
        return sim 
Example 50
Project: dockerizeme   Author: dockerizeme   File: snippet.py    Apache License 2.0
def step(self, inputs, states):
        h = states[0]
        d = states[1]
        n = states[2]
        a_max = states[3]
#         dp_mask = states[2]
#         rec_dp_mask = states[3]
        inputs_joined = K.concatenate([inputs, h], axis=-1)
        u = K.dot(inputs,self.features_kernel)
        u = K.bias_add(u, self.features_bias)
        
        g = K.dot(inputs_joined, self.recurrent_kernel)
        g = K.bias_add(g, self.recurrent_bias)
        
        a = K.dot(inputs_joined, self.average_kernel)
        
        z = u * self.recurrent_activation(g)
        
        a_newmax = K.maximum(a_max, a)
        exp_diff = K.exp(a_max - a_newmax)
        exp_scaled = K.exp(a - a_newmax)
        
        n = n*exp_diff + z*exp_scaled
        d = d*exp_diff + exp_scaled
        h_new = self.activation(n/d)
        a_max = a_newmax
        h = h_new

        return h, [h, d, n, a_max] 
Example 51
Project: dockerizeme   Author: dockerizeme   File: snippet.py    Apache License 2.0
def step(self, x, states):
        # This is based on [tensorflows implementation](https://github.com/tensorflow/tensorflow/blob/c8a45a8e236776bed1d14fd71f3b6755bd63cc58/tensorflow/python/ops/seq2seq.py#L506).
        # First, we calculate new attention masks:
        #   attn = softmax(V^T * tanh(W2 * X +b2 + W1 * h))
        # and we build the new input as a concatenation of the input and the attention-weighted inputs,
        # which is then transformed back to the shape of x using W3
        #   x = W3*(x+X*attn)+b3
        # Then, we run the cell on a combination of the input and previous attention masks:
        #   h, state = cell(x, h).
        
        nb_samples, nb_time, input_dim = self.input_spec[0].shape
        h = states[0]
        X = states[-1]
        xW1 = states[-2]
        
        Xr = K.reshape(X,(-1,nb_time,1,input_dim))
        hW2 = K.dot(h,self.W2)+self.b2
        hW2 = K.reshape(hW2,(-1,1,1,input_dim)) 
        u = K.tanh(xW1+hW2)
        a = K.sum(self.V*u,[2,3])
        a = K.softmax(a)
        a = K.reshape(a,(-1, nb_time, 1, 1))
        
        # Weight attention vector by attention
        Xa = K.sum(a*Xr,[1,2])
        Xa = K.reshape(Xa,(-1,input_dim))
        
        # Merge input and attention weighted inputs into one vector of the right size.
        x = K.dot(K.concatenate([x,Xa],1),self.W3)+self.b3    
        
        h, new_states = self.layer.step(x, states)
        return h, new_states 
Example 52
Project: dockerizeme   Author: dockerizeme   File: snippet.py    Apache License 2.0
def get_constants(self, x):
        constants = self.layer.get_constants(x)
        
        # Calculate K.dot(x, W2) only once per sequence by making it a constant
        nb_samples, nb_time, input_dim = self.input_spec[0].shape
        Xr = K.reshape(x,(-1,nb_time,input_dim,1))
        Xrt = K.permute_dimensions(Xr, (0, 2, 1, 3))
        xW1t = K.conv2d(Xrt,self.W1,border_mode='same')     
        xW1 = K.permute_dimensions(xW1t, (0, 2, 3, 1))
        constants.append(xW1)
        
        # we need to supply the full sequence of inputs to step (as the attention_vector)
        constants.append(x)
        
        return constants 
Example 53
Project: spektral   Author: danielegrattarola   File: base.py    MIT License
def call(self, inputs):
        if self.trainable_kernel:
            output = K.dot(K.dot(inputs, self.kernel), K.transpose(inputs))
        else:
            output = K.dot(inputs, K.transpose(inputs))
        if self.activation is not None:
            output = self.activation(output)
        return output 
Example 54
Project: spektral   Author: danielegrattarola   File: base.py    MIT License
def call(self, inputs):
        F = K.int_shape(inputs)[-1]
        minkowski_prod_mat = np.eye(F)
        minkowski_prod_mat[-1, -1] = -1.
        minkowski_prod_mat = K.constant(minkowski_prod_mat)
        output = K.dot(inputs, minkowski_prod_mat)
        output = K.dot(output, K.transpose(inputs))
        output = K.clip(output, -10e9, -1.)

        if self.activation is not None:
            output = self.activation(output)

        return output 
Example 55
Project: spektral   Author: danielegrattarola   File: convolutional.py    MIT License
def call(self, inputs):
        features = inputs[0]
        fltr = inputs[1]

        # Convolution
        output = K.dot(features, self.kernel)
        output = filter_dot(fltr, output)

        if self.use_bias:
            output = K.bias_add(output, self.bias)
        if self.activation is not None:
            output = self.activation(output)
        return output 
Example 56
Project: spektral   Author: danielegrattarola   File: convolutional.py    MIT License
def dense_layer(self,
                    x,
                    units,
                    name,
                    activation=None,
                    use_bias=True,
                    kernel_initializer='glorot_uniform',
                    bias_initializer='zeros',
                    kernel_regularizer=None,
                    bias_regularizer=None,
                    kernel_constraint=None,
                    bias_constraint=None):
        input_dim = K.int_shape(x)[-1]
        kernel = self.add_weight(shape=(input_dim, units),
                                 name=name + '_kernel',
                                 initializer=kernel_initializer,
                                 regularizer=kernel_regularizer,
                                 constraint=kernel_constraint)
        bias = self.add_weight(shape=(units,),
                               name=name + '_bias',
                               initializer=bias_initializer,
                               regularizer=bias_regularizer,
                               constraint=bias_constraint)
        act = activations.get(activation)
        output = K.dot(x, kernel)
        if use_bias:
            output = K.bias_add(output, bias)
        output = act(output)
        return output 
Example 57
Project: spektral   Author: danielegrattarola   File: convolutional.py    MIT License
def call(self, inputs):
        features = inputs[0]
        fltr = inputs[1]

        # Compute MLP hidden features
        for i in range(len(self.kernels_mlp)):
            features = Dropout(self.dropout_rate)(features)
            features = K.dot(features, self.kernels_mlp[i])
            if self.use_bias:
                features += self.biases_mlp[i]
            if self.mlp_activation is not None:
                features = self.mlp_activation(features)

        # Compute MLP output
        mlp_out = K.dot(features, self.kernel_out)
        if self.use_bias:
            mlp_out += self.bias_out

        # Propagation
        Z = mlp_out
        for k in range(self.propagations):
            Z = (1 - self.alpha) * filter_dot(fltr, Z) + self.alpha * mlp_out

        if self.activation is not None:
            output = self.activation(Z)
        else:
            output = Z
        return output 
Example 58
Project: spektral   Author: danielegrattarola   File: convolutional.py    MIT License
def call(self, inputs):
        features = inputs[0]
        fltr = inputs[1]

        if not K.is_sparse(fltr):
            fltr = tf.contrib.layers.dense_to_sparse(fltr)

        # Input layer
        features_neigh = tf.segment_sum(tf.gather(features, fltr.indices[:, -1]), fltr.indices[:, -2])
        hidden = (1.0 + self.eps) * features + features_neigh
        hidden = K.dot(hidden, self.kernel_in)
        if self.use_bias:
            hidden = K.bias_add(hidden, self.bias_in)
        if self.hidden_activation is not None:
            hidden = self.hidden_activation(hidden)

        # More hidden layers (optional)
        for k in range(self.extra_hidden_layers):
            hidden = K.dot(hidden, self.kernels_hid[k])
            if self.use_bias:
                hidden = K.bias_add(hidden, self.biases_hid[k])
            if self.hidden_activation is not None:
                hidden = self.hidden_activation(hidden)

        # Output layer
        output = K.dot(hidden, self.kernel_out)
        if self.use_bias:
            output = K.bias_add(output, self.bias_out)
        if self.activation is not None:
            output = self.activation(output)

        return output 
Example 59
Project: spektral   Author: danielegrattarola   File: ops.py    MIT License
def matmul_A_B(A, B):
    """
    Computes A * B, dealing with sparsity and single/batch/mixed modes
    automatically. Mixed mode multiplication also works when A has rank 3 and
    B has rank 2. Sparse multiplication does not work with batch mode.
    :param A: Tensor or SparseTensor with rank 2 or 3.
    :param B: Tensor or SparseTensor with rank 2 or 3.
    :return:
    """
    mode = autodetect_mode(A, B)
    if mode == modes['S']:
        # Single mode
        output = single_mode_dot(A, B)
    elif mode == modes['M']:
        # Mixed mode
        output = mixed_mode_dot(A, B)
    elif mode == modes['iM']:
        # Inverted mixed (rank(A)=3, rank(B)=2)
        # Works only with dense tensors
        output = K.dot(A, B)
    elif mode == modes['B']:
        # Batch mode
        # Works only with dense tensors
        output = K.batch_dot(A, B)
    else:
        raise ValueError('A and B must have rank 2 or 3.')

    return output 
Example 60
Project: spektral   Author: danielegrattarola   File: ops.py    MIT License
def matmul_A_BT(A, B):
    """
    Computes A * B.T, dealing with sparsity and single/batch/mixed modes
    automatically. Mixed mode multiplication also works when A has rank 3 and
    B has rank 2. Sparse multiplication does not work with batch mode.
    :param A: Tensor or SparseTensor with rank 2 or 3.
    :param B: Tensor or SparseTensor with rank 2 or 3.
    :return:
    """
    mode = autodetect_mode(A, B)
    if mode == modes['S']:
        # Single (rank(A)=2, rank(B)=2)
        output = single_mode_dot(A, transpose(B))
    elif mode == modes['M']:
        # Mixed (rank(A)=2, rank(B)=3)
        output = mixed_mode_dot(A, transpose(B, (0, 2, 1)))
    elif mode == modes['iM']:
        # Inverted mixed (rank(A)=3, rank(B)=2)
        # Works only with dense tensors
        output = K.dot(A, transpose(B))
    elif mode == modes['B']:
        # Batch (rank(A)=3, rank(B)=3)
        # Works only with dense tensors
        output = K.batch_dot(A, transpose(B, (0, 2, 1)))
    else:
        raise ValueError('A and B must have rank 2 or 3.')

    return output


################################################################################
# Ops related to the modes of operation (single, mixed, batch)
################################################################################ 
Example 61
Project: keras_extension   Author: k1414st   File: graph.py    MIT License
def call(self, inputs, encode=True, training=None):
        """
        Args:
            input[0]: input_layer(N_Batch, L_sequence, Dim_feature)
            input[1]: weighted-digraph(L, L) = (from, to)
        Return:
            output_layer(N_Batch, L_sequence, Dim_feature)
        """
        if training is not None:
            raise NotImplementedError('training option is not implemented yet.')

        input_data = inputs[0]
        graph = inputs[1]

        if len(inputs) == 3:
            state = inputs[2]
        else:
            state = self.get_initial_state(inputs[0])

        if encode:
            beta = K.dot(input_data, self.e_weight)
        else:
            beta = input_data

        # BL(o)L(i),BL(i)D,->BL(o)D
        agg_beta = K.batch_dot(graph, beta, axes=(2, 1))
        # output = (h, [h, c])
        outputs, states = self.cell.call(beta, [agg_beta, state])

        if self.return_state:
            return [outputs, states[1]]
        else:
            return outputs 
Example 62
Project: keras_extension   Author: k1414st   File: layer.py    MIT License
def call(self, inputs, training=None):
        """
        Args:
            input[0]: input_layer(N_Batch, L_sequence, Dim_feature)
            input[1]: weighted-digraph(L, L) = (from, to)
        Return:
            output_layer(N_Batch, L_sequence, Dim_feature)
        """
        seq_data = inputs[0]
        graph = inputs[1]

        # beta (edge)
        beta = K.dot(seq_data, self.e_weight)
        beta = _batch_dot(graph, beta, axes=(2, 1))  # BL(o)L(i)M,BL(i)D,->BL(o)MD

        # connect edge, (node), bias
        out = beta
        if self.use_bias:
            out = K.bias_add(out, self.bias)
        if self.use_node_weight:
            s = self.v_weight.shape
            w = K.reshape(self.v_weight, (s[0], s[1]*s[2]))
            alpha = K.dot(seq_data, w)
            alpha = K.reshape(alpha, (-1, alpha.shape[1], s[1], s[2]))
            out = out + alpha
        gi = self.activation(out)
        return gi 
Example 63
Project: keras_extension   Author: k1414st   File: layer.py    MIT License
def call(self, inputs, encode=True, training=None):
        """
        Args:
            input[0]: input_layer(N_Batch, L_sequence, Dim_feature)
            input[1]: weighted-digraph(L, L) = (from, to)
        Return:
            output_layer(N_Batch, L_sequence, Dim_feature)
        """
        if training is not None:
            raise NotImplementedError('training option is not implemented yet.')

        input_data = inputs[0]
        graph = inputs[1]

        if len(inputs) == 3:
            state = inputs[2]
        else:
            state = self.get_initial_state(inputs[0])

        if encode:
            beta = K.dot(input_data, self.e_weight)
        else:
            beta = input_data

        # BL(o)L(i),BL(i)D,->BL(o)D
        agg_beta = K.batch_dot(graph, beta, axes=(2, 1))
        # output = (h, [h, c])
        outputs, states = self.cell.call(beta, [agg_beta, state])

        if self.return_state:
            return [outputs, states[1]]
        else:
            return outputs 
Example 64
Project: keras_extension   Author: k1414st   File: mac.py    MIT License
def call(self, inputs, training=None):

        c_prev, extractor, cw_s = inputs[0], inputs[1], inputs[2]

        q_i = K.bias_add(K.dot(extractor, self.q_kernel),
                         self.q_bias)
        cq_i = K.bias_add(K.dot(K.concatenate([c_prev, q_i], axis=-1),
                                self.cq_kernel),
                          self.cq_bias)
        cqcw = K.expand_dims(cq_i, axis=1) * cw_s
        ca_is = K.bias_add(K.dot(cqcw, self.ca_kernel),
                           self.ca_bias)
        cv_is = self.attention_activation(ca_is, axis=-1)
        c_i = K.sum(cv_is * cw_s, axis=1)
        return c_i 
Example 65
Project: keras_extension   Author: k1414st   File: core_sparse_tf.py    MIT License
def call(self, inputs):
        if self._is_sparse:
            output = tf.sparse.matmul(inputs, self.kernel)
        else:
            output = K.dot(inputs, self.kernel)
        if self.use_bias:
            output = K.bias_add(output, self.bias, data_format='channels_last')
        if self.activation is not None:
            output = self.activation(output)
        return output 
Example 66
Project: keras_extension   Author: k1414st   File: graph.py    MIT License
def call(self, inputs, encode=True, training=None):
        """
        Args:
            input[0]: input_layer(N_Batch, L_sequence, Dim_feature)
            input[1]: weighted-digraph(L, L) = (from, to)
        Return:
            output_layer(N_Batch, L_sequence, Dim_feature)
        """
        if training is not None:
            raise NotImplementedError('training option is not implemented yet.')

        input_data = inputs[0]
        graph = inputs[1]

        if len(inputs) == 3:
            state = inputs[2]
        else:
            state = self.get_initial_state(inputs[0])

        if encode:
            beta = K.dot(input_data, self.e_weight)
        else:
            beta = input_data

        # BL(o)L(i),BL(i)D,->BL(o)D
        agg_beta = K.batch_dot(graph, beta, axes=(2, 1))
        # output = (h, [h, c])
        outputs, states = self.cell.call(beta, [agg_beta, state])

        if self.return_state:
            return [outputs, states[1]]
        else:
            return outputs 
Example 67
Project: smach_based_introspection_framework   Author: birlrobotics   File: layer_utils.py    BSD 3-Clause "New" or "Revised" License
def _time_distributed_dense(x, w, b=None, dropout=None,
                            input_dim=None, output_dim=None,
                            timesteps=None, training=None):
    """Apply `y . w + b` for every temporal slice y of x.

    # Arguments
        x: input tensor.
        w: weight matrix.
        b: optional bias vector.
        dropout: whether to apply dropout (same dropout mask
            for every temporal slice of the input).
        input_dim: integer; optional dimensionality of the input.
        output_dim: integer; optional dimensionality of the output.
        timesteps: integer; optional number of timesteps.
        training: training phase tensor or boolean.

    # Returns
        Output tensor.
    """
    if not input_dim:
        input_dim = K.shape(x)[2]
    if not timesteps:
        timesteps = K.shape(x)[1]
    if not output_dim:
        output_dim = K.int_shape(w)[1]

    if dropout is not None and 0. < dropout < 1.:
        # apply the same dropout pattern at every timestep
        ones = K.ones_like(K.reshape(x[:, 0, :], (-1, input_dim)))
        dropout_matrix = K.dropout(ones, dropout)
        expanded_dropout_matrix = K.repeat(dropout_matrix, timesteps)
        x = K.in_train_phase(x * expanded_dropout_matrix, x, training=training)

    # collapse time dimension and batch dimension together
    x = K.reshape(x, (-1, input_dim))
    x = K.dot(x, w)
    if b is not None:
        x = K.bias_add(x, b)
    # reshape to 3D tensor
    if K.backend() == 'tensorflow':
        x = K.reshape(x, K.stack([-1, timesteps, output_dim]))
        x.set_shape([None, None, output_dim])
    else:
        x = K.reshape(x, (-1, timesteps, output_dim))
    return x 
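The reshape-dot-reshape trick above applies the same dense projection to every timestep at once instead of looping over time. A minimal NumPy sketch of that equivalence (arbitrary shapes, dropout omitted):

import numpy as np

B, T, D_in, D_out = 2, 5, 3, 4       # arbitrary batch, timesteps, input and output dims
x = np.random.rand(B, T, D_in)
w = np.random.rand(D_in, D_out)
b = np.random.rand(D_out)

# collapse batch and time, project once, then restore the time axis
y_fast = (x.reshape(-1, D_in) @ w + b).reshape(B, T, D_out)

# reference: apply the same projection timestep by timestep
y_loop = np.stack([x[:, t, :] @ w + b for t in range(T)], axis=1)
assert np.allclose(y_fast, y_loop)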
Example 68
Project: deep-models   Author: LaurentMazare   File: lstm_zoneout.py    Apache License 2.0 4 votes vote down vote up
def step(self, x, states):
    h_tm1 = states[0]
    c_tm1 = states[1]
    B_U = states[2]
    B_W = states[3]

    if self.consume_less == 'gpu':
      z = K.dot(x * B_W[0], self.W) + K.dot(h_tm1 * B_U[0], self.U) + self.b

      z0 = z[:, :self.output_dim]
      z1 = z[:, self.output_dim: 2 * self.output_dim]
      z2 = z[:, 2 * self.output_dim: 3 * self.output_dim]
      z3 = z[:, 3 * self.output_dim:]

      i = self.inner_activation(z0)
      f = self.inner_activation(z1)
      c = f * c_tm1 + i * self.activation(z2)
      o = self.inner_activation(z3)
    else:
      if self.consume_less == 'cpu':
        x_i = x[:, :self.output_dim]
        x_f = x[:, self.output_dim: 2 * self.output_dim]
        x_c = x[:, 2 * self.output_dim: 3 * self.output_dim]
        x_o = x[:, 3 * self.output_dim:]
      elif self.consume_less == 'mem':
        x_i = K.dot(x * B_W[0], self.W_i) + self.b_i
        x_f = K.dot(x * B_W[1], self.W_f) + self.b_f
        x_c = K.dot(x * B_W[2], self.W_c) + self.b_c
        x_o = K.dot(x * B_W[3], self.W_o) + self.b_o
      else:
        raise Exception('Unknown `consume_less` mode.')

      i = self.inner_activation(x_i + K.dot(h_tm1 * B_U[0], self.U_i))
      f = self.inner_activation(x_f + K.dot(h_tm1 * B_U[1], self.U_f))
      c = f * c_tm1 + i * self.activation(x_c + K.dot(h_tm1 * B_U[2], self.U_c))
      o = self.inner_activation(x_o + K.dot(h_tm1 * B_U[3], self.U_o))

    if self.zoneout_c:
      c = self.zoneout(c, c_tm1, pr=self.zoneout_c)
    h = o * self.activation(c)
    if self.zoneout_h:
      h = self.zoneout(h, h_tm1, pr=self.zoneout_h)
    return h, [h, c] 
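The self.zoneout helper is not shown in this snippet. As a rough sketch (not necessarily the author's exact implementation), training-time zoneout keeps each unit's previous value with probability pr; the function and shapes below are illustrative only:

import numpy as np

def zoneout_train(new_state, prev_state, pr):
    # keep each unit's previous value with probability pr (training-time behaviour)
    keep_prev = np.random.random_sample(new_state.shape) < pr
    return np.where(keep_prev, prev_state, new_state)

h_prev = np.zeros((2, 8))
h_new = np.random.rand(2, 8)
h = zoneout_train(h_new, h_prev, pr=0.3)   # roughly 30% of units stay at their previous value
# at inference, a common choice is the expectation: pr * prev_state + (1 - pr) * new_state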
Example 69
Project: keras-minimal-rnn   Author: titu1994   File: minimal_rnn.py    MIT License 4 votes vote down vote up
def _time_distributed_dense(x, w, b=None, dropout=None,
                            input_dim=None, output_dim=None,
                            timesteps=None, training=None):
    """Apply `y . w + b` for every temporal slice y of x.

    # Arguments
        x: input tensor.
        w: weight matrix.
        b: optional bias vector.
        dropout: whether to apply dropout (same dropout mask
            for every temporal slice of the input).
        input_dim: integer; optional dimensionality of the input.
        output_dim: integer; optional dimensionality of the output.
        timesteps: integer; optional number of timesteps.
        training: training phase tensor or boolean.

    # Returns
        Output tensor.
    """
    if not input_dim:
        input_dim = K.shape(x)[2]
    if not timesteps:
        timesteps = K.shape(x)[1]
    if not output_dim:
        output_dim = K.int_shape(w)[1]

    if dropout is not None and 0. < dropout < 1.:
        # apply the same dropout pattern at every timestep
        ones = K.ones_like(K.reshape(x[:, 0, :], (-1, input_dim)))
        dropout_matrix = K.dropout(ones, dropout)
        expanded_dropout_matrix = K.repeat(dropout_matrix, timesteps)
        x = K.in_train_phase(x * expanded_dropout_matrix, x, training=training)

    # collapse time dimension and batch dimension together
    x = K.reshape(x, (-1, input_dim))
    x = K.dot(x, w)
    if b is not None:
        x = K.bias_add(x, b)
    # reshape to 3D tensor
    if K.backend() == 'tensorflow':
        x = K.reshape(x, K.stack([-1, timesteps, output_dim]))
        x.set_shape([None, None, output_dim])
    else:
        x = K.reshape(x, (-1, timesteps, output_dim))
    return x 
Example 70
Project: keras-minimal-rnn   Author: titu1994   File: minimal_rnn.py    MIT License 4 votes vote down vote up
def call(self, inputs, states, training=None):
        # dropout matrices for input units
        dp_mask = self._dropout_mask
        # dropout matrices for recurrent units
        rec_dp_mask = self._recurrent_dropout_mask

        h_tm1 = states[0]  # previous memory state

        if self.implementation == 1:
            if 0 < self.dropout < 1.:
                inputs_x = inputs * dp_mask[0]
            else:
                inputs_x = inputs

            x_z = K.dot(inputs_x, self.kernel)
            if self.use_bias:
                x_z = K.bias_add(x_z, self.bias_z)

            if 0 < self.recurrent_dropout < 1.:
                h_tm1_u = h_tm1 * rec_dp_mask[0]
                h_tm1_z = h_tm1 * rec_dp_mask[1]
            else:
                h_tm1_u = h_tm1
                h_tm1_z = h_tm1

            z = self.activation(x_z)
            u = K.dot(h_tm1_u, self.recurrent_kernel_u) + K.dot(h_tm1_z, self.recurrent_kernel_z)
            if self.use_bias:
                u = K.bias_add(u, self.bias_u)
            u = self.recurrent_activation(u)

        else:
            if 0. < self.dropout < 1.:
                inputs *= dp_mask[0]
            z = K.dot(inputs, self.kernel)
            if self.use_bias:
                z = K.bias_add(z, self.bias_z)
            z = self.activation(z)

            if 0. < self.recurrent_dropout < 1.:
                h_tm1 *= rec_dp_mask[0]
            u = K.dot(h_tm1, self.recurrent_kernel_u) + K.dot(h_tm1, self.recurrent_kernel_z)
            if self.use_bias:
                u = K.bias_add(u, self.bias_u)
            u = self.recurrent_activation(u)

        h = u * h_tm1 + (1 - u) * z
        if 0 < self.dropout + self.recurrent_dropout:
            if training is None:
                h._uses_learning_phase = True
        return h, [h,] 
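The cell above is the MinimalRNN update: a candidate z computed from the input only and an update gate u computed from the previous state, mixed as h = u * h_prev + (1 - u) * z. A minimal NumPy sketch of one step (hypothetical shapes and weights; tanh/sigmoid assumed for the activations, biases and dropout omitted):

import numpy as np

def sigmoid(a):
    return 1.0 / (1.0 + np.exp(-a))

B, D_in, units = 2, 3, 4
x = np.random.rand(B, D_in)
h_prev = np.random.rand(B, units)
kernel = np.random.rand(D_in, units)
recurrent_u = np.random.rand(units, units)
recurrent_z = np.random.rand(units, units)

z = np.tanh(x @ kernel)                                    # candidate state, from the input only
u = sigmoid(h_prev @ recurrent_u + h_prev @ recurrent_z)   # update gate, from the previous state
h = u * h_prev + (1.0 - u) * z                             # convex mix of old state and candidate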
Example 71
Project: KDDCup2019_admin   Author: DominickZhang   File: fm_keras.py    MIT License 4 votes vote down vote up
def call(self, inputs, **kwargs):
        one_hot_feature_index = K.cast(K.slice(inputs, (0, 0), (-1, self.feature_num)), "int32")
        numeric_feature = K.slice(inputs, (0, self.feature_num), (-1, -1))

        ## first order
        first_order_index = K.reshape(one_hot_feature_index, (-1,))
        get_first_order_weights = K.gather(self.w_one_hot, first_order_index)
        first_order_weights = K.reshape(get_first_order_weights, (-1, self.feature_num))

        first_order = K.sum(first_order_weights, 1) + K.sum(K.dot(numeric_feature, self.w_numeric), 1)

        ## second order
        get_second_order_weights = K.gather(self.v_one_hot, first_order_index)
        second_order_weights = K.reshape(get_second_order_weights, (-1, self.feature_num, self.embedding_size))
        numeric_weights = K.expand_dims(self.v_numeric, 0) * K.expand_dims(numeric_feature, -1)

        all_weights = K.concatenate([second_order_weights, numeric_weights], axis=1)
        weights_sum_square = K.sum(K.square(all_weights), 1)
        weights_square_sum = K.square(K.sum(all_weights, 1))
        second_order = 0.5*K.sum(weights_square_sum - weights_sum_square, 1)

        output = first_order + second_order + self.b

        if self.activation is not None:
            output = self.activation(output)
        output = K.expand_dims(output, -1)
        return output



        '''X_square = K.square(inputs)

        xv = K.square(K.dot(inputs, self.v))
        xw = K.dot(inputs, self.w)

        p = 0.5 * K.sum(xv - K.dot(X_square, K.square(self.v)), 1)
        rp = K.repeat_elements(K.reshape(p, (-1, 1)), self.output_dim, axis=-1)

        f = xw + rp + self.b

        output = K.reshape(f, (-1, self.output_dim))
        
        if self.activation is not None:
            output = self.activation(output)

        return output''' 
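The second_order term above uses the factorization-machine identity: the sum over all pairs i < j of <v_i, v_j> x_i x_j equals 0.5 * ((sum_i v_i x_i)^2 - sum_i (v_i x_i)^2), summed over the embedding dimension. A small NumPy check of that identity on toy data:

import numpy as np

n_features, k = 6, 3
x = np.random.rand(n_features)          # feature values
v = np.random.rand(n_features, k)       # per-feature embedding vectors

# brute force: all pairwise interactions
brute = sum(v[i] @ v[j] * x[i] * x[j]
            for i in range(n_features) for j in range(i + 1, n_features))

# FM trick: 0.5 * (square of sum - sum of squares), summed over the embedding dimension
vx = v * x[:, None]
fast = 0.5 * np.sum(np.sum(vx, axis=0) ** 2 - np.sum(vx ** 2, axis=0))
assert np.isclose(brute, fast)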
Example 72
Project: spektral   Author: danielegrattarola   File: pooling.py    MIT License 4 votes vote down vote up
def call(self, inputs):
        if len(inputs) == 3:
            X, A, I = inputs
            self.data_mode = 'graph'
        else:
            X, A = inputs
            I = tf.zeros(tf.shape(X)[:1], dtype=tf.int32)
            self.data_mode = 'single'
        if K.ndim(I) == 2:
            I = I[:, 0]

        A_is_sparse = K.is_sparse(A)

        # Get mask
        y = K.dot(X, K.l2_normalize(self.kernel))
        N = K.shape(X)[-2]
        indices = ops.segment_top_k(y[:, 0], I, self.ratio, self.top_k_var)
        mask = tf.scatter_nd(tf.expand_dims(indices, 1), tf.ones_like(indices), (N,))

        # Multiply X and y to make layer differentiable
        features = X * self.gating_op(y)

        axis = 0 if len(K.int_shape(A)) == 2 else 1  # Cannot use negative axis in tf.boolean_mask
        # Reduce X
        X_pooled = tf.boolean_mask(features, mask, axis=axis)

        # Compute A^2
        if A_is_sparse:
            A_dense = tf.sparse.to_dense(A)
        else:
            A_dense = A
        A_squared = K.dot(A, A_dense)

        # Reduce A
        A_pooled = tf.boolean_mask(A_squared, mask, axis=axis)
        A_pooled = tf.boolean_mask(A_pooled, mask, axis=axis + 1)
        if A_is_sparse:
            A_pooled = tf.contrib.layers.dense_to_sparse(A_pooled)

        output = [X_pooled, A_pooled]

        # Reduce I
        if self.data_mode == 'graph':
            I_pooled = tf.boolean_mask(I[:, None], mask)[:, 0]
            output.append(I_pooled)

        if self.return_mask:
            output.append(mask)

        return output 
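Conceptually, the layer scores every node with a learned (normalized) vector, keeps the top ratio * N nodes, gates the kept features with their scores, and coarsens the adjacency as A^2 restricted to the kept nodes. A simplified single-graph NumPy sketch, with sigmoid assumed in place of self.gating_op and a plain top-k in place of ops.segment_top_k:

import numpy as np

def sigmoid(a):
    return 1.0 / (1.0 + np.exp(-a))

N, F, ratio = 6, 4, 0.5
X = np.random.rand(N, F)
A = (np.random.rand(N, N) > 0.5).astype(float)
p = np.random.rand(F, 1)                       # learned scoring vector

y = X @ (p / np.linalg.norm(p))                # node scores, shape (N, 1)
k = int(np.ceil(ratio * N))
idx = np.argsort(-y[:, 0])[:k]                 # indices of the top-k nodes

X_pooled = X[idx] * sigmoid(y[idx])            # gate features with their scores
A_pooled = (A @ A)[np.ix_(idx, idx)]           # A^2 restricted to the kept nodes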
Example 73
Project: spektral   Author: danielegrattarola   File: pooling.py    MIT License 4 votes vote down vote up
def call(self, inputs):
        if len(inputs) == 3:
            X, A, I = inputs
            self.data_mode = 'graph'
        else:
            X, A = inputs
            I = tf.zeros(tf.shape(X)[:1], dtype=tf.int32)
            self.data_mode = 'single'
        if K.ndim(I) == 2:
            I = I[:, 0]

        A_is_sparse = K.is_sparse(A)

        # Get mask
        y = K.dot(X, self.kernel)
        y = filter_dot(A, y)
        N = K.shape(X)[-2]
        indices = ops.segment_top_k(y[:, 0], I, self.ratio, self.top_k_var)
        mask = tf.scatter_nd(tf.expand_dims(indices, 1), tf.ones_like(indices), (N,))

        # Multiply X and y to make layer differentiable
        features = X * self.gating_op(y)

        axis = 0 if len(K.int_shape(A)) == 2 else 1  # Cannot use negative axis in tf.boolean_mask
        # Reduce X
        X_pooled = tf.boolean_mask(features, mask, axis=axis)

        # Compute A^2
        if A_is_sparse:
            A_dense = tf.sparse.to_dense(A)
        else:
            A_dense = A
        A_squared = K.dot(A, A_dense)

        # Reduce A
        A_pooled = tf.boolean_mask(A_squared, mask, axis=axis)
        A_pooled = tf.boolean_mask(A_pooled, mask, axis=axis + 1)
        if A_is_sparse:
            A_pooled = tf.contrib.layers.dense_to_sparse(A_pooled)

        output = [X_pooled, A_pooled]

        # Reduce I
        if self.data_mode == 'graph':
            I_pooled = tf.boolean_mask(I[:, None], mask)[:, 0]
            output.append(I_pooled)

        if self.return_mask:
            output.append(mask)

        return output 
Example 74
Project: spektral   Author: danielegrattarola   File: convolutional.py    MIT License 4 votes vote down vote up
def call(self, inputs):
        X = inputs[0]
        A = inputs[1]

        outputs = []
        output_attn = []
        for head in range(self.attn_heads):
            kernel = self.kernels[head]
            attention_kernel = self.attn_kernels[head]  # Attention kernel a in the paper (2F' x 1)

            # Compute inputs to attention network
            features = K.dot(X, kernel)

            # Compute attention coefficients
            # [[a_1], [a_2]]^T [[Wh_i], [Wh_j]] = [a_1]^T [Wh_i] + [a_2]^T [Wh_j]
            attn_for_self = K.dot(features, attention_kernel[0])    # [a_1]^T [Wh_i]
            attn_for_neighs = K.dot(features, attention_kernel[1])  # [a_2]^T [Wh_j]
            if len(K.int_shape(features)) == 2:
                # Single / mixed mode
                attn_for_neighs_T = K.transpose(attn_for_neighs)
            else:
                # Batch mode
                attn_for_neighs_T = K.permute_dimensions(attn_for_neighs, (0, 2, 1))
            attn_coef = attn_for_self + attn_for_neighs_T
            attn_coef = LeakyReLU(alpha=0.2)(attn_coef)

            # Mask values before activation (Vaswani et al., 2017)
            mask = -10e9 * (1.0 - A)
            attn_coef += mask

            # Apply softmax to get attention coefficients
            attn_coef = K.softmax(attn_coef)
            output_attn.append(attn_coef)

            # Apply dropout to attention coefficients
            attn_coef_drop = Dropout(self.dropout_rate)(attn_coef)

            # Convolution
            features = filter_dot(attn_coef_drop, features)
            if self.use_bias:
                features = K.bias_add(features, self.biases[head])

            # Add output of attention head to final output
            outputs.append(features)

        # Aggregate the heads' output according to the reduction method
        if self.concat_heads:
            output = K.concatenate(outputs)
        else:
            output = K.mean(K.stack(outputs), axis=0)

        output = self.activation(output)

        if self.return_attn_coef:
            return output, output_attn
        else:
            return output 
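The broadcast trick commented above relies on a single attention vector a = [a_1; a_2] applied to the concatenation [Wh_i || Wh_j] splitting into a_1^T Wh_i + a_2^T Wh_j, so the full N x N coefficient matrix is just a column vector plus a row vector. A small NumPy check (single head, arbitrary shapes):

import numpy as np

N, F_out = 5, 4
Wh = np.random.rand(N, F_out)                  # transformed node features (X dot kernel)
a1 = np.random.rand(F_out)                     # "self" half of the attention kernel
a2 = np.random.rand(F_out)                     # "neighbour" half of the attention kernel

attn_for_self = Wh @ a1                        # (N,)
attn_for_neighs = Wh @ a2                      # (N,)
attn_broadcast = attn_for_self[:, None] + attn_for_neighs[None, :]   # (N, N)

# reference: score the concatenation [Wh_i || Wh_j] for every ordered pair explicitly
a = np.concatenate([a1, a2])                   # (2 * F_out,)
attn_pairs = np.array([[np.concatenate([Wh[i], Wh[j]]) @ a for j in range(N)]
                       for i in range(N)])
assert np.allclose(attn_broadcast, attn_pairs)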
Example 75
Project: spektral   Author: danielegrattarola   File: ops.py    MIT License 4 votes vote down vote up
def matmul_AT_B_A(A, B):
    """
    Computes A.T * B * A, dealing with sparsity and single/batch/mixed modes
    automatically. Mixed mode multiplication also works when A has rank 3 and
    B has rank 2. Sparse multiplication does not work with batch mode.
    :param A: Tensor or SparseTensor with rank 2 or 3.
    :param B: Tensor or SparseTensor with rank 2 or 3.
    :return:
    """
    mode = autodetect_mode(A, B)
    if mode == modes['S']:
        # Single (rank(A)=2, rank(B)=2)
        output = single_mode_dot(single_mode_dot(transpose(A), B), A)
    elif mode == modes['M']:
        # Mixed (rank(A)=2, rank(B)=3)
        output = mixed_mode_dot(transpose(A), B)
        if K.is_sparse(A):
            output = transpose(
                mixed_mode_dot(transpose(A), transpose(output, (0, 2, 1))),
                (0, 2, 1)
            )
        else:
            output = K.dot(output, A)
    elif mode == modes['iM']:
        # Inverted mixed (rank(A)=3, rank(B)=2)
        # Works only with dense tensors
        output = mixed_mode_dot(B, A)
        output = K.batch_dot(transpose(A, (0, 2, 1)), output)
    elif mode == modes['B']:
        # Batch (rank(A)=3, rank(B)=3)
        # Works only with dense tensors
        output = K.batch_dot(
            K.batch_dot(
                transpose(A, (0, 2, 1)),
                B
            ),
            A
        )
    else:
        raise ValueError('A and B must have rank 2 or 3.')

    return output 
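In the dense single and batch modes, the helper reduces to an ordinary A^T B A product. A short NumPy reference with arbitrary shapes:

import numpy as np

N, F, batch = 4, 3, 2
A = np.random.rand(N, F)
B = np.random.rand(N, N)
single = A.T @ B @ A                           # single mode, shape (F, F)

A_b = np.random.rand(batch, N, F)
B_b = np.random.rand(batch, N, N)
batch_out = np.einsum('bnf,bnm,bmg->bfg', A_b, B_b, A_b)   # batch mode, shape (batch, F, F)
assert np.allclose(batch_out, np.stack([a.T @ b @ a for a, b in zip(A_b, B_b)]))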
Example 76
Project: keras_extension   Author: k1414st   File: graph.py    MIT License 4 votes vote down vote up
def __graph_attention(self, g, x, w, a, n_heads=1):
        """
        using graph attention mechanism.

        Args:
            g: input Tensor of graph adjacency matrix.
               shape: (B(Batch_size), N(N_nodes), N)
            x: input Tensor of node-data after convolutioned.
               shape: (B, N, F_in(F_inputs))
            w: weight matrix variable
               (to transform input to attentionable hidden states.)
               shape: (F_in, F(F_outputs) * H(N_heads))
            a: merge weight vector from attentionable state to attention value.
               shape: (2 * F,)
        """
        H = n_heads
        F_in, FH = w.shape[0], w.shape[1]
        F = FH // H
        N = g.shape[-1]

        # w = K.reshape(F1, H * F2)  # (F_in, H*F)
        x = K.expand_dims(K.dot(x, w), axis=2)  # (B, N, 1, H*F)
        x = K.concatenate([x[:, :, :, F*i:F*(i+1)]
                           for i in range(H)], axis=2)  # (B, N, H, F)

        # concat meshly
        _x1 = K.tile(K.expand_dims(x, axis=1), (1, N, 1, 1, 1))
        _x2 = K.tile(K.expand_dims(x, axis=2), (1, 1, N, 1, 1))
        x = K.concatenate([_x1, _x2], axis=4)  # (B, N, N, H, 2F)

        def _expand_dims_recursive(x, axis_list):
            assert(len(axis_list) > 0)
            if len(axis_list) == 1:
                return K.expand_dims(x, axis_list[0])
            return _expand_dims_recursive(K.expand_dims(x, axis_list[0]),
                                          axis_list=axis_list[1:])
        # squeeze 2F
        a = _expand_dims_recursive(a, (0, 0, 0, 0))
        x = K.exp(K.relu(K.sum(x * a, axis=-1), alpha=0.2))  # (B, N, N, H)

        # normalize by neighbors
        x_norm = K.sum(x * K.expand_dims(g, axis=-1),
                       axis=2, keepdims=True)  # (B, N, 1, H)
        return x / x_norm  # (B, N, N, H) 
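The tiling above builds, for every ordered node pair, the concatenation of the two transformed feature vectors, scores it with a, exponentiates, and normalizes by the adjacency-weighted sum over neighbours. A simplified single-head, unbatched NumPy sketch of the same steps (shapes are arbitrary; a self-loop is added so every node has at least one neighbour):

import numpy as np

def leaky_relu(a, alpha=0.2):
    return np.where(a > 0, a, alpha * a)

N, F_in, F = 5, 3, 4
g = (np.random.rand(N, N) > 0.5).astype(float)   # adjacency matrix
np.fill_diagonal(g, 1.0)                         # ensure each node has at least one neighbour
x = np.random.rand(N, F_in)
w = np.random.rand(F_in, F)
a = np.random.rand(2 * F)

h = x @ w                                        # transformed features, (N, F)
x1 = np.tile(h[None, :, :], (N, 1, 1))           # x1[i, j] == h[j]
x2 = np.tile(h[:, None, :], (1, N, 1))           # x2[i, j] == h[i]
pairs = np.concatenate([x1, x2], axis=-1)        # pairs[i, j] == [h[j], h[i]], shape (N, N, 2F)

e = np.exp(leaky_relu(pairs @ a))                # unnormalized scores, (N, N)
norm = np.sum(e * g, axis=1, keepdims=True)      # neighbour-weighted normalizer, (N, 1)
att = e / norm                                   # normalized attention coefficients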
Example 77
Project: keras_extension   Author: k1414st   File: graph.py    MIT License 4 votes vote down vote up
def call(self, inputs, training=None):
        """
        Args:
            input[0]: input_layer(N_Batch, L_sequence, Dim_feature)
            input[1]: weighted-digraph(L, L) = (from, to)
        Return:
            output_layer(N_Batch, L_sequence, Dim_feature)
        """
        seq_data = inputs[0]
        graph = inputs[1]

        # graph gate
        if self.use_gate:
            if self.gate_mode == 'single_weight':
                graph = graph * self.__graph_gate(seq_data, self.gate_w1)
            else:
                graph = graph * \
                    self.__graph_gate(seq_data, self.gate_w1, self.gate_w2)

        # beta (edge)
        beta = K.dot(seq_data, self.e_weight)
        if self.use_gat:
            att_alpha = \
                self.__graph_attention(graph, seq_data, self.att_w_weight,
                                       self.att_a_weight, n_heads=self.gat_n_heads)
            list_att_beta = []
            for i in range(self.gat_n_heads):
                att_beta = K.batch_dot(att_alpha[:, :, :, i], beta, axes=(2, 1))
                list_att_beta.append(att_beta)
            beta = sum(list_att_beta)
        else:
            beta = K.batch_dot(graph, beta, axes=(2, 1))  # BL(o)L(i),BL(i)D,->BL(o)D

        # connect edge, (node), bias
        out = beta
        if self.use_bias:
            out = K.bias_add(out, self.bias)
        if self.use_node_weight:
            alpha = K.dot(seq_data, self.v_weight)
            out = out + alpha
        gi = self.activation(out)
        return gi 
Example 78
Project: keras_extension   Author: k1414st   File: layer.py    MIT License 4 votes vote down vote up
def _graph_attention(self, g, x, w, a, n_heads=1):
        """
        using graph attention mechanism.

        Args:
            g: input Tensor of graph adjacency matrix.
               shape: (B(Batch_size), N(N_nodes), N)
            x: input Tensor of node-data after convolutioned.
               shape: (B, N, F_in(F_inputs))
            w: weight matrix variable
               (to transform input to attentionable hidden states.)
               shape: (F_in, F(F_outputs) * H(N_heads))
            a: merge weight vector from attentionable state to attention value.
               shape: (2 * F,)
        """
        H = n_heads
        F_in, FH = w.shape[0], w.shape[1]
        F = FH // H
        N = g.shape[-1]

        # w = K.reshape(F1, H * F2)  # (F_in, H*F)
        x = K.expand_dims(K.dot(x, w), axis=2)  # (B, N, 1, H*F)
        x = K.concatenate([x[:, :, :, F*i:F*(i+1)]
                           for i in range(H)], axis=2)  # (B, N, H, F)

        # concat meshly
        _x1 = K.tile(K.expand_dims(x, axis=1), (1, N, 1, 1, 1))
        _x2 = K.tile(K.expand_dims(x, axis=2), (1, 1, N, 1, 1))
        x = K.concatenate([_x1, _x2], axis=4)  # (B, N, N, H, 2F)

        def _expand_dims_recursive(x, axis_list):
            assert(len(axis_list) > 0)
            if len(axis_list) == 1:
                return K.expand_dims(x, axis_list[0])
            return _expand_dims_recursive(K.expand_dims(x, axis_list[0]),
                                          axis_list=axis_list[1:])
        # squeeze 2F
        a = _expand_dims_recursive(a, (0, 0, 0, 0))
        x = K.exp(K.relu(K.sum(x * a, axis=-1), alpha=0.2))  # (B, N, N, H)

        # normalize by neighbors
        x_norm = K.sum(x * K.expand_dims(g, axis=-1),
                       axis=2, keepdims=True)  # (B, N, 1, H)
        return x / x_norm  # (B, N, N, H) 
Example 79
Project: keras_extension   Author: k1414st   File: graph.py    MIT License 4 votes vote down vote up
def __graph_attention(self, g, x, w, a, n_heads=1):
        """
        using graph attention mechanism.

        Args:
            g: input Tensor of graph adjacency matrix.
               shape: (B(Batch_size), N(N_nodes), N)
            x: input Tensor of node-data after convolutioned.
               shape: (B, N, F_in(F_inputs))
            w: weight matrix variable
               (to transform input to attentionable hidden states.)
               shape: (F_in, F(F_outputs) * H(N_heads))
            a: merge weight vector from attentionable state to attention value.
               shape: (2 * F,)
        """
        H = n_heads
        F_in, FH = w.shape[0], w.shape[1]
        F = FH // H
        N = g.shape[-1]

        # w = K.reshape(F1, H * F2)  # (F_in, H*F)
        x = K.expand_dims(K.dot(x, w), axis=2)  # (B, N, 1, H*F)
        x = K.concatenate([x[:, :, :, F*i:F*(i+1)]
                           for i in range(H)], axis=2)  # (B, N, H, F)

        # concat meshly
        _x1 = K.tile(K.expand_dims(x, axis=1), (1, N, 1, 1, 1))
        _x2 = K.tile(K.expand_dims(x, axis=2), (1, 1, N, 1, 1))
        x = K.concatenate([_x1, _x2], axis=4)  # (B, N, N, H, 2F)

        def _expand_dims_recursive(x, axis_list):
            assert(len(axis_list) > 0)
            if len(axis_list) == 1:
                return K.expand_dims(x, axis_list[0])
            return _expand_dims_recursive(K.expand_dims(x, axis_list[0]),
                                          axis_list=axis_list[1:])
        # squeeze 2F
        a = _expand_dims_recursive(a, (0, 0, 0, 0))
        x = K.exp(K.relu(K.sum(x * a, axis=-1), alpha=0.2))  # (B, N, N, H)

        # normalize by neighbors
        x_norm = K.sum(x * K.expand_dims(g, axis=-1),
                       axis=2, keepdims=True)  # (B, N, 1, H)
        return x / x_norm  # (B, N, N, H) 
Example 80
Project: keras_extension   Author: k1414st   File: graph.py    MIT License 4 votes vote down vote up
def call(self, inputs, training=None):
        """
        Args:
            input[0]: input_layer(N_Batch, L_sequence, Dim_feature)
            input[1]: weighted-digraph(L, L) = (from, to)
        Return:
            output_layer(N_Batch, L_sequence, Dim_feature)
        """
        seq_data = inputs[0]
        graph = inputs[1]

        # graph gate
        if self.use_gate:
            if self.gate_mode == 'single_weight':
                graph = graph * self.__graph_gate(seq_data, self.gate_w1)
            else:
                graph = graph * \
                    self.__graph_gate(seq_data, self.gate_w1, self.gate_w2)

        # beta (edge)
        beta = K.dot(seq_data, self.e_weight)
        if self.use_gat:
            att_alpha = \
                self.__graph_attention(graph, seq_data, self.att_w_weight,
                                       self.att_a_weight, n_heads=self.gat_n_heads)
            list_att_beta = []
            for i in range(self.gat_n_heads):
                att_beta = K.batch_dot(att_alpha[:, :, :, i], beta, axes=(2, 1))
                list_att_beta.append(att_beta)
            beta = sum(list_att_beta)
        else:
            beta = K.batch_dot(graph, beta, axes=(2, 1))  # BL(o)L(i),BL(i)D,->BL(o)D

        # connect edge, (node), bias
        out = beta
        if self.use_bias:
            out = K.bias_add(out, self.bias)
        if self.use_node_weight:
            alpha = K.dot(seq_data, self.v_weight)
            out = out + alpha
        gi = self.activation(out)
        return gi