Python keras.backend.any() Examples

The following are 30 code examples of keras.backend.any(), taken from open-source projects. You can go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module keras.backend.
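For orientation: keras.backend.any() performs a logical-OR reduction along one or more axes, which is why it appears throughout the mask-handling code below. A minimal sketch of its behavior, assuming the TensorFlow backend (the values are made up for illustration):

from keras import backend as K

# Word-level padding mask: 1 marks a real token, 0 marks padding.
# Shape: (batch_size=2, num_sentences=2, num_words=3)
word_mask = K.constant([[[1, 1, 0], [0, 0, 0]],
                        [[1, 0, 0], [1, 1, 1]]])

# A sentence is "real" if any of its words is real.
sentence_mask = K.any(word_mask, axis=-1)  # shape (2, 2), dtype bool

print(K.eval(sentence_mask))
# [[ True False]
#  [ True  True]]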
Example #1
Source File: crf.py    From keras-contrib with MIT License
def get_energy(self, y_true, input_energy, mask):
        """Energy = a1' y1 + u1' y1 + y1' U y2 + u2' y2 + y2' U y3 + u3' y3 + an' y3
        """
        input_energy = K.sum(input_energy * y_true, 2)  # (B, T)
        # (B, T-1)
        chain_energy = K.sum(K.dot(y_true[:, :-1, :],
                                   self.chain_kernel) * y_true[:, 1:, :], 2)

        if mask is not None:
            mask = K.cast(mask, K.floatx())
            # (B, T-1), mask[:,:-1]*mask[:,1:] makes it work with any padding
            chain_mask = mask[:, :-1] * mask[:, 1:]
            input_energy = input_energy * mask
            chain_energy = chain_energy * chain_mask
        total_energy = K.sum(input_energy, -1) + K.sum(chain_energy, -1)  # (B, )

        return total_energy 
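Written out in general (the docstring above spells it out for a length-3 sequence), the quantity being assembled is the standard linear-chain CRF energy

    E(y) = a_1' y_1 + sum_t u_t' y_t + sum_t y_t' U y_{t+1} + a_n' y_n

where u_t is the per-position input energy (input_energy), U is the transition matrix (self.chain_kernel), and a_1, a_n are the optional boundary energies. This method computes only the u and U terms, using the mask to zero out contributions from padded positions; the boundary terms are added elsewhere when use_boundary is set.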
Example #2
Source File: layers.py    From sequence-tagging-ner with Apache License 2.0
def loss_function(self):
        if self.learn_mode == 'join':
            def loss(y_true, y_pred):
                assert self._inbound_nodes, 'CRF has not connected to any layer.'
                assert not self._outbound_nodes, 'When learn_mode="join", CRF must be the last layer.'
                if self.sparse_target:
                    y_true = K.one_hot(K.cast(y_true[:, :, 0], 'int32'), self.units)
                X = self._inbound_nodes[0].input_tensors[0]
                mask = self._inbound_nodes[0].input_masks[0]
                nloglik = self.get_negative_log_likelihood(y_true, X, mask)
                return nloglik
            return loss
        else:
            if self.sparse_target:
                return sparse_categorical_crossentropy
            else:
                return categorical_crossentropy 
Example #3
Source File: layers.py    From indic_tagger with Apache License 2.0
def loss_function(self):
        if self.learn_mode == 'join':
            def loss(y_true, y_pred):
                assert self._inbound_nodes, 'CRF has not connected to any layer.'
                assert not self._outbound_nodes, 'When learn_mode="join", CRF must be the last layer.'
                if self.sparse_target:
                    y_true = K.one_hot(K.cast(y_true[:, :, 0], 'int32'), self.units)
                X = self._inbound_nodes[0].input_tensors[0]
                mask = self._inbound_nodes[0].input_masks[0]
                nloglik = self.get_negative_log_likelihood(y_true, X, mask)
                return nloglik
            return loss
        else:
            if self.sparse_target:
                return sparse_categorical_crossentropy
            else:
                return categorical_crossentropy 
Example #4
Source File: pooling.py    From onto-lstm with Apache License 2.0
def call(self, x, mask=None):
        mean = super(IntraAttention, self).call(x, mask)
        # x: (batch_size, input_length, input_dim)
        # mean: (batch_size, input_dim)
        ones = K.expand_dims(K.mean(K.ones_like(x), axis=(0, 2)), axis=0)  # (1, input_length)
        # (batch_size, input_length, input_dim)
        tiled_mean = K.permute_dimensions(K.dot(K.expand_dims(mean), ones), (0, 2, 1))
        if mask is not None:
            if K.ndim(mask) > K.ndim(x):
                # Assuming this is because of the bug in Bidirectional. Temporary fix follows.
                # TODO: Fix Bidirectional.
                mask = K.any(mask, axis=(-2, -1))
            if K.ndim(mask) < K.ndim(x):
                mask = K.expand_dims(mask)
            x = switch(mask, x, K.zeros_like(x))
        # (batch_size, input_length, proj_dim)
        projected_combination = K.tanh(K.dot(x, self.vector_projector) + K.dot(tiled_mean, self.mean_projector))
        scores = K.dot(projected_combination, self.scorer)  # (batch_size, input_length)
        weights = K.softmax(scores)  # (batch_size, input_length)
        attended_x = K.sum(K.expand_dims(weights) * x, axis=1)  # (batch_size, input_dim)
        return attended_x 
Example #5
Source File: keras_bert_layer.py    From nlp_xiaojiang with MIT License
def get_energy(self, y_true, input_energy, mask):
        """Energy = a1' y1 + u1' y1 + y1' U y2 + u2' y2 + y2' U y3 + u3' y3 + an' y3
        """
        input_energy = K.sum(input_energy * y_true, 2)  # (B, T)
        # (B, T-1)
        chain_energy = K.sum(K.dot(y_true[:, :-1, :],
                                   self.chain_kernel) * y_true[:, 1:, :], 2)

        if mask is not None:
            mask = K.cast(mask, K.floatx())
            # (B, T-1), mask[:,:-1]*mask[:,1:] makes it work with any padding
            chain_mask = mask[:, :-1] * mask[:, 1:]
            input_energy = input_energy * mask
            chain_energy = chain_energy * chain_mask
        total_energy = K.sum(input_energy, -1) + K.sum(chain_energy, -1)  # (B, )

        return total_energy 
Example #6
Source File: layers.py    From anago with MIT License
def loss_function(self):
        if self.learn_mode == 'join':
            def loss(y_true, y_pred):
                assert self._inbound_nodes, 'CRF has not connected to any layer.'
                assert not self._outbound_nodes, 'When learn_mode="join", CRF must be the last layer.'
                if self.sparse_target:
                    y_true = K.one_hot(K.cast(y_true[:, :, 0], 'int32'), self.units)
                X = self._inbound_nodes[0].input_tensors[0]
                mask = self._inbound_nodes[0].input_masks[0]
                nloglik = self.get_negative_log_likelihood(y_true, X, mask)
                return nloglik
            return loss
        else:
            if self.sparse_target:
                return sparse_categorical_crossentropy
            else:
                return categorical_crossentropy 
Example #7
Source File: pooling.py    From onto-lstm with Apache License 2.0
def call(self, x, mask=None):
        # x: (batch_size, input_length, input_dim)
        if mask is None:
            return K.mean(x, axis=1)  # (batch_size, input_dim)
        else:
            # This is to remove padding from the computational graph.
            if K.ndim(mask) > K.ndim(x):
                # This is due to the bug in Bidirectional that is passing the input mask
                # instead of computing output mask.
                # TODO: Fix the implementation of Bidirectional.
                mask = K.any(mask, axis=(-2, -1))
            if K.ndim(mask) < K.ndim(x):
                mask = K.expand_dims(mask)
            masked_input = switch(mask, x, K.zeros_like(x))
            # Normalize per sample (sum over the time axis), not across the whole batch.
            weights = K.cast(mask / (K.sum(mask, axis=1, keepdims=True) + K.epsilon()), 'float32')
            return K.sum(masked_input * weights, axis=1)  # (batch_size, input_dim) 
Example #8
Source File: __init__.py    From ikelos with MIT License
def normalize_mask(x, mask):
    '''Keep the mask aligned with the tensor x

    Arguments: x is a data tensor; mask is a binary tensor
    Rationale: keep mask at same dimensionality as x, but only with a length-1 
               trailing dimension. This ensures broadcastability, which is important
               because inferring shapes is hard and shapes are easy to get wrong. 
    '''
    mask = K.cast(mask, K.floatx())
    while K.ndim(mask) != K.ndim(x):
        if K.ndim(mask) > K.ndim(x):
            mask = K.any(mask, axis=-1)
        elif K.ndim(mask) < K.ndim(x):
            mask = K.expand_dims(mask)
    return K.any(mask, axis=-1, keepdims=True) 
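A hypothetical usage sketch (the tensors and shapes here are illustrative, not from ikelos), assuming the TensorFlow backend and the normalize_mask definition above:

import numpy as np
from keras import backend as K

x = K.constant(np.random.rand(2, 4, 3))          # (batch, time, features)
mask = K.constant([[1, 1, 0, 0],
                   [1, 1, 1, 0]])                # (batch, time)

aligned = normalize_mask(x, mask)                # (batch, time, 1), bool
masked_x = x * K.cast(aligned, K.floatx())       # broadcasts; padded steps zeroed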
Example #9
Source File: layers.py    From sequence-tagging-ner with Apache License 2.0
def get_log_normalization_constant(self, input_energy, mask, **kwargs):
        """Compute logarithm of the normalization constant Z, where
        Z = sum exp(-E) -> logZ = log sum exp(-E) =: -nlogZ
        """
        # should have logZ[:, i] == logZ[:, j] for any i, j
        logZ = self.recursion(input_energy, mask, return_sequences=False, **kwargs)
        return logZ[:, 0] 
Example #10
Source File: distribute.py    From ikelos with MIT License
def compute_mask(self, x, mask=None):
        if mask is None:
            return None
        #import pdb
        #pdb.set_trace()
        target_dim = K.ndim(x) - 2
        num_reducing = K.ndim(mask) - target_dim
        if num_reducing:
            axes = tuple([-i for i in range(1,num_reducing+1)])
            mask = K.any(mask, axes)

        return mask 
Example #11
Source File: attention.py    From ikelos with MIT License
def compute_mask(self, x, mask=None):
        if mask is None or K.ndim(mask) == 2:
            return None
        elif K.ndim(mask) == 3:
            # Reduce the extra axes with a logical OR and propagate the result.
            return K.any(mask, axis=(1, 2))
        else:
            raise Exception("Unexpected situation")
Example #12
Source File: noisy_or.py    From deep_qa with Apache License 2.0
def compute_mask(self, inputs, mask=None):
        # pylint: disable=unused-argument
        if mask is not None:
            return K.any(mask, axis=self.axis)
        return None 
Example #13
Source File: encoder_wrapper.py    From deep_qa with Apache License 2.0
def compute_mask(self, x, input_mask=None):
        # pylint: disable=unused-argument
        # Input mask (coming from Embedding) will be of shape (batch_size, knowledge_length, num_words).
        # Output mask should be of shape (batch_size, knowledge_length) with 0s for background sentences that
        #       are all padding.
        if input_mask is None:
            return None
        else:
            # An output bit is 0 only if the bits corresponding to all input words are 0.
            return K.any(input_mask, axis=-1) 
Example #14
Source File: add_encoder_mask.py    From deep_qa with Apache License 2.0
def compute_mask(self, inputs, mask=None):
        encoder_mask, embedding_mask = mask
        if encoder_mask is not None:
            raise RuntimeError("Refusing to add an encoder mask, because the tensor already has one")
        return K.any(embedding_mask, axis=-1) 
Example #15
Source File: vector_matrix_merge.py    From deep_qa with Apache License 2.0
def compute_mask(self, inputs, mask=None):
        if mask is None or all(m is None for m in mask) or not self.propagate_mask:
            return None
        mask_concat_axis = self.mask_concat_axis
        if mask_concat_axis is None:
            mask_concat_axis = self.concat_axis
            if mask_concat_axis < 0:
                mask_concat_axis %= K.ndim(inputs[-1])
        num_vectors = len(mask) - 1
        matrix_mask = mask[-1]
        if mask_concat_axis >= K.ndim(matrix_mask):
            # This means we're concatenating along an axis in the tensor that is greater than the
            # number of dimensions in the mask.  E.g., we're adding a single pre-computed feature
            # to a word embedding (if it was multiple features, you'd already have evenly shaped
            # tensors, so you could just use a Concatenate layer).  In this case, we take all of
            # the masks, assume they have the same shape, and compute K.any() with them.
            masks = [matrix_mask] + [m for m in mask[:-1] if m is not None]
            shapes = set([K.int_shape(m) for m in masks])
            assert len(shapes) == 1, "Can't compute mask with uneven shapes: " + str(shapes)
            expanded_masks = [K.expand_dims(m, axis=-1) for m in masks]
            concated_masks = K.concatenate(expanded_masks, axis=-1)
            return K.any(concated_masks, axis=-1)
        vector_masks = []
        for i in range(num_vectors):
            vector_mask = mask[i]
            if vector_mask is None:
                vector_mask_template = K.sum(K.cast(matrix_mask, 'uint8'), axis=mask_concat_axis)
                vector_mask = K.cast(K.ones_like(vector_mask_template), 'bool')
            vector_masks.append(K.expand_dims(vector_mask, axis=mask_concat_axis))
        return K.concatenate(vector_masks + [matrix_mask], axis=mask_concat_axis) 
Example #16
Source File: max.py    From deep_qa with Apache License 2.0
def compute_mask(self, inputs, mask=None):
        # pylint: disable=unused-argument
        if mask is None:
            return None
        return K.any(mask, axis=self.axis) 
Example #17
Source File: layers.py    From anago with MIT License
def compute_mask(self, input, mask=None):
        if mask is not None and self.learn_mode == 'join':
            return K.any(mask, axis=1)
        return mask 
Example #18
Source File: layers.py    From anago with MIT License
def get_log_normalization_constant(self, input_energy, mask, **kwargs):
        """Compute logarithm of the normalization constant Z, where
        Z = sum exp(-E) -> logZ = log sum exp(-E) =: -nlogZ
        """
        # should have logZ[:, i] == logZ[:, j] for any i, j
        logZ = self.recursion(input_energy, mask, return_sequences=False, **kwargs)
        return logZ[:, 0] 
Example #19
Source File: layers.py    From anago with MIT License
def get_energy(self, y_true, input_energy, mask):
        """Energy = a1' y1 + u1' y1 + y1' U y2 + u2' y2 + y2' U y3 + u3' y3 + an' y3
        """
        input_energy = K.sum(input_energy * y_true, 2)  # (B, T)
        chain_energy = K.sum(K.dot(y_true[:, :-1, :], self.chain_kernel) * y_true[:, 1:, :], 2)  # (B, T-1)

        if mask is not None:
            mask = K.cast(mask, K.floatx())
            chain_mask = mask[:, :-1] * mask[:, 1:]  # (B, T-1), mask[:,:-1]*mask[:,1:] makes it work with any padding
            input_energy = input_energy * mask
            chain_energy = chain_energy * chain_mask
        total_energy = K.sum(input_energy, -1) + K.sum(chain_energy, -1)  # (B, )

        return total_energy 
Example #20
Source File: layers.py    From anago with MIT License
def viterbi_decoding(self, X, mask=None):
        input_energy = self.activation(K.dot(X, self.kernel) + self.bias)
        if self.use_boundary:
            input_energy = self.add_boundary_energy(input_energy, mask, self.left_boundary, self.right_boundary)

        argmin_tables = self.recursion(input_energy, mask, return_logZ=False)
        argmin_tables = K.cast(argmin_tables, 'int32')

        # backward to find best path, `initial_best_idx` can be any, as all elements in the last argmin_table are the same
        argmin_tables = K.reverse(argmin_tables, 1)
        initial_best_idx = [K.expand_dims(argmin_tables[:, 0, 0])]  # matrix instead of vector is required by tf `K.rnn`
        if K.backend() == 'theano':
            initial_best_idx = [K.T.unbroadcast(initial_best_idx[0], 1)]

        def gather_each_row(params, indices):
            n = K.shape(indices)[0]
            if K.backend() == 'theano':
                return params[K.T.arange(n), indices]
            else:
                indices = K.transpose(K.stack([K.tf.range(n), indices]))
                return K.tf.gather_nd(params, indices)

        def find_path(argmin_table, best_idx):
            next_best_idx = gather_each_row(argmin_table, best_idx[0][:, 0])
            next_best_idx = K.expand_dims(next_best_idx)
            if K.backend() == 'theano':
                next_best_idx = K.T.unbroadcast(next_best_idx, 1)
            return next_best_idx, [next_best_idx]

        _, best_paths, _ = K.rnn(find_path, argmin_tables, initial_best_idx, input_length=K.int_shape(X)[1], unroll=self.unroll)
        best_paths = K.reverse(best_paths, 1)
        best_paths = K.squeeze(best_paths, 2)

        return K.one_hot(best_paths, self.units) 
Example #21
Source File: layers.py    From indic_tagger with Apache License 2.0
def compute_mask(self, input, mask=None):
        if mask is not None and self.learn_mode == 'join':
            return K.any(mask, axis=1)
        return mask 
Example #22
Source File: layers.py    From indic_tagger with Apache License 2.0
def get_log_normalization_constant(self, input_energy, mask, **kwargs):
        """Compute logarithm of the normalization constant Z, where
        Z = sum exp(-E) -> logZ = log sum exp(-E) =: -nlogZ
        """
        # should have logZ[:, i] == logZ[:, j] for any i, j
        logZ = self.recursion(input_energy, mask, return_sequences=False, **kwargs)
        return logZ[:, 0] 
Example #23
Source File: layers.py    From indic_tagger with Apache License 2.0
def get_energy(self, y_true, input_energy, mask):
        """Energy = a1' y1 + u1' y1 + y1' U y2 + u2' y2 + y2' U y3 + u3' y3 + an' y3
        """
        input_energy = K.sum(input_energy * y_true, 2)  # (B, T)
        chain_energy = K.sum(K.dot(y_true[:, :-1, :], self.chain_kernel) * y_true[:, 1:, :], 2)  # (B, T-1)

        if mask is not None:
            mask = K.cast(mask, K.floatx())
            chain_mask = mask[:, :-1] * mask[:, 1:]  # (B, T-1), mask[:,:-1]*mask[:,1:] makes it work with any padding
            input_energy = input_energy * mask
            chain_energy = chain_energy * chain_mask
        total_energy = K.sum(input_energy, -1) + K.sum(chain_energy, -1)  # (B, )

        return total_energy 
Example #24
Source File: layers.py    From indic_tagger with Apache License 2.0
def viterbi_decoding(self, X, mask=None):
        input_energy = self.activation(K.dot(X, self.kernel) + self.bias)
        if self.use_boundary:
            input_energy = self.add_boundary_energy(input_energy, mask, self.left_boundary, self.right_boundary)

        argmin_tables = self.recursion(input_energy, mask, return_logZ=False)
        argmin_tables = K.cast(argmin_tables, 'int32')

        # backward to find best path, `initial_best_idx` can be any, as all elements in the last argmin_table are the same
        argmin_tables = K.reverse(argmin_tables, 1)
        initial_best_idx = [K.expand_dims(argmin_tables[:, 0, 0])]  # matrix instead of vector is required by tf `K.rnn`
        if K.backend() == 'theano':
            initial_best_idx = [K.T.unbroadcast(initial_best_idx[0], 1)]

        def gather_each_row(params, indices):
            n = K.shape(indices)[0]
            if K.backend() == 'theano':
                return params[K.T.arange(n), indices]
            else:
                indices = K.transpose(K.stack([K.tf.range(n), indices]))
                return K.tf.gather_nd(params, indices)

        def find_path(argmin_table, best_idx):
            next_best_idx = gather_each_row(argmin_table, best_idx[0][:, 0])
            next_best_idx = K.expand_dims(next_best_idx)
            if K.backend() == 'theano':
                next_best_idx = K.T.unbroadcast(next_best_idx, 1)
            return next_best_idx, [next_best_idx]

        _, best_paths, _ = K.rnn(find_path, argmin_tables, initial_best_idx, input_length=K.int_shape(X)[1], unroll=self.unroll)
        best_paths = K.reverse(best_paths, 1)
        best_paths = K.squeeze(best_paths, 2)

        return K.one_hot(best_paths, self.units) 
Example #25
Source File: fractalnet.py    From keras-fractalnet with MIT License
def _gen_local_drops(self, count, p):
        # Create a local droppath with at least one path
        arr = self._random_arr(count, p)
        drops = K.switch(
            K.any(arr),
            arr,
            self._arr_with_one(count)
        )
        return drops 
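A standalone sketch of the same guard (assuming the TensorFlow backend; the sampling lines are stand-ins for the project's _random_arr and _arr_with_one helpers, which are not shown on this page):

import numpy as np
from keras import backend as K

def local_drops_sketch(count, p):
    # Bernoulli draw over `count` paths: 1 keeps a path, 0 drops it.
    arr = K.constant(np.random.binomial(1, p, size=(count,)), dtype='float32')
    # One-hot fallback so at least one path always survives.
    fallback = K.constant(np.eye(count)[np.random.randint(count)], dtype='float32')
    # K.any() is True iff the random draw kept at least one path.
    return K.switch(K.any(arr), arr, fallback)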
Example #26
Source File: keras_bert_layer.py    From nlp_xiaojiang with MIT License
def viterbi_decoding(self, X, mask=None):
        input_energy = self.activation(K.dot(X, self.kernel) + self.bias)
        if self.use_boundary:
            input_energy = self.add_boundary_energy(
                input_energy, mask, self.left_boundary, self.right_boundary)

        argmin_tables = self.recursion(input_energy, mask, return_logZ=False)
        argmin_tables = K.cast(argmin_tables, 'int32')

        # backward to find best path, `initial_best_idx` can be any,
        # as all elements in the last argmin_table are the same
        argmin_tables = K.reverse(argmin_tables, 1)
        # matrix instead of vector is required by tf `K.rnn`
        initial_best_idx = [K.expand_dims(argmin_tables[:, 0, 0])]
        if K.backend() == 'theano':
            initial_best_idx = [K.T.unbroadcast(initial_best_idx[0], 1)]

        def gather_each_row(params, indices):
            n = K.shape(indices)[0]
            if K.backend() == 'theano':
                return params[K.T.arange(n), indices]
            else:
                indices = K.transpose(K.stack([K.tf.range(n), indices]))
                return K.tf.gather_nd(params, indices)

        def find_path(argmin_table, best_idx):
            next_best_idx = gather_each_row(argmin_table, best_idx[0][:, 0])
            next_best_idx = K.expand_dims(next_best_idx)
            if K.backend() == 'theano':
                next_best_idx = K.T.unbroadcast(next_best_idx, 1)
            return next_best_idx, [next_best_idx]

        _, best_paths, _ = K.rnn(find_path, argmin_tables, initial_best_idx,
                                 input_length=K.int_shape(X)[1], unroll=self.unroll)
        best_paths = K.reverse(best_paths, 1)
        best_paths = K.squeeze(best_paths, 2)

        return K.one_hot(best_paths, self.units) 
Example #27
Source File: ChainCRF.py    From naacl18-multitask_argument_mining with Apache License 2.0
def compute_mask(self, input, mask=None):
        if mask is not None:
            return K.any(mask, axis=1)
        return mask 
Example #28
Source File: crf.py    From keras-contrib with MIT License
def compute_mask(self, input, mask=None):
        if mask is not None and self.learn_mode == 'join':
            return K.any(mask, axis=1)
        return mask 
Example #29
Source File: crf.py    From keras-contrib with MIT License
def get_log_normalization_constant(self, input_energy, mask, **kwargs):
        """Compute logarithm of the normalization constant Z, where
        Z = sum exp(-E) -> logZ = log sum exp(-E) =: -nlogZ
        """
        # should have logZ[:, i] == logZ[:, j] for any i, j
        logZ = self.recursion(input_energy, mask, return_sequences=False, **kwargs)
        return logZ[:, 0] 
Example #30
Source File: ntm.py    From ntm_keras with BSD 3-Clause "New" or "Revised" License
def _cosine_distance(M, k):
    # this is equation (6), or as I like to call it: The NaN factory.
    # TODO: Find it in a library (keras cosine loss?)
    # normalizing first as it is better conditioned.
    nk = K.l2_normalize(k, axis=-1)
    nM = K.l2_normalize(M, axis=-1)
    cosine_distance = K.batch_dot(nM, nk)
    # TODO: Do successful error handling
    #cosine_distance_error_handling = tf.Print(cosine_distance, [cosine_distance], message="NaN occurred in _cosine_distance")
    #cosine_distance_error_handling = K.ones(cosine_distance_error_handling.shape)
    #cosine_distance = tf.case({K.any(tf.is_nan(cosine_distance)) : (lambda: cosine_distance_error_handling)},
    #        default = lambda: cosine_distance, strict=True)
    return cosine_distance
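For reference, a hedged sketch of the guard that the commented-out lines gesture at (assuming the TensorFlow backend; K.switch stands in for the tf.case construction in the comments, and the all-ones fallback mirrors the commented-out K.ones line):

import tensorflow as tf
from keras import backend as K

def _cosine_distance_guarded(M, k):
    nk = K.l2_normalize(k, axis=-1)
    nM = K.l2_normalize(M, axis=-1)
    d = K.batch_dot(nM, nk)
    # If any entry went NaN (e.g. a zero-norm row), fall back to all ones.
    return K.switch(K.any(tf.math.is_nan(d)), K.ones_like(d), d)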