Python keras.backend.any() Examples
The following are 30
code examples of keras.backend.any().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module keras.backend, or try the search function.
Example #1
Source File: crf.py From keras-contrib with MIT License | 6 votes |
def get_energy(self, y_true, input_energy, mask):
    """Return the total energy of the label sequence ``y_true``.

    Energy = a1' y1 + u1' y1 + y1' U y2 + u2' y2 + y2' U y3 + u3' y3 + an' y3

    When ``mask`` is given it is cast to the backend float type and used to
    zero out both the per-step and the transition contributions.
    """
    unary = K.sum(input_energy * y_true, 2)  # (B, T)
    transitions = K.dot(y_true[:, :-1, :], self.chain_kernel)
    pairwise = K.sum(transitions * y_true[:, 1:, :], 2)  # (B, T-1)

    if mask is not None:
        mask = K.cast(mask, K.floatx())
        # A transition is kept only when both neighbouring steps are
        # unmasked, which makes this work with any padding.
        pair_mask = mask[:, :-1] * mask[:, 1:]  # (B, T-1)
        unary = unary * mask
        pairwise = pairwise * pair_mask

    return K.sum(unary, -1) + K.sum(pairwise, -1)  # (B, )
Example #2
Source File: layers.py From sequence-tagging-ner with Apache License 2.0 | 6 votes |
def loss_function(self):
    """Return the loss callable that matches ``self.learn_mode``.

    In 'join' mode a closure is returned that computes the negative
    log-likelihood from the first inbound node's input tensor and mask.
    Otherwise the sparse or dense categorical cross-entropy function is
    returned, selected by ``self.sparse_target``.
    """
    if self.learn_mode != 'join':
        if self.sparse_target:
            return sparse_categorical_crossentropy
        return categorical_crossentropy

    def loss(y_true, y_pred):
        assert self._inbound_nodes, 'CRF has not connected to any layer.'
        assert not self._outbound_nodes, 'When learn_model="join", CRF must be the last layer.'
        if self.sparse_target:
            # Sparse targets carry the label index in the last axis.
            y_true = K.one_hot(K.cast(y_true[:, :, 0], 'int32'), self.units)
        first_node = self._inbound_nodes[0]
        X = first_node.input_tensors[0]
        mask = first_node.input_masks[0]
        return self.get_negative_log_likelihood(y_true, X, mask)

    return loss
Example #3
Source File: layers.py From indic_tagger with Apache License 2.0 | 6 votes |
def loss_function(self):
    """Pick the training loss according to ``self.learn_mode``.

    'join' mode yields a closure computing the layer's negative
    log-likelihood; any other mode yields sparse or dense categorical
    cross-entropy depending on ``self.sparse_target``.
    """
    if self.learn_mode != 'join':
        if self.sparse_target:
            return sparse_categorical_crossentropy
        return categorical_crossentropy

    def loss(y_true, y_pred):
        assert self._inbound_nodes, 'CRF has not connected to any layer.'
        assert not self._outbound_nodes, 'When learn_model="join", CRF must be the last layer.'
        if self.sparse_target:
            # Expand integer labels (stored at index 0 of the last axis).
            y_true = K.one_hot(K.cast(y_true[:, :, 0], 'int32'), self.units)
        inbound = self._inbound_nodes[0]
        X = inbound.input_tensors[0]
        mask = inbound.input_masks[0]
        return self.get_negative_log_likelihood(y_true, X, mask)

    return loss
Example #4
Source File: pooling.py From onto-lstm with Apache License 2.0 | 6 votes |
def call(self, x, mask=None):
    """Attend over ``x`` using the parent layer's output as the query.

    Shape comments below are carried over from the original source:
    x is (batch_size, input_length, input_dim) and the parent's output
    ``mean`` is (batch_size, input_dim).
    """
    mean = super(IntraAttention, self).call(x, mask)
    # Row vector of ones, (1, input_length), used to tile ``mean`` over time.
    ones_row = K.expand_dims(K.mean(K.ones_like(x), axis=(0, 2)), dim=0)
    tiled = K.dot(K.expand_dims(mean), ones_row)
    tiled_mean = K.permute_dimensions(tiled, (0, 2, 1))  # (batch_size, input_length, input_dim)

    if mask is not None:
        if K.ndim(mask) > K.ndim(x):
            # Assumed to be caused by the bug in Bidirectional; temporary fix.
            # TODO: Fix Bidirectional.
            mask = K.any(mask, axis=(-2, -1))
        if K.ndim(mask) < K.ndim(x):
            mask = K.expand_dims(mask)
        x = switch(mask, x, K.zeros_like(x))

    # (batch_size, input_length, proj_dim)
    projected = K.tanh(K.dot(x, self.vector_projector) + K.dot(tiled_mean, self.mean_projector))
    scores = K.dot(projected, self.scorer)  # (batch_size, input_length)
    weights = K.softmax(scores)  # (batch_size, input_length)
    return K.sum(K.expand_dims(weights) * x, axis=1)  # (batch_size, input_dim)
Example #5
Source File: keras_bert_layer.py From nlp_xiaojiang with MIT License | 6 votes |
def get_energy(self, y_true, input_energy, mask):
    """Compute the energy of the tagged sequence ``y_true``.

    Energy = a1' y1 + u1' y1 + y1' U y2 + u2' y2 + y2' U y3 + u3' y3 + an' y3

    A non-None ``mask`` is cast to the backend float type and multiplied
    into both energy terms before they are summed.
    """
    step_energy = K.sum(input_energy * y_true, 2)  # (B, T)
    projected = K.dot(y_true[:, :-1, :], self.chain_kernel)
    link_energy = K.sum(projected * y_true[:, 1:, :], 2)  # (B, T-1)

    if mask is not None:
        mask = K.cast(mask, K.floatx())
        # mask[:, :-1] * mask[:, 1:] makes it work with any padding.
        link_mask = mask[:, :-1] * mask[:, 1:]  # (B, T-1)
        step_energy = step_energy * mask
        link_energy = link_energy * link_mask

    return K.sum(step_energy, -1) + K.sum(link_energy, -1)  # (B, )
Example #6
Source File: layers.py From anago with MIT License | 6 votes |
def loss_function(self):
    """Select the loss function for the current learn mode.

    For ``learn_mode == 'join'`` the result is a closure that evaluates the
    negative log-likelihood on the first inbound node's tensor and mask.
    For every other mode it is sparse or dense categorical cross-entropy,
    chosen by ``self.sparse_target``.
    """
    if self.learn_mode != 'join':
        if self.sparse_target:
            return sparse_categorical_crossentropy
        return categorical_crossentropy

    def loss(y_true, y_pred):
        assert self._inbound_nodes, 'CRF has not connected to any layer.'
        assert not self._outbound_nodes, 'When learn_model="join", CRF must be the last layer.'
        if self.sparse_target:
            # Convert integer labels into one-hot vectors of width self.units.
            y_true = K.one_hot(K.cast(y_true[:, :, 0], 'int32'), self.units)
        source_node = self._inbound_nodes[0]
        X = source_node.input_tensors[0]
        mask = source_node.input_masks[0]
        return self.get_negative_log_likelihood(y_true, X, mask)

    return loss
Example #7
Source File: pooling.py From onto-lstm with Apache License 2.0 | 6 votes |
def call(self, x, mask=None):
    """Average ``x`` over axis 1, ignoring masked positions when a mask is given.

    Per the original shape comments, x is (batch_size, input_length,
    input_dim) and the result is (batch_size, input_dim).
    """
    if mask is None:
        return K.mean(x, axis=1)  # (batch_size, input_dim)

    # The steps below remove padding from the computational graph.
    if K.ndim(mask) > K.ndim(x):
        # Bidirectional passes the input mask through instead of computing
        # an output mask, so the extra axes are collapsed here.
        # TODO: Fix the implementation of Bidirectional.
        mask = K.any(mask, axis=(-2, -1))
    if K.ndim(mask) < K.ndim(x):
        mask = K.expand_dims(mask)

    masked_input = switch(mask, x, K.zeros_like(x))
    # NOTE(review): K.sum(mask) is given no axis here - confirm that
    # normalising by the sum over the whole mask is intended.
    normaliser = K.sum(mask) + K.epsilon()
    weights = K.cast(mask / normaliser, 'float32')
    return K.sum(masked_input * weights, axis=1)  # (batch_size, input_dim)
Example #8
Source File: __init__.py From ikelos with MIT License | 5 votes |
def normalize_mask(x, mask):
    '''Keep the mask aligned with the tensor x.

    Arguments: x is a data tensor; mask is a binary tensor.

    Rationale: keep mask at the same dimensionality as x, but only with a
    length-1 trailing dimension. This ensures broadcastability, which is
    important because inferring shapes is hard and shapes are easy to get
    wrong.
    '''
    mask = K.cast(mask, K.floatx())
    target_rank = K.ndim(x)
    while K.ndim(mask) != target_rank:
        if K.ndim(mask) > target_rank:
            # Too many axes: collapse the trailing one.
            mask = K.any(mask, axis=-1)
        else:
            # Too few axes: append a new trailing one.
            mask = K.expand_dims(mask)
    return K.any(mask, axis=-1, keepdims=True)
Example #9
Source File: layers.py From sequence-tagging-ner with Apache License 2.0 | 5 votes |
def get_log_normalization_constant(self, input_energy, mask, **kwargs):
    """Compute the logarithm of the normalization constant Z.

    Z = sum exp(-E) -> logZ = log sum exp(-E) =: -nlogZ

    Extra keyword arguments are forwarded to ``self.recursion``.
    """
    log_z = self.recursion(input_energy, mask, return_sequences=False, **kwargs)
    # Every column is expected to hold the same value (logZ[:, i] ==
    # logZ[:, j] for any i, j), so the first one is returned.
    return log_z[:, 0]
Example #10
Source File: distribute.py From ikelos with MIT License | 5 votes |
def compute_mask(self, x, mask=None):
    """Collapse surplus trailing mask axes so the mask has ``ndim(x) - 2`` axes.

    Returns None when no mask is supplied, and the mask unchanged when its
    rank already matches the target.
    """
    if mask is None:
        return None

    target_dim = K.ndim(x) - 2
    surplus = K.ndim(mask) - target_dim
    if not surplus:
        return mask

    # Axes -1, -2, ..., -surplus (empty when surplus is negative).
    trailing_axes = tuple(range(-1, -surplus - 1, -1))
    return K.any(mask, trailing_axes)
Example #11
Source File: attention.py From ikelos with MIT License | 5 votes |
def compute_mask(self, x, mask=None):
    """Compute the output mask from the input mask.

    Returns None when there is no mask or the mask is 2-D. A 3-D mask is
    reduced with ``K.any`` over axes 1 and 2 and the reduced mask is
    returned.

    Raises:
        Exception: if the mask has any other number of dimensions.
    """
    if mask is None or mask.ndim == 2:
        return None
    if mask.ndim == 3:
        # The reduced mask used to be computed and then dropped, so this
        # branch silently returned None; return it so the mask propagates.
        return K.any(mask, axis=(1, 2))
    raise Exception("Unexpected situation")
Example #12
Source File: noisy_or.py From deep_qa with Apache License 2.0 | 5 votes |
def compute_mask(self, inputs, mask=None):  # pylint: disable=unused-argument
    """Reduce the incoming mask with ``K.any`` along ``self.axis``.

    Returns None when no mask is supplied.
    """
    if mask is None:
        return None
    return K.any(mask, axis=self.axis)
Example #13
Source File: encoder_wrapper.py From deep_qa with Apache License 2.0 | 5 votes |
def compute_mask(self, x, input_mask=None):  # pylint: disable=unused-argument
    """Collapse the last axis of ``input_mask`` with ``K.any``.

    Per the original comments, the input mask (coming from Embedding) has
    shape (batch_size, knowledge_length, num_words) and the output mask has
    shape (batch_size, knowledge_length), with 0s for background sentences
    that are all padding.
    """
    if input_mask is not None:
        # An output bit is 0 only if the bits for all input words are 0.
        return K.any(input_mask, axis=-1)
    return None
Example #14
Source File: add_encoder_mask.py From deep_qa with Apache License 2.0 | 5 votes |
def compute_mask(self, inputs, mask=None):
    """Derive an encoder mask from the embedding mask.

    ``mask`` is unpacked into (encoder mask, embedding mask). The embedding
    mask is reduced over its last axis with ``K.any``.

    Raises:
        RuntimeError: if the encoder input already carries a mask.
    """
    existing_mask, word_mask = mask
    if existing_mask is not None:
        raise RuntimeError("Refusing to add an encoder mask, because the tensor already has one")
    return K.any(word_mask, axis=-1)
Example #15
Source File: vector_matrix_merge.py From deep_qa with Apache License 2.0 | 5 votes |
def compute_mask(self, inputs, mask=None):
    """Combine the vector masks and the matrix mask into one output mask.

    ``mask`` is a list whose last element is the matrix mask and whose
    other elements are the (possibly None) vector masks.

    Returns None when there is no mask, when every mask is None, or when
    ``self.propagate_mask`` is falsy.
    """
    if mask is None or all(m is None for m in mask) or not self.propagate_mask:
        return None

    mask_concat_axis = self.mask_concat_axis
    if mask_concat_axis is None:
        mask_concat_axis = self.concat_axis
        if mask_concat_axis < 0:
            # Turn a negative concat axis into its non-negative equivalent.
            mask_concat_axis %= K.ndim(inputs[-1])

    matrix_mask = mask[-1]
    if mask_concat_axis >= K.ndim(matrix_mask):
        # This means we're concatenating along an axis in the tensor that is
        # greater than the number of dimensions in the mask. E.g., we're
        # adding a single pre-computed feature to a word embedding (if it
        # was multiple features, you'd already have evenly shaped tensors,
        # so you could just use a Concatenate layer). In this case, we take
        # all of the masks, assume they have the same shape, and compute
        # K.any() with them.
        masks = [matrix_mask] + [m for m in mask[:-1] if m is not None]
        shapes = {K.int_shape(m) for m in masks}
        # str() is required: concatenating the set directly raised a
        # TypeError and hid the intended assertion message.
        assert len(shapes) == 1, "Can't compute mask with uneven shapes: " + str(shapes)
        expanded_masks = [K.expand_dims(m, axis=-1) for m in masks]
        concated_masks = K.concatenate(expanded_masks, axis=-1)
        return K.any(concated_masks, axis=-1)

    vector_masks = []
    for vector_mask in mask[:-1]:
        if vector_mask is None:
            # No mask for this vector: build an all-True one shaped like
            # the matrix mask reduced over the concat axis.
            vector_mask_template = K.sum(K.cast(matrix_mask, 'uint8'), axis=mask_concat_axis)
            vector_mask = K.cast(K.ones_like(vector_mask_template), 'bool')
        vector_masks.append(K.expand_dims(vector_mask, axis=mask_concat_axis))
    return K.concatenate(vector_masks + [matrix_mask], axis=mask_concat_axis)
Example #16
Source File: max.py From deep_qa with Apache License 2.0 | 5 votes |
def compute_mask(self, inputs, mask=None):  # pylint: disable=unused-argument
    """Return the mask reduced with ``K.any`` along ``self.axis``, or None if unmasked."""
    if mask is not None:
        return K.any(mask, axis=self.axis)
    return None
Example #17
Source File: layers.py From anago with MIT License | 5 votes |
def compute_mask(self, input, mask=None):
    """Return the output mask for this layer.

    In 'join' learn mode a non-None mask is reduced over axis 1 with
    ``K.any``; in every other case the mask is passed through unchanged.
    """
    if mask is None or self.learn_mode != 'join':
        return mask
    return K.any(mask, axis=1)
Example #18
Source File: layers.py From anago with MIT License | 5 votes |
def get_log_normalization_constant(self, input_energy, mask, **kwargs):
    """Return log Z, the logarithm of the normalization constant.

    Z = sum exp(-E) -> logZ = log sum exp(-E) =: -nlogZ

    Any extra keyword arguments are passed on to ``self.recursion``.
    """
    log_z = self.recursion(input_energy, mask, return_sequences=False, **kwargs)
    # Should have logZ[:, i] == logZ[:, j] for any i, j, so take column 0.
    return log_z[:, 0]
Example #19
Source File: layers.py From anago with MIT License | 5 votes |
def get_energy(self, y_true, input_energy, mask):
    """Return the energy assigned to the label sequence ``y_true``.

    Energy = a1' y1 + u1' y1 + y1' U y2 + u2' y2 + y2' U y3 + u3' y3 + an' y3

    If ``mask`` is not None it is cast to the backend float type and
    applied to both the per-step and the transition energies.
    """
    emission = K.sum(input_energy * y_true, 2)  # (B, T)
    shifted = K.dot(y_true[:, :-1, :], self.chain_kernel)
    transition = K.sum(shifted * y_true[:, 1:, :], 2)  # (B, T-1)

    if mask is not None:
        mask = K.cast(mask, K.floatx())
        # (B, T-1); mask[:, :-1] * mask[:, 1:] makes it work with any padding.
        transition_mask = mask[:, :-1] * mask[:, 1:]
        emission = emission * mask
        transition = transition * transition_mask

    return K.sum(emission, -1) + K.sum(transition, -1)  # (B, )
Example #20
Source File: layers.py From anago with MIT License | 5 votes |
def viterbi_decoding(self, X, mask=None):
    """Trace the best path back through the argmin tables.

    The tables come from ``self.recursion``; the resulting path is returned
    one-hot encoded with ``self.units`` classes.
    """
    on_theano = K.backend() == 'theano'

    input_energy = self.activation(K.dot(X, self.kernel) + self.bias)
    if self.use_boundary:
        input_energy = self.add_boundary_energy(
            input_energy, mask, self.left_boundary, self.right_boundary)

    tables = self.recursion(input_energy, mask, return_logZ=False)
    tables = K.cast(tables, 'int32')

    # Walk backwards to find the best path. The starting index can be any,
    # as all elements in the last argmin table are the same.
    tables = K.reverse(tables, 1)
    # tf `K.rnn` requires a matrix state instead of a vector.
    start_idx = K.expand_dims(tables[:, 0, 0])
    if on_theano:
        start_idx = K.T.unbroadcast(start_idx, 1)
    initial_states = [start_idx]

    def gather_each_row(params, indices):
        # Pick params[i, indices[i]] for every row i.
        n = K.shape(indices)[0]
        if on_theano:
            return params[K.T.arange(n), indices]
        row_col = K.transpose(K.stack([K.tf.range(n), indices]))
        return K.tf.gather_nd(params, row_col)

    def find_path(argmin_table, best_idx):
        # One backward step: look up the predecessor of the current index.
        chosen = gather_each_row(argmin_table, best_idx[0][:, 0])
        chosen = K.expand_dims(chosen)
        if on_theano:
            chosen = K.T.unbroadcast(chosen, 1)
        return chosen, [chosen]

    _, best_paths, _ = K.rnn(find_path, tables, initial_states,
                             input_length=K.int_shape(X)[1], unroll=self.unroll)
    best_paths = K.reverse(best_paths, 1)
    best_paths = K.squeeze(best_paths, 2)
    return K.one_hot(best_paths, self.units)
Example #21
Source File: layers.py From indic_tagger with Apache License 2.0 | 5 votes |
def compute_mask(self, input, mask=None):
    """Compute the mask handed to downstream layers.

    The mask is reduced over axis 1 with ``K.any`` only when it is not None
    and the layer is in 'join' learn mode; otherwise it is returned as is.
    """
    if mask is None or self.learn_mode != 'join':
        return mask
    return K.any(mask, axis=1)
Example #22
Source File: layers.py From indic_tagger with Apache License 2.0 | 5 votes |
def get_log_normalization_constant(self, input_energy, mask, **kwargs):
    """Compute log Z for the given input energies.

    Z = sum exp(-E) -> logZ = log sum exp(-E) =: -nlogZ

    Keyword arguments are forwarded unchanged to ``self.recursion``.
    """
    log_z = self.recursion(input_energy, mask, return_sequences=False, **kwargs)
    # All columns should agree (logZ[:, i] == logZ[:, j] for any i, j).
    return log_z[:, 0]
Example #23
Source File: layers.py From indic_tagger with Apache License 2.0 | 5 votes |
def get_energy(self, y_true, input_energy, mask):
    """Sum the per-step and transition energies of ``y_true``.

    Energy = a1' y1 + u1' y1 + y1' U y2 + u2' y2 + y2' U y3 + u3' y3 + an' y3

    A supplied ``mask`` is cast to the backend float type and multiplied
    into both terms before the final reduction.
    """
    node_energy = K.sum(input_energy * y_true, 2)  # (B, T)
    left_projected = K.dot(y_true[:, :-1, :], self.chain_kernel)
    edge_energy = K.sum(left_projected * y_true[:, 1:, :], 2)  # (B, T-1)

    if mask is not None:
        mask = K.cast(mask, K.floatx())
        # (B, T-1); mask[:, :-1] * mask[:, 1:] makes it work with any padding.
        edge_mask = mask[:, :-1] * mask[:, 1:]
        node_energy = node_energy * mask
        edge_energy = edge_energy * edge_mask

    return K.sum(node_energy, -1) + K.sum(edge_energy, -1)  # (B, )
Example #24
Source File: layers.py From indic_tagger with Apache License 2.0 | 5 votes |
def viterbi_decoding(self, X, mask=None):
    """Backtrack through the argmin tables and one-hot encode the path.

    The tables are produced by ``self.recursion`` and the result has
    ``self.units`` classes.
    """
    on_theano = K.backend() == 'theano'

    input_energy = self.activation(K.dot(X, self.kernel) + self.bias)
    if self.use_boundary:
        input_energy = self.add_boundary_energy(
            input_energy, mask, self.left_boundary, self.right_boundary)

    backpointers = self.recursion(input_energy, mask, return_logZ=False)
    backpointers = K.cast(backpointers, 'int32')

    # Go backward to find the best path. The initial index can be any, as
    # all elements in the last argmin table are the same.
    backpointers = K.reverse(backpointers, 1)
    # A matrix instead of a vector is required by tf `K.rnn`.
    first_idx = K.expand_dims(backpointers[:, 0, 0])
    if on_theano:
        first_idx = K.T.unbroadcast(first_idx, 1)
    initial_states = [first_idx]

    def gather_each_row(params, indices):
        # Select params[i, indices[i]] for each row i.
        n = K.shape(indices)[0]
        if on_theano:
            return params[K.T.arange(n), indices]
        coords = K.transpose(K.stack([K.tf.range(n), indices]))
        return K.tf.gather_nd(params, coords)

    def find_path(argmin_table, best_idx):
        # Single backward step through one argmin table.
        previous = gather_each_row(argmin_table, best_idx[0][:, 0])
        previous = K.expand_dims(previous)
        if on_theano:
            previous = K.T.unbroadcast(previous, 1)
        return previous, [previous]

    _, best_paths, _ = K.rnn(find_path, backpointers, initial_states,
                             input_length=K.int_shape(X)[1], unroll=self.unroll)
    best_paths = K.reverse(best_paths, 1)
    best_paths = K.squeeze(best_paths, 2)
    return K.one_hot(best_paths, self.units)
Example #25
Source File: fractalnet.py From keras-fractalnet with MIT License | 5 votes |
def _gen_local_drops(self, count, p):
    """Create a local drop-path array that keeps at least one path.

    A random array is drawn with ``self._random_arr``; if none of its
    entries is truthy, the array from ``self._arr_with_one`` is used
    instead.
    """
    candidate = self._random_arr(count, p)
    has_any_path = K.any(candidate)
    fallback = self._arr_with_one(count)
    return K.switch(has_any_path, candidate, fallback)
Example #26
Source File: keras_bert_layer.py From nlp_xiaojiang with MIT License | 5 votes |
def viterbi_decoding(self, X, mask=None):
    """Recover the best label path from the argmin tables, one-hot encoded.

    The argmin tables are computed by ``self.recursion``; the output uses
    ``self.units`` classes.
    """
    on_theano = K.backend() == 'theano'

    input_energy = self.activation(K.dot(X, self.kernel) + self.bias)
    if self.use_boundary:
        input_energy = self.add_boundary_energy(
            input_energy, mask, self.left_boundary, self.right_boundary)

    argmin_seq = self.recursion(input_energy, mask, return_logZ=False)
    argmin_seq = K.cast(argmin_seq, 'int32')

    # Backward pass to find the best path. The initial index can be any,
    # as all elements in the last argmin table are the same.
    argmin_seq = K.reverse(argmin_seq, 1)
    # A matrix instead of a vector is required by tf `K.rnn`.
    seed_idx = K.expand_dims(argmin_seq[:, 0, 0])
    if on_theano:
        seed_idx = K.T.unbroadcast(seed_idx, 1)
    initial_states = [seed_idx]

    def gather_each_row(params, indices):
        # Take params[i, indices[i]] for every row i.
        n = K.shape(indices)[0]
        if on_theano:
            return params[K.T.arange(n), indices]
        positions = K.transpose(K.stack([K.tf.range(n), indices]))
        return K.tf.gather_nd(params, positions)

    def find_path(argmin_table, best_idx):
        # Follow the back-pointer of the current best index.
        step_idx = gather_each_row(argmin_table, best_idx[0][:, 0])
        step_idx = K.expand_dims(step_idx)
        if on_theano:
            step_idx = K.T.unbroadcast(step_idx, 1)
        return step_idx, [step_idx]

    _, best_paths, _ = K.rnn(find_path, argmin_seq, initial_states,
                             input_length=K.int_shape(X)[1], unroll=self.unroll)
    best_paths = K.reverse(best_paths, 1)
    best_paths = K.squeeze(best_paths, 2)
    return K.one_hot(best_paths, self.units)
Example #27
Source File: ChainCRF.py From naacl18-multitask_argument_mining with Apache License 2.0 | 5 votes |
def compute_mask(self, input, mask=None):
    """Reduce a non-None mask over axis 1 with ``K.any``; pass None through."""
    if mask is None:
        return mask
    return K.any(mask, axis=1)
Example #28
Source File: crf.py From keras-contrib with MIT License | 5 votes |
def compute_mask(self, input, mask=None):
    """Return the mask to propagate from this layer.

    Only when a mask is present and ``self.learn_mode`` is 'join' is it
    reduced over axis 1 with ``K.any``; otherwise it is returned untouched.
    """
    if mask is None or self.learn_mode != 'join':
        return mask
    return K.any(mask, axis=1)
Example #29
Source File: crf.py From keras-contrib with MIT License | 5 votes |
def get_log_normalization_constant(self, input_energy, mask, **kwargs):
    """Compute the logarithm of the normalization constant Z.

    Z = sum exp(-E) -> logZ = log sum exp(-E) =: -nlogZ

    Additional keyword arguments go straight to ``self.recursion``.
    """
    log_z = self.recursion(input_energy, mask, return_sequences=False, **kwargs)
    # Columns should be identical (logZ[:, i] == logZ[:, j] for any i, j),
    # so column 0 is representative.
    return log_z[:, 0]
Example #30
Source File: ntm.py From ntm_keras with BSD 3-Clause "New" or "Revised" License | 5 votes |
def _cosine_distance(M, k):
    """Return the batched dot product of the L2-normalized ``M`` and ``k``.

    This is equation (6), which the original author calls "the NaN factory".
    Both inputs are normalized along their last axis first, as that is
    better conditioned.
    """
    # TODO: Find it in a library (keras cosine loss?)
    # TODO: Add working error handling for NaNs in the result.
    unit_k = K.l2_normalize(k, axis=-1)
    unit_M = K.l2_normalize(M, axis=-1)
    return K.batch_dot(unit_M, unit_k)