Python keras.backend.rnn() Examples
The following are 30 code examples of keras.backend.rnn(), drawn from open-source projects.
The original project and source file for each example are given in the attribution line above it.
You may also want to check out all available functions/classes of the module keras.backend.
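To set the stage, here is a minimal, self-contained sketch of calling K.rnn directly, modeled on the backend test in Example #27 below. The step function, weights, and shapes are illustrative, not taken from any project on this page, and the old multi-backend Keras API is assumed:

import numpy as np
from keras import backend as K

# K.rnn(step_function, inputs, initial_states, ...) iterates step_function over the
# time axis of `inputs` (batch_size, timesteps, input_dim) and returns the tuple
# (last_output, outputs, new_states).
batch_size, timesteps, input_dim, output_dim = 32, 5, 8, 4
W = K.variable(np.random.random((input_dim, output_dim)))

def step_function(x_t, states):
    # x_t: (batch_size, input_dim), the input slice at one time step.
    # states: list of state tensors carried between steps (empty for this
    # stateless toy step).
    return K.dot(x_t, W), []

inputs = K.variable(np.random.random((batch_size, timesteps, input_dim)))
last_output, outputs, new_states = K.rnn(step_function, inputs,
                                         initial_states=[],
                                         go_backwards=False, mask=None)
# last_output: (batch_size, output_dim), the output of the final step.
# outputs: (batch_size, timesteps, output_dim), the stacked per-step outputs.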
Example #1
Source File: layers.py From delft with Apache License 2.0
def _forward(x, reduce_step, initial_states, U, mask=None):
    """Forward recurrence of the linear chain crf."""
    def _forward_step(energy_matrix_t, states):
        alpha_tm1 = states[-1]
        new_states = reduce_step(K.expand_dims(alpha_tm1, 2) + energy_matrix_t)
        return new_states[0], new_states

    U_shared = K.expand_dims(K.expand_dims(U, 0), 0)
    if mask is not None:
        mask = K.cast(mask, K.floatx())
        mask_U = K.expand_dims(K.expand_dims(mask[:, :-1] * mask[:, 1:], 2), 3)
        U_shared = U_shared * mask_U

    inputs = K.expand_dims(x[:, 1:, :], 2) + U_shared
    inputs = K.concatenate([inputs, K.zeros_like(inputs[:, -1:, :, :])], axis=1)

    last, values, _ = K.rnn(_forward_step, inputs, initial_states)
    return last, values
Example #2
Source File: layers.py From sequence-tagging-ner with Apache License 2.0
def step(self, input_energy_t, states, return_logZ=True):
    # Note: in the following, `prev_target_val` has shape = (B, F)
    # where B = batch_size, F = output feature dim
    # Note: `i` is of float32, due to the behavior of `K.rnn`
    prev_target_val, i, chain_energy = states[:3]
    t = K.cast(i[0, 0], dtype='int32')
    if len(states) > 3:
        if K.backend() == 'theano':
            m = states[3][:, t:(t + 2)]
        else:
            m = K.tf.slice(states[3], [0, t], [-1, 2])
        input_energy_t = input_energy_t * K.expand_dims(m[:, 0])
        # (1, F, F) * (B, 1, 1) -> (B, F, F)
        chain_energy = chain_energy * K.expand_dims(K.expand_dims(m[:, 0] * m[:, 1]))
    if return_logZ:
        # shapes: (1, B, F) + (B, F, 1) -> (B, F, F)
        energy = chain_energy + K.expand_dims(input_energy_t - prev_target_val, 2)
        new_target_val = K.logsumexp(-energy, 1)  # shapes: (B, F)
        return new_target_val, [new_target_val, i + 1]
    else:
        energy = chain_energy + K.expand_dims(input_energy_t + prev_target_val, 2)
        min_energy = K.min(energy, 1)
        argmin_table = K.cast(K.argmin(energy, 1), K.floatx())  # cast for tf-version `K.rnn`
        return argmin_table, [min_energy, i + 1]
Example #3
Source File: shareable_gru.py From deep_qa with Apache License 2.0
def call(self, x, mask=None, **kwargs):
    input_shape = K.int_shape(x)
    res = super(ShareableGRU, self).call(x, mask, **kwargs)
    self.input_spec = [InputSpec(shape=(self.input_spec[0].shape[0],
                                        None,
                                        self.input_spec[0].shape[2]))]
    if K.ndim(x) == K.ndim(res):
        # A recent change in Keras
        # (https://github.com/fchollet/keras/commit/a9b6bef0624c67d6df1618ca63d8e8141b0df4d0)
        # made it so that K.rnn with a tensorflow backend does not retain shape information for
        # the sequence length, even if it's present in the input. We need to fix that here so
        # that our models have the right shape information. A simple K.reshape is good enough
        # to fix this.
        result_shape = K.int_shape(res)
        if input_shape[1] is not None and result_shape[1] is None:
            shape = (input_shape[0] if input_shape[0] is not None else -1,
                     input_shape[1], result_shape[2])
            res = K.reshape(res, shape=shape)
    return res
Example #4
Source File: attentive_convlstm.py From sam with MIT License
def call(self, x, mask=None):
    input_shape = self.input_spec[0].shape
    initial_states = self.get_initial_states(x)
    constants = self.get_constants(x)
    preprocessed_input = self.preprocess_input(x)

    last_output, outputs, states = K.rnn(self.step, preprocessed_input,
                                         initial_states,
                                         go_backwards=False,
                                         mask=mask,
                                         constants=constants,
                                         unroll=False,
                                         input_length=input_shape[1])
    if last_output.ndim == 3:
        last_output = K.expand_dims(last_output, dim=0)

    return last_output
Example #5
Source File: nse.py From neural-semantic-encoders with Apache License 2.0
def call(self, x, mask=None):
    # input_shape = (batch_size, input_length, input_dim). This needs to be defined in build.
    read_output, initial_memory_states, output_mask = self.read(x, mask)
    initial_write_states = self.writer.get_initial_states(read_output)  # h_0 and c_0 of the writer LSTM
    initial_states = initial_memory_states + initial_write_states
    # last_output: (batch_size, output_dim)
    # all_outputs: (batch_size, input_length, output_dim)
    # last_states:
    #     last_memory_state: (batch_size, input_length, output_dim)
    #     last_output
    #     last_writer_ct
    last_output, all_outputs, last_states = K.rnn(self.compose_and_write_step, read_output,
                                                  initial_states, mask=output_mask)
    last_memory = last_states[0]
    if self.return_mode == "last_output":
        return last_output
    elif self.return_mode == "all_outputs":
        return all_outputs
    else:
        # return mode is output_and_memory
        expanded_last_output = K.expand_dims(last_output, dim=1)  # (batch_size, 1, output_dim)
        # (batch_size, 1+input_length, output_dim)
        return K.concatenate([expanded_last_output, last_memory], axis=1)
Example #6
Source File: layers.py From anago with MIT License
def step(self, input_energy_t, states, return_logZ=True):
    # Note: in the following, `prev_target_val` has shape = (B, F)
    # where B = batch_size, F = output feature dim
    # Note: `i` is of float32, due to the behavior of `K.rnn`
    prev_target_val, i, chain_energy = states[:3]
    t = K.cast(i[0, 0], dtype='int32')
    if len(states) > 3:
        if K.backend() == 'theano':
            m = states[3][:, t:(t + 2)]
        else:
            m = K.tf.slice(states[3], [0, t], [-1, 2])
        input_energy_t = input_energy_t * K.expand_dims(m[:, 0])
        # (1, F, F) * (B, 1, 1) -> (B, F, F)
        chain_energy = chain_energy * K.expand_dims(K.expand_dims(m[:, 0] * m[:, 1]))
    if return_logZ:
        # shapes: (1, B, F) + (B, F, 1) -> (B, F, F)
        energy = chain_energy + K.expand_dims(input_energy_t - prev_target_val, 2)
        new_target_val = K.logsumexp(-energy, 1)  # shapes: (B, F)
        return new_target_val, [new_target_val, i + 1]
    else:
        energy = chain_energy + K.expand_dims(input_energy_t + prev_target_val, 2)
        min_energy = K.min(energy, 1)
        argmin_table = K.cast(K.argmin(energy, 1), K.floatx())  # cast for tf-version `K.rnn`
        return argmin_table, [min_energy, i + 1]
Example #7
Source File: ChainCRF.py From emnlp2017-bilstm-cnn-crf with Apache License 2.0
def _backward(gamma, mask):
    '''Backward recurrence of the linear chain crf.'''
    gamma = K.cast(gamma, 'int32')

    def _backward_step(gamma_t, states):
        y_tm1 = K.squeeze(states[0], 0)
        y_t = batch_gather(gamma_t, y_tm1)
        return y_t, [K.expand_dims(y_t, 0)]

    initial_states = [K.expand_dims(K.zeros_like(gamma[:, 0, 0]), 0)]
    _, y_rev, _ = K.rnn(_backward_step, gamma, initial_states, go_backwards=True)
    y = K.reverse(y_rev, 1)

    if mask is not None:
        mask = K.cast(mask, dtype='int32')
        # mask output
        y *= mask
        # set masked values to -1
        y += -(1 - mask)
    return y
Example #8
Source File: ChainCRF.py From emnlp2017-bilstm-cnn-crf with Apache License 2.0
def _forward(x, reduce_step, initial_states, U, mask=None):
    '''Forward recurrence of the linear chain crf.'''
    def _forward_step(energy_matrix_t, states):
        alpha_tm1 = states[-1]
        new_states = reduce_step(K.expand_dims(alpha_tm1, 2) + energy_matrix_t)
        return new_states[0], new_states

    U_shared = K.expand_dims(K.expand_dims(U, 0), 0)
    if mask is not None:
        mask = K.cast(mask, K.floatx())
        mask_U = K.expand_dims(K.expand_dims(mask[:, :-1] * mask[:, 1:], 2), 3)
        U_shared = U_shared * mask_U

    inputs = K.expand_dims(x[:, 1:, :], 2) + U_shared
    inputs = K.concatenate([inputs, K.zeros_like(inputs[:, -1:, :, :])], axis=1)

    last, values, _ = K.rnn(_forward_step, inputs, initial_states)
    return last, values
Example #9
Source File: PointerLSTM.py From pointer-networks with MIT License
def call(self, x, mask=None):
    input_shape = self.input_spec[0].shape
    en_seq = x
    x_input = x[:, input_shape[1]-1, :]
    x_input = K.repeat(x_input, input_shape[1])
    initial_states = self.get_initial_states(x_input)

    constants = super(PointerLSTM, self).get_constants(x_input)
    constants.append(en_seq)
    preprocessed_input = self.preprocess_input(x_input)

    last_output, outputs, states = K.rnn(self.step, preprocessed_input,
                                         initial_states,
                                         go_backwards=self.go_backwards,
                                         constants=constants,
                                         input_length=input_shape[1])
    return outputs
Example #10
Source File: ChainCRF.py From naacl18-multitask_argument_mining with Apache License 2.0
def _backward(gamma, mask):
    '''Backward recurrence of the linear chain crf.'''
    gamma = K.cast(gamma, 'int32')

    def _backward_step(gamma_t, states):
        y_tm1 = K.squeeze(states[0], 0)
        y_t = batch_gather(gamma_t, y_tm1)
        return y_t, [K.expand_dims(y_t, 0)]

    initial_states = [K.expand_dims(K.zeros_like(gamma[:, 0, 0]), 0)]
    _, y_rev, _ = K.rnn(_backward_step, gamma, initial_states, go_backwards=True)
    y = K.reverse(y_rev, 1)

    if mask is not None:
        mask = K.cast(mask, dtype='int32')
        # mask output
        y *= mask
        # set masked values to -1
        y += -(1 - mask)
    return y
Example #11
Source File: ChainCRF.py From naacl18-multitask_argument_mining with Apache License 2.0
def _forward(x, reduce_step, initial_states, U, mask=None):
    '''Forward recurrence of the linear chain crf.'''
    def _forward_step(energy_matrix_t, states):
        alpha_tm1 = states[-1]
        new_states = reduce_step(K.expand_dims(alpha_tm1, 2) + energy_matrix_t)
        return new_states[0], new_states

    U_shared = K.expand_dims(K.expand_dims(U, 0), 0)
    if mask is not None:
        mask = K.cast(mask, K.floatx())
        mask_U = K.expand_dims(K.expand_dims(mask[:, :-1] * mask[:, 1:], 2), 3)
        U_shared = U_shared * mask_U

    inputs = K.expand_dims(x[:, 1:, :], 2) + U_shared
    inputs = K.concatenate([inputs, K.zeros_like(inputs[:, -1:, :, :])], axis=1)

    last, values, _ = K.rnn(_forward_step, inputs, initial_states)
    return last, values
Example #12
Source File: layers.py From indic_tagger with Apache License 2.0
def step(self, input_energy_t, states, return_logZ=True):
    # Note: in the following, `prev_target_val` has shape = (B, F)
    # where B = batch_size, F = output feature dim
    # Note: `i` is of float32, due to the behavior of `K.rnn`
    prev_target_val, i, chain_energy = states[:3]
    t = K.cast(i[0, 0], dtype='int32')
    if len(states) > 3:
        if K.backend() == 'theano':
            m = states[3][:, t:(t + 2)]
        else:
            m = K.tf.slice(states[3], [0, t], [-1, 2])
        input_energy_t = input_energy_t * K.expand_dims(m[:, 0])
        # (1, F, F) * (B, 1, 1) -> (B, F, F)
        chain_energy = chain_energy * K.expand_dims(K.expand_dims(m[:, 0] * m[:, 1]))
    if return_logZ:
        # shapes: (1, B, F) + (B, F, 1) -> (B, F, F)
        energy = chain_energy + K.expand_dims(input_energy_t - prev_target_val, 2)
        new_target_val = K.logsumexp(-energy, 1)  # shapes: (B, F)
        return new_target_val, [new_target_val, i + 1]
    else:
        energy = chain_energy + K.expand_dims(input_energy_t + prev_target_val, 2)
        min_energy = K.min(energy, 1)
        argmin_table = K.cast(K.argmin(energy, 1), K.floatx())  # cast for tf-version `K.rnn`
        return argmin_table, [min_energy, i + 1]
Example #13
Source File: ChainCRF.py From elmo-bilstm-cnn-crf with Apache License 2.0
def _backward(gamma, mask):
    '''Backward recurrence of the linear chain crf.'''
    gamma = K.cast(gamma, 'int32')

    def _backward_step(gamma_t, states):
        y_tm1 = K.squeeze(states[0], 0)
        y_t = batch_gather(gamma_t, y_tm1)
        return y_t, [K.expand_dims(y_t, 0)]

    initial_states = [K.expand_dims(K.zeros_like(gamma[:, 0, 0]), 0)]
    _, y_rev, _ = K.rnn(_backward_step, gamma, initial_states, go_backwards=True)
    y = K.reverse(y_rev, 1)

    if mask is not None:
        mask = K.cast(mask, dtype='int32')
        # mask output
        y *= mask
        # set masked values to -1
        y += -(1 - mask)
    return y
Example #14
Source File: ChainCRF.py From elmo-bilstm-cnn-crf with Apache License 2.0
def _forward(x, reduce_step, initial_states, U, mask=None):
    '''Forward recurrence of the linear chain crf.'''
    def _forward_step(energy_matrix_t, states):
        alpha_tm1 = states[-1]
        new_states = reduce_step(K.expand_dims(alpha_tm1, 2) + energy_matrix_t)
        return new_states[0], new_states

    U_shared = K.expand_dims(K.expand_dims(U, 0), 0)
    if mask is not None:
        mask = K.cast(mask, K.floatx())
        mask_U = K.expand_dims(K.expand_dims(mask[:, :-1] * mask[:, 1:], 2), 3)
        U_shared = U_shared * mask_U

    inputs = K.expand_dims(x[:, 1:, :], 2) + U_shared
    inputs = K.concatenate([inputs, K.zeros_like(inputs[:, -1:, :, :])], axis=1)

    last, values, _ = K.rnn(_forward_step, inputs, initial_states)
    return last, values
Example #15
Source File: layers.py From delft with Apache License 2.0
def _backward(gamma, mask):
    """Backward recurrence of the linear chain crf."""
    gamma = K.cast(gamma, 'int32')

    def _backward_step(gamma_t, states):
        y_tm1 = K.squeeze(states[0], 0)
        y_t = batch_gather(gamma_t, y_tm1)
        return y_t, [K.expand_dims(y_t, 0)]

    initial_states = [K.expand_dims(K.zeros_like(gamma[:, 0, 0]), 0)]
    _, y_rev, _ = K.rnn(_backward_step, gamma, initial_states, go_backwards=True)
    y = K.reverse(y_rev, 1)

    if mask is not None:
        mask = K.cast(mask, dtype='int32')
        # mask output
        y *= mask
        # set masked values to -1
        y += -(1 - mask)
    return y
Example #16
Source File: PointerLSTM.py From pointer-networks-experiments with BSD 2-Clause "Simplified" License
def call(self, x, mask=None):
    input_shape = self.input_spec[0].shape
    en_seq = x
    x_input = x[:, input_shape[1]-1, :]
    x_input = K.repeat(x_input, input_shape[1])
    initial_states = self.get_initial_states(x_input)

    constants = super(PointerLSTM, self).get_constants(x_input)
    constants.append(en_seq)
    preprocessed_input = self.preprocess_input(x_input)

    last_output, outputs, states = K.rnn(self.step, preprocessed_input,
                                         initial_states,
                                         go_backwards=self.go_backwards,
                                         constants=constants,
                                         input_length=input_shape[1])
    return outputs
Example #17
Source File: layer_crf_bojone.py From nlp_xiaojiang with MIT License
def loss(self, y_true, y_pred):  # the target y_pred must be in one-hot form
    mask = 1 - y_true[:, 1:, -1] if self.ignore_last_label else None
    y_true, y_pred = y_true[:, :, :self.num_labels], y_pred[:, :, :self.num_labels]
    init_states = [y_pred[:, 0]]  # initial state
    log_norm, _, _ = K.rnn(self.log_norm_step, y_pred[:, 1:], init_states, mask=mask)  # compute the Z vector (in log space)
    log_norm = K.logsumexp(log_norm, 1, keepdims=True)  # compute Z (in log space)
    path_score = self.path_score(y_pred, y_true)  # compute the numerator (in log space)
    return log_norm - path_score  # i.e. -log(numerator / denominator)
Example #18
Source File: keras_bert_layer.py From nlp_xiaojiang with MIT License
def step(self, input_energy_t, states, return_logZ=True):
    # Note: in the following, `prev_target_val` has shape = (B, F)
    # where B = batch_size, F = output feature dim
    # Note: `i` is of float32, due to the behavior of `K.rnn`
    prev_target_val, i, chain_energy = states[:3]
    t = K.cast(i[0, 0], dtype='int32')
    if len(states) > 3:
        if K.backend() == 'theano':
            m = states[3][:, t:(t + 2)]
        else:
            m = K.tf.slice(states[3], [0, t], [-1, 2])
        input_energy_t = input_energy_t * K.expand_dims(m[:, 0])
        # (1, F, F) * (B, 1, 1) -> (B, F, F)
        chain_energy = chain_energy * K.expand_dims(
            K.expand_dims(m[:, 0] * m[:, 1]))
    if return_logZ:
        # shapes: (1, B, F) + (B, F, 1) -> (B, F, F)
        energy = chain_energy + K.expand_dims(input_energy_t - prev_target_val, 2)
        new_target_val = K.logsumexp(-energy, 1)  # shapes: (B, F)
        return new_target_val, [new_target_val, i + 1]
    else:
        energy = chain_energy + K.expand_dims(input_energy_t + prev_target_val, 2)
        min_energy = K.min(energy, 1)
        # cast for tf-version `K.rnn`
        argmin_table = K.cast(K.argmin(energy, 1), K.floatx())
        return argmin_table, [min_energy, i + 1]
Example #19
Source File: keras_bert_layer.py From nlp_xiaojiang with MIT License
def viterbi_decoding(self, X, mask=None):
    input_energy = self.activation(K.dot(X, self.kernel) + self.bias)
    if self.use_boundary:
        input_energy = self.add_boundary_energy(
            input_energy, mask, self.left_boundary, self.right_boundary)

    argmin_tables = self.recursion(input_energy, mask, return_logZ=False)
    argmin_tables = K.cast(argmin_tables, 'int32')

    # backward to find best path, `initial_best_idx` can be any,
    # as all elements in the last argmin_table are the same
    argmin_tables = K.reverse(argmin_tables, 1)
    # matrix instead of vector is required by tf `K.rnn`
    initial_best_idx = [K.expand_dims(argmin_tables[:, 0, 0])]
    if K.backend() == 'theano':
        initial_best_idx = [K.T.unbroadcast(initial_best_idx[0], 1)]

    def gather_each_row(params, indices):
        n = K.shape(indices)[0]
        if K.backend() == 'theano':
            return params[K.T.arange(n), indices]
        else:
            indices = K.transpose(K.stack([K.tf.range(n), indices]))
            return K.tf.gather_nd(params, indices)

    def find_path(argmin_table, best_idx):
        next_best_idx = gather_each_row(argmin_table, best_idx[0][:, 0])
        next_best_idx = K.expand_dims(next_best_idx)
        if K.backend() == 'theano':
            next_best_idx = K.T.unbroadcast(next_best_idx, 1)
        return next_best_idx, [next_best_idx]

    _, best_paths, _ = K.rnn(find_path, argmin_tables, initial_best_idx,
                             input_length=K.int_shape(X)[1], unroll=self.unroll)
    best_paths = K.reverse(best_paths, 1)
    best_paths = K.squeeze(best_paths, 2)

    return K.one_hot(best_paths, self.units)
Example #20
Source File: nse.py From neural-semantic-encoders with Apache License 2.0
def compose_and_write_step(self, o_t, states):
    '''
    This method is a step function that updates the memory at each time step and produces
    a new output vector (Equations 2 to 6 in the paper).
    The memory_state is flattened because K.rnn requires all states to be of the same
    shape as the output, because it uses the same mask for the output and the states.

    Inputs:
        o_t (batch_size, output_dim)
        states (list[Tensor])
            flattened_mem_tm1 (batch_size, input_length * output_dim)
            writer_h_tm1 (batch_size, output_dim)
            writer_c_tm1 (batch_size, output_dim)

    Outputs:
        h_t (batch_size, output_dim)
        flattened_mem_t (batch_size, input_length * output_dim)
    '''
    flattened_mem_tm1, writer_h_tm1, writer_c_tm1 = states
    input_mem_shape = K.shape(flattened_mem_tm1)
    # Integer division (`//`) keeps the reshape target integral under Python 3.
    mem_tm1_shape = (input_mem_shape[0], input_mem_shape[1] // self.output_dim, self.output_dim)
    mem_tm1 = K.reshape(flattened_mem_tm1, mem_tm1_shape)  # (batch_size, input_length, output_dim)
    z_t, m_rt = self.summarize_memory(o_t, mem_tm1)
    c_t = self.compose_memory_and_output([o_t, m_rt])
    # Collecting the necessary variables to directly call writer's step function.
    writer_constants = self.writer.get_constants(c_t)  # returns dropouts for W and U (all 1s, see init)
    writer_states = [writer_h_tm1, writer_c_tm1] + writer_constants
    # Making a call to writer's step function, Equation 5
    h_t, [_, writer_c_t] = self.writer.step(c_t, writer_states)  # h_t, writer_c_t: (batch_size, output_dim)
    mem_t = self.update_memory(z_t, h_t, mem_tm1)
    flattened_mem_t = K.batch_flatten(mem_t)
    return h_t, [flattened_mem_t, h_t, writer_c_t]
Example #21
Source File: QnA.py From recurrent-attention-for-QA-SQUAD-based-on-keras with MIT License
def call(self, x, mask=None):
    # input shape: (nb_samples, time (padded with zeros), input_dim)
    # note that the .build() method of subclasses MUST define
    # self.input_spec with a complete input shape.
    input_shape = self.input_spec[0].shape
    if K._BACKEND == 'tensorflow':
        if not input_shape[1]:
            raise Exception('When using TensorFlow, you should define '
                            'explicitly the number of timesteps of '
                            'your sequences.\n'
                            'If your first layer is an Embedding, '
                            'make sure to pass it an "input_length" '
                            'argument. Otherwise, make sure '
                            'the first layer has '
                            'an "input_shape" or "batch_input_shape" '
                            'argument, including the time axis. '
                            'Found input shape at layer ' + self.name +
                            ': ' + str(input_shape))
    if self.layer.stateful:
        initial_states = self.layer.states
    else:
        initial_states = self.layer.get_initial_states(x)
    constants = self.get_constants(x)
    preprocessed_input = self.layer.preprocess_input(x)

    last_output, outputs, states = K.rnn(self.step, preprocessed_input,
                                         initial_states,
                                         go_backwards=self.layer.go_backwards,
                                         mask=mask,
                                         constants=constants,
                                         unroll=self.layer.unroll,
                                         input_length=input_shape[1])
    if self.layer.stateful:
        self.updates = []
        for i in range(len(states)):
            self.updates.append((self.layer.states[i], states[i]))

    if self.layer.return_sequences:
        return outputs
    else:
        return last_output
Example #22
Source File: layers.py From recurrent-attention-for-QA-SQUAD-based-on-keras with MIT License
def call(self, x, mask=None):
    # input shape: (nb_samples, time (padded with zeros), input_dim)
    # note that the .build() method of subclasses MUST define
    # self.input_spec with a complete input shape.
    input_shape = self.input_spec[0].shape
    if K._BACKEND == 'tensorflow':
        if not input_shape[1]:
            raise Exception('When using TensorFlow, you should define '
                            'explicitly the number of timesteps of '
                            'your sequences.\n'
                            'If your first layer is an Embedding, '
                            'make sure to pass it an "input_length" '
                            'argument. Otherwise, make sure '
                            'the first layer has '
                            'an "input_shape" or "batch_input_shape" '
                            'argument, including the time axis. '
                            'Found input shape at layer ' + self.name +
                            ': ' + str(input_shape))
    if self.layer.stateful:
        initial_states = self.layer.states
    else:
        initial_states = self.layer.get_initial_states(x)
    constants = self.get_constants(x)
    preprocessed_input = self.layer.preprocess_input(x)

    last_output, outputs, states = K.rnn(self.step, preprocessed_input,
                                         initial_states,
                                         go_backwards=self.layer.go_backwards,
                                         mask=mask,
                                         constants=constants,
                                         unroll=self.layer.unroll,
                                         input_length=input_shape[1])
    if self.layer.stateful:
        self.updates = []
        for i in range(len(states)):
            self.updates.append((self.layer.states[i], states[i]))

    if self.layer.return_sequences:
        return outputs
    else:
        return last_output
Example #23
Source File: nse.py From onto-lstm with Apache License 2.0
def step(self, input_t, states):
    '''
    This method is a step function that updates the memory at each time step and produces
    a new output vector (Equations 1 to 6 in the paper).
    The memory_state is flattened because K.rnn requires all states to be of the same
    shape as the output, because it uses the same mask for the output and the states.

    Inputs:
        input_t (batch_size, input_dim)
        states (list[Tensor])
            flattened_mem_tm1 (batch_size, input_length * output_dim)
            writer_h_tm1 (batch_size, output_dim)
            writer_c_tm1 (batch_size, output_dim)

    Outputs:
        h_t (batch_size, output_dim)
        flattened_mem_t (batch_size, input_length * output_dim)
    '''
    reader_states, flattened_mem_tm1, writer_states = self.split_states(states)
    input_mem_shape = K.shape(flattened_mem_tm1)
    # Integer division (`//`) keeps the reshape target integral under Python 3.
    mem_tm1_shape = (input_mem_shape[0], input_mem_shape[1] // self.output_dim, self.output_dim)
    mem_tm1 = K.reshape(flattened_mem_tm1, mem_tm1_shape)  # (batch_size, input_length, output_dim)
    reader_constants = self.reader.get_constants(input_t)  # Does not depend on input_t, see init.
    reader_states = reader_states[:2] + reader_constants + reader_states[2:]
    o_t, [_, reader_c_t] = self.reader.step(input_t, reader_states)  # o_t, reader_c_t: (batch_size, output_dim)
    z_t, m_rt = self.summarize_memory(o_t, mem_tm1)
    c_t = self.compose_memory_and_output([o_t, m_rt])
    # Collecting the necessary variables to directly call writer's step function.
    writer_constants = self.writer.get_constants(c_t)  # returns dropouts for W and U (all 1s, see init)
    writer_states += writer_constants
    # Making a call to writer's step function, Equation 5
    h_t, [_, writer_c_t] = self.writer.step(c_t, writer_states)  # h_t, writer_c_t: (batch_size, output_dim)
    mem_t = self.update_memory(z_t, h_t, mem_tm1)
    flattened_mem_t = K.batch_flatten(mem_t)
    return h_t, [o_t, reader_c_t, flattened_mem_t, h_t, writer_c_t]
Example #24
Source File: nse.py From onto-lstm with Apache License 2.0
def loop(self, x, initial_states, mask):
    # This is a separate method because Ontoaware variants will have to override this to make
    # a call to changingdim rnn.
    last_output, all_outputs, last_states = K.rnn(self.step, x, initial_states, mask=mask)
    return last_output, all_outputs, last_states
Example #25
Source File: layers.py From sequence-tagging-ner with Apache License 2.0
def viterbi_decoding(self, X, mask=None):
    input_energy = self.activation(K.dot(X, self.kernel) + self.bias)
    if self.use_boundary:
        input_energy = self.add_boundary_energy(
            input_energy, mask, self.left_boundary, self.right_boundary)

    argmin_tables = self.recursion(input_energy, mask, return_logZ=False)
    argmin_tables = K.cast(argmin_tables, 'int32')

    # backward to find best path, `initial_best_idx` can be any,
    # as all elements in the last argmin_table are the same
    argmin_tables = K.reverse(argmin_tables, 1)
    # matrix instead of vector is required by tf `K.rnn`
    initial_best_idx = [K.expand_dims(argmin_tables[:, 0, 0])]
    if K.backend() == 'theano':
        initial_best_idx = [K.T.unbroadcast(initial_best_idx[0], 1)]

    def gather_each_row(params, indices):
        n = K.shape(indices)[0]
        if K.backend() == 'theano':
            return params[K.T.arange(n), indices]
        else:
            indices = K.transpose(K.stack([K.tf.range(n), indices]))
            return K.tf.gather_nd(params, indices)

    def find_path(argmin_table, best_idx):
        next_best_idx = gather_each_row(argmin_table, best_idx[0][:, 0])
        next_best_idx = K.expand_dims(next_best_idx)
        if K.backend() == 'theano':
            next_best_idx = K.T.unbroadcast(next_best_idx, 1)
        return next_best_idx, [next_best_idx]

    _, best_paths, _ = K.rnn(find_path, argmin_tables, initial_best_idx,
                             input_length=K.int_shape(X)[1], unroll=self.unroll)
    best_paths = K.reverse(best_paths, 1)
    best_paths = K.squeeze(best_paths, 2)

    return K.one_hot(best_paths, self.units)
Example #26
Source File: time_distributed.py From deep_qa with Apache License 2.0
def call(self, inputs, mask=None):
    # Much of this is copied from the Keras 1.0(ish) version of TimeDistributed, though we've
    # modified it quite a bit, to fix the problems mentioned in the docstring and to use better
    # names.
    if not isinstance(inputs, list):
        inputs = [inputs]
        mask = [mask]
    else:
        if mask is None:
            mask = [None] * len(inputs)
    timesteps = K.int_shape(inputs[0])[1]
    input_shape = [K.int_shape(x_i) for x_i in inputs]
    if len(inputs) == 1:
        input_shape = input_shape[0]
    if len(inputs) == 1 and input_shape[0]:
        # The batch size is passed when defining the layer in some cases (for example if it is
        # stateful). We respect the input shape in that case and don't reshape the input. This
        # is slower. K.rnn also expects only a single tensor, so we can't do this if we have
        # multiple inputs.
        inputs = inputs[0]
        mask = mask[0]

        def step(x_i, _):
            output = self.layer.call(x_i)
            return output, []
        _, outputs, _ = K.rnn(step, inputs, mask=mask, initial_states=[])
    else:
        reshaped_xs, reshaped_masks = self.reshape_inputs_and_masks(inputs, mask)
        outputs = self.layer.call(reshaped_xs, mask=reshaped_masks)
        output_shape = self.compute_output_shape(input_shape)
        reshaped_shape = (-1, timesteps) + output_shape[2:]
        if reshaped_shape[-1] == 1 and not self.keep_dims:
            reshaped_shape = reshaped_shape[:-1]
        outputs = K.reshape(outputs, reshaped_shape)
    return outputs
Example #27
Source File: backend_test.py From DeepLearning_Wavelet-LSTM with MIT License
def legacy_test_rnn_no_states(self):
    # implement a simple RNN without states
    input_dim = 8
    output_dim = 4
    timesteps = 5

    input_val = np.random.random((32, timesteps, input_dim))
    W_i_val = np.random.random((input_dim, output_dim))

    def rnn_step_fn(k):
        W_i = k.variable(W_i_val)

        def step_function(x, states):
            assert len(states) == 0
            output = k.dot(x, W_i)
            return output, []
        return step_function

    # test default setup
    last_output_list = []
    outputs_list = []

    for k in BACKENDS:
        rnn_fn = rnn_step_fn(k)
        inputs = k.variable(input_val)
        initial_states = []
        last_output, outputs, new_states = k.rnn(rnn_fn, inputs,
                                                 initial_states,
                                                 go_backwards=False,
                                                 mask=None)
        last_output_list.append(k.eval(last_output))
        outputs_list.append(k.eval(outputs))
        assert len(new_states) == 0

    assert_list_pairwise(last_output_list, shape=False)
    assert_list_pairwise(outputs_list, shape=False)
Example #28
Source File: layers.py From anago with MIT License
def viterbi_decoding(self, X, mask=None):
    input_energy = self.activation(K.dot(X, self.kernel) + self.bias)
    if self.use_boundary:
        input_energy = self.add_boundary_energy(
            input_energy, mask, self.left_boundary, self.right_boundary)

    argmin_tables = self.recursion(input_energy, mask, return_logZ=False)
    argmin_tables = K.cast(argmin_tables, 'int32')

    # backward to find best path, `initial_best_idx` can be any,
    # as all elements in the last argmin_table are the same
    argmin_tables = K.reverse(argmin_tables, 1)
    # matrix instead of vector is required by tf `K.rnn`
    initial_best_idx = [K.expand_dims(argmin_tables[:, 0, 0])]
    if K.backend() == 'theano':
        initial_best_idx = [K.T.unbroadcast(initial_best_idx[0], 1)]

    def gather_each_row(params, indices):
        n = K.shape(indices)[0]
        if K.backend() == 'theano':
            return params[K.T.arange(n), indices]
        else:
            indices = K.transpose(K.stack([K.tf.range(n), indices]))
            return K.tf.gather_nd(params, indices)

    def find_path(argmin_table, best_idx):
        next_best_idx = gather_each_row(argmin_table, best_idx[0][:, 0])
        next_best_idx = K.expand_dims(next_best_idx)
        if K.backend() == 'theano':
            next_best_idx = K.T.unbroadcast(next_best_idx, 1)
        return next_best_idx, [next_best_idx]

    _, best_paths, _ = K.rnn(find_path, argmin_tables, initial_best_idx,
                             input_length=K.int_shape(X)[1], unroll=self.unroll)
    best_paths = K.reverse(best_paths, 1)
    best_paths = K.squeeze(best_paths, 2)

    return K.one_hot(best_paths, self.units)
Example #29
Source File: attention_lstm.py From keras-language-modeling with MIT License
def call(self, x, mask=None):
    # input shape: (nb_samples, time (padded with zeros), input_dim)
    # note that the .build() method of subclasses MUST define
    # self.input_spec with a complete input shape.
    input_shape = self.input_spec[0].shape
    if K._BACKEND == 'tensorflow':
        if not input_shape[1]:
            raise Exception('When using TensorFlow, you should define '
                            'explicitly the number of timesteps of '
                            'your sequences.\n'
                            'If your first layer is an Embedding, '
                            'make sure to pass it an "input_length" '
                            'argument. Otherwise, make sure '
                            'the first layer has '
                            'an "input_shape" or "batch_input_shape" '
                            'argument, including the time axis. '
                            'Found input shape at layer ' + self.name +
                            ': ' + str(input_shape))
    if self.layer.stateful:
        initial_states = self.layer.states
    else:
        initial_states = self.layer.get_initial_states(x)
    constants = self.get_constants(x)
    preprocessed_input = self.layer.preprocess_input(x)

    last_output, outputs, states = K.rnn(self.step, preprocessed_input,
                                         initial_states,
                                         go_backwards=self.layer.go_backwards,
                                         mask=mask,
                                         constants=constants,
                                         unroll=self.layer.unroll,
                                         input_length=input_shape[1])
    if self.layer.stateful:
        self.updates = []
        for i in range(len(states)):
            self.updates.append((self.layer.states[i], states[i]))

    if self.layer.return_sequences:
        return outputs
    else:
        return last_output
Example #30
Source File: layers.py From indic_tagger with Apache License 2.0
def viterbi_decoding(self, X, mask=None):
    input_energy = self.activation(K.dot(X, self.kernel) + self.bias)
    if self.use_boundary:
        input_energy = self.add_boundary_energy(
            input_energy, mask, self.left_boundary, self.right_boundary)

    argmin_tables = self.recursion(input_energy, mask, return_logZ=False)
    argmin_tables = K.cast(argmin_tables, 'int32')

    # backward to find best path, `initial_best_idx` can be any,
    # as all elements in the last argmin_table are the same
    argmin_tables = K.reverse(argmin_tables, 1)
    # matrix instead of vector is required by tf `K.rnn`
    initial_best_idx = [K.expand_dims(argmin_tables[:, 0, 0])]
    if K.backend() == 'theano':
        initial_best_idx = [K.T.unbroadcast(initial_best_idx[0], 1)]

    def gather_each_row(params, indices):
        n = K.shape(indices)[0]
        if K.backend() == 'theano':
            return params[K.T.arange(n), indices]
        else:
            indices = K.transpose(K.stack([K.tf.range(n), indices]))
            return K.tf.gather_nd(params, indices)

    def find_path(argmin_table, best_idx):
        next_best_idx = gather_each_row(argmin_table, best_idx[0][:, 0])
        next_best_idx = K.expand_dims(next_best_idx)
        if K.backend() == 'theano':
            next_best_idx = K.T.unbroadcast(next_best_idx, 1)
        return next_best_idx, [next_best_idx]

    _, best_paths, _ = K.rnn(find_path, argmin_tables, initial_best_idx,
                             input_length=K.int_shape(X)[1], unroll=self.unroll)
    best_paths = K.reverse(best_paths, 1)
    best_paths = K.squeeze(best_paths, 2)

    return K.one_hot(best_paths, self.units)