Python theano.tensor.roll() Examples
The following are 6 code examples of theano.tensor.roll(). You may also want to check out all available functions/classes of the module theano.tensor.
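As a quick reference before the examples: tensor.roll(x, shift, axis=None) circularly shifts the elements of a tensor along the given axis, wrapping the overflow around to the other end, much like numpy.roll. A minimal sketch of the behavior (variable names are illustrative):

import numpy as np
import theano
import theano.tensor as T

x = T.ivector('x')
shifted = T.roll(x, shift=1)  # the last element wraps around to the front
f = theano.function([x], shifted)
print(f(np.array([1, 2, 3, 4], dtype='int32')))  # -> [4 1 2 3]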
Example #1
Source File: main.py From attention-lvcsr with MIT License
def add_exploration(recognizer, data, train_conf):
    prediction = None
    prediction_mask = None
    explore_conf = train_conf.get('exploration', 'imitative')
    if explore_conf in ['greedy', 'mixed']:
        length_expand = 10
        prediction = recognizer.get_generate_graph(
            n_steps=recognizer.labels.shape[0] + length_expand)['outputs']
        # Mask out everything after the first end-of-sequence token.
        prediction_mask = tensor.lt(
            tensor.cumsum(tensor.eq(prediction, data.eos_label), axis=0),
            1).astype(floatX)
        # Shift the mask one step along time so the EOS token itself
        # stays unmasked, and force the first step to be visible.
        prediction_mask = tensor.roll(prediction_mask, 1, 0)
        prediction_mask = tensor.set_subtensor(
            prediction_mask[0, :],
            tensor.ones_like(prediction_mask[0, :]))

        if explore_conf == 'mixed':
            # Per example, flip a fair coin to choose between the
            # generated prediction and the ground-truth targets.
            batch_size = recognizer.labels.shape[1]
            targets = tensor.concatenate([
                recognizer.labels,
                tensor.zeros((length_expand, batch_size), dtype='int64')])
            targets_mask = tensor.concatenate([
                recognizer.labels_mask,
                tensor.zeros((length_expand, batch_size), dtype=floatX)])
            rng = MRG_RandomStreams()
            generate = rng.binomial((batch_size,), p=0.5, dtype='int64')
            prediction = (generate[None, :] * prediction +
                          (1 - generate[None, :]) * targets)
            prediction_mask = (
                tensor.cast(generate[None, :] * prediction_mask, floatX) +
                tensor.cast((1 - generate[None, :]) * targets_mask, floatX))

        prediction_mask = theano.gradient.disconnected_grad(prediction_mask)
    elif explore_conf != 'imitative':
        raise ValueError
    return prediction, prediction_mask
Example #2
Source File: utils_.py From kusanagi with MIT License
def fast_jacobian(expr, wrt, chunk_size=16, func=None):
    '''
    Computes the jacobian by tiling the inputs
    Copied from https://gist.github.com/aam-at/2b2bc5c35850b553d4ec
    '''
    assert isinstance(expr, Variable), \
        "tensor.jacobian expects a Variable as `expr`"
    assert expr.ndim < 2, \
        ("tensor.jacobian expects a 1 dimensional variable as "
         "`expr`. If not use flatten to make it a vector")

    num_chunks = tt.ceil(1.0 * expr.shape[0] / chunk_size)
    num_chunks = tt.cast(num_chunks, 'int32')
    steps = tt.arange(num_chunks)
    remainder = expr.shape[0] % chunk_size

    def chunk_grad(i):
        ''' operates on a subset of the gradient variables '''
        wrt_rep = tt.tile(wrt, (chunk_size, 1))
        if func is not None:
            expr_rep = func(wrt_rep)
        else:
            expr_rep, _ = theano.scan(
                fn=lambda wrt_: theano.clone(expr, {wrt: wrt_}),
                sequences=wrt_rep)
        chunk_expr_grad = tt.roll(
            tt.identity_like(expr_rep),
            i * chunk_size,
            axis=1)
        return tt.grad(cost=None,
                       wrt=wrt_rep,
                       known_grads={expr_rep: chunk_expr_grad})

    grads, _ = theano.scan(chunk_grad, sequences=steps)
    grads = grads.reshape((chunk_size * grads.shape[0], wrt.shape[0]))
    jac = ifelse.ifelse(tt.eq(remainder, 0),
                        grads,
                        grads[:expr.shape[0], :])
    return jac
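A quick way to exercise fast_jacobian might look like the following; the expression and dimensions are made up for illustration, and this assumes the imports from the module above (theano, tt, Variable, ifelse) are in scope:

import numpy as np
import theano
import theano.tensor as tt

w = tt.vector('w')
expr = tt.tanh(w) ** 2                 # any 1-D expression of w
jac = fast_jacobian(expr, w, chunk_size=4)
f = theano.function([w], jac)
x = np.ones(8, dtype=theano.config.floatX)
print(f(x).shape)                      # (8, 8) Jacobian, built 4 rows at a time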
Example #3
Source File: timesout.py From u24_lymphocyte with BSD 3-Clause "New" or "Revised" License
def activation(self, network, in_vw):
    in_var = in_vw.variable
    return in_var * T.roll(in_var, shift=1, axis=1)
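This activation multiplies each unit by its circular neighbour along axis 1. A small numeric sketch of the effect (the input values are arbitrary):

import numpy as np
import theano
import theano.tensor as T

x = T.matrix('x')
out = x * T.roll(x, shift=1, axis=1)
f = theano.function([x], out)
a = np.array([[1., 2., 3.]], dtype=theano.config.floatX)
print(f(a))  # [[3. 2. 6.]], i.e. [1*3, 2*1, 3*2]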
Example #4
Source File: cnn_rnn.py From transfer with MIT License
def get_output_for(self, input, **kwargs):
    def norm_fn(f, mask, label, previous, W_sim):
        # f: inst * class, mask: inst, previous: inst * class,
        # W_sim: class * class
        next = previous.dimshuffle(0, 1, 'x') + f.dimshuffle(0, 'x', 1) \
            + W_sim.dimshuffle('x', 0, 1)
        if COST:
            next = next + COST_CONST * (1.0 - T.extra_ops.to_one_hot(
                label, self.num_classes).dimshuffle(0, 'x', 1))
        # next: inst * prev * cur
        next = theano_logsumexp(next, axis=1)
        # next: inst * class
        mask = mask.dimshuffle(0, 'x')
        next = previous * (1.0 - mask) + next * mask
        return next

    f = T.dot(input, self.W)
    # f: inst * time * class
    initial = f[:, 0, :]
    if CRF_INIT:
        initial = initial + self.W_init[0].dimshuffle('x', 0)
    if COST:
        initial = initial + COST_CONST * (1.0 - T.extra_ops.to_one_hot(
            self.label_input[:, 0], self.num_classes))
    outputs, _ = theano.scan(
        fn=norm_fn,
        sequences=[f.dimshuffle(1, 0, 2)[1:],
                   self.mask_input.dimshuffle(1, 0)[1:],
                   self.label_input.dimshuffle(1, 0)[1:]],
        outputs_info=initial,
        non_sequences=[self.W_sim],
        strict=True)
    norm = T.sum(theano_logsumexp(outputs[-1], axis=1))

    f_pot = (f.reshape((-1, f.shape[-1]))[
        T.arange(f.shape[0] * f.shape[1]),
        self.label_input.flatten()] * self.mask_input.flatten()).sum()
    if CRF_INIT:
        f_pot += self.W_init[0][self.label_input[:, 0]].sum()

    labels = self.label_input
    # labels: inst * time
    shift_labels = T.roll(labels, -1, axis=1)
    mask = self.mask_input
    # mask: inst * time
    shift_mask = T.roll(mask, -1, axis=1)
    g_pot = (self.W_sim[labels.flatten(), shift_labels.flatten()]
             * mask.flatten() * shift_mask.flatten()).sum()

    return -(f_pot + g_pot - norm) / f.shape[0]
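In this CRF layer, T.roll(labels, -1, axis=1) lines each label up with its successor so that (labels, shift_labels) can index the transition matrix W_sim; multiplying by the shifted mask removes the wrapped-around pair at the end of each sequence. A tiny numpy sketch of that alignment (the label values are hypothetical):

import numpy as np

labels = np.array([[0, 2, 1, 1]])           # inst * time
shift_labels = np.roll(labels, -1, axis=1)  # [[2, 1, 1, 0]]; last entry wraps
# Transitions scored: (0->2), (2->1), (1->1); the wrapped (1->0) pair
# is zeroed out by the shifted mask.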
Example #5
Source File: timesout.py From treeano with Apache License 2.0
def activation(self, network, in_vw):
    in_var = in_vw.variable
    return in_var * T.roll(in_var, shift=1, axis=1)
Example #6
Source File: sequence_generators.py From attention-lvcsr with MIT License
def evaluate(self, application_call, outputs, mask=None, **kwargs):
    # We assume the data has axes (time, batch, features, ...)
    batch_size = outputs.shape[1]

    # Prepare input for the iterative part
    states = dict_subset(kwargs, self._state_names, must_have=False)
    # masks in context are optional (e.g. `attended_mask`)
    contexts = dict_subset(kwargs, self._context_names, must_have=False)
    feedback = self.readout.feedback(outputs)
    inputs = self.fork.apply(feedback, as_dict=True)

    # Run the recurrent network
    results = self.transition.apply(
        mask=mask, return_initial_states=True, as_dict=True,
        **dict_union(inputs, states, contexts))

    # Separate the deliverables. The last states are discarded: they
    # are not used to predict any output symbol. The initial glimpses
    # are discarded because they are not used for prediction.
    # Remember, glimpses are computed _before_ the output stage; states
    # are computed after.
    states = OrderedDict((name, results[name][:-1])
                         for name in self._state_names)
    glimpses = OrderedDict((name, results[name][1:])
                           for name in self._glimpse_names)

    # Compute the cost. Shift the feedback one step along time so that
    # step t reads the feedback of output t-1; the first step gets the
    # feedback of the initial outputs.
    feedback = tensor.roll(feedback, 1, 0)
    feedback = tensor.set_subtensor(
        feedback[0],
        self.readout.feedback(self.readout.initial_outputs(batch_size)))

    # Run the language model
    if self.language_model:
        lm_states = self.language_model.evaluate(
            outputs=outputs, mask=mask, as_dict=True)
        lm_states = {'lm_' + name: value
                     for name, value in lm_states.items()}
    else:
        lm_states = {}

    readouts = self.readout.readout(
        feedback=feedback,
        **dict_union(lm_states, states, glimpses, contexts))
    costs = self.readout.cost(readouts, outputs)
    if mask is not None:
        costs *= mask

    for name, variable in list(glimpses.items()) + list(states.items()):
        application_call.add_auxiliary_variable(
            variable.copy(), name=name)

    # These variables can be used to initialize the initial states of
    # the next batch using the last states of the current batch.
    for name in self._state_names + self._glimpse_names:
        application_call.add_auxiliary_variable(
            results[name][-1].copy(), name=name + "_final_value")

    return [costs] + list(states.values()) + list(glimpses.values())