Python theano.tensor.roll() Examples

The following are 6 code examples of theano.tensor.roll(), drawn from open-source projects. The source file, project, and license are noted above each example.
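theano.tensor.roll(x, shift, axis=None) behaves like numpy.roll: elements are shifted cyclically along the given axis (over the flattened tensor when axis is None), and anything shifted past the end wraps around to the front. A minimal sketch of the semantics:

import numpy as np
import theano
import theano.tensor as tt

x = tt.vector('x')
f = theano.function([x], tt.roll(x, shift=1))

v = np.arange(5).astype(theano.config.floatX)
print(f(v))  # [ 4.  0.  1.  2.  3.] -- the last element wraps to the front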
Example #1
Source File: main.py    From attention-lvcsr with MIT License
def add_exploration(recognizer, data, train_conf):

    prediction = None
    prediction_mask = None
    explore_conf = train_conf.get('exploration', 'imitative')
    if explore_conf in ['greedy', 'mixed']:
        length_expand = 10
        prediction = recognizer.get_generate_graph(
            n_steps=recognizer.labels.shape[0] + length_expand)['outputs']
        prediction_mask = tensor.lt(
            tensor.cumsum(tensor.eq(prediction, data.eos_label), axis=0),
            1).astype(floatX)
        prediction_mask = tensor.roll(prediction_mask, 1, 0)
        prediction_mask = tensor.set_subtensor(
            prediction_mask[0, :], tensor.ones_like(prediction_mask[0, :]))

        if explore_conf == 'mixed':
            batch_size = recognizer.labels.shape[1]
            targets = tensor.concatenate([
                recognizer.labels,
                tensor.zeros((length_expand, batch_size), dtype='int64')])

            targets_mask = tensor.concatenate([
                recognizer.labels_mask,
                tensor.zeros((length_expand, batch_size), dtype=floatX)])
            rng = MRG_RandomStreams()
            generate = rng.binomial((batch_size,), p=0.5, dtype='int64')
            prediction = (generate[None, :] * prediction +
                          (1 - generate[None, :]) * targets)
            prediction_mask = (tensor.cast(generate[None, :] *
                                           prediction_mask, floatX) +
                               tensor.cast((1 - generate[None, :]) *
                                           targets_mask, floatX))

        prediction_mask = theano.gradient.disconnected_grad(prediction_mask)
    elif explore_conf != 'imitative':
        raise ValueError("unknown exploration mode: {}".format(explore_conf))

    return prediction, prediction_mask 
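The roll/set_subtensor pair above is a shift-by-one idiom: the eq/cumsum/lt chain marks the steps strictly before the first eos_label in each column, and rolling that mask one step forward along the time axis (then forcing the first row to one) extends it to cover the eos step itself. A standalone sketch of the pattern, using a hypothetical eos label of 0 and a (time, batch) prediction matrix:

import numpy as np
import theano
import theano.tensor as tt

floatX = theano.config.floatX
eos_label = 0  # hypothetical end-of-sequence label

prediction = tt.imatrix('prediction')  # (time, batch)
# 1 strictly before the first eos in each column, 0 from the eos onwards
mask = tt.lt(tt.cumsum(tt.eq(prediction, eos_label), axis=0), 1).astype(floatX)
mask = tt.roll(mask, 1, 0)  # shift one step forward in time
mask = tt.set_subtensor(mask[0, :], tt.ones_like(mask[0, :]))

f = theano.function([prediction], mask)
print(f(np.array([[3, 3], [0, 3], [3, 0]], dtype='int32')))
# [[ 1.  1.]
#  [ 1.  1.]
#  [ 0.  1.]] -- each column is masked up to and including its first eos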
Example #2
Source File: utils_.py    From kusanagi with MIT License
def fast_jacobian(expr, wrt, chunk_size=16, func=None):
    '''
    Computes the Jacobian of `expr` with respect to `wrt` by tiling the inputs.
    Copied from https://gist.github.com/aam-at/2b2bc5c35850b553d4ec
    '''
    assert isinstance(expr, Variable), \
        "tensor.jacobian expects a Variable as `expr`"
    assert expr.ndim < 2, \
        ("tensor.jacobian expects a 1-dimensional variable as "
         "`expr`. If not, use flatten to make it a vector")

    num_chunks = tt.ceil(1.0 * expr.shape[0] / chunk_size)
    num_chunks = tt.cast(num_chunks, 'int32')
    steps = tt.arange(num_chunks)
    remainder = expr.shape[0] % chunk_size

    def chunk_grad(i):
        ''' operates on a subset of the gradient variables '''
        wrt_rep = tt.tile(wrt, (chunk_size, 1))
        if func is not None:
            expr_rep = func(wrt_rep)
        else:
            expr_rep, _ = theano.scan(
                fn=lambda wrt_: theano.clone(expr, {wrt: wrt_}),
                sequences=wrt_rep)
        chunk_expr_grad = tt.roll(
            tt.identity_like(expr_rep),
            i * chunk_size,
            axis=1)
        return tt.grad(cost=None,
                       wrt=wrt_rep,
                       known_grads={
                           expr_rep: chunk_expr_grad
                       })

    grads, _ = theano.scan(chunk_grad, sequences=steps)
    grads = grads.reshape((chunk_size * grads.shape[0], wrt.shape[0]))
    jac = ifelse.ifelse(tt.eq(remainder, 0), grads, grads[:expr.shape[0], :])
    return jac 
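A usage sketch for fast_jacobian, assuming the imports the excerpt relies on (theano, theano.tensor as tt, ifelse from theano, and Variable from theano.gof); the test function and chunk size here are arbitrary:

import numpy as np
import theano
import theano.tensor as tt
from theano import ifelse
from theano.gof import Variable

x = tt.vector('x')
expr = tt.tanh(x) ** 2  # elementwise, so the Jacobian is diagonal
jac = fast_jacobian(expr, x, chunk_size=4)  # fast_jacobian as defined above
f = theano.function([x], jac)

v = np.ones(6, dtype=theano.config.floatX)
print(f(v).shape)  # (6, 6)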
Example #3
Source File: timesout.py    From u24_lymphocyte with BSD 3-Clause "New" or "Revised" License
def activation(self, network, in_vw):
        in_var = in_vw.variable
        return in_var * T.roll(in_var, shift=1, axis=1) 
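With shift=1 on the feature axis, each unit is multiplied by its cyclic left neighbour: output j is x[j] * x[j-1], and unit 0 wraps around to the last unit. A minimal numeric sketch:

import numpy as np
import theano
import theano.tensor as tt

x = tt.matrix('x')  # (batch, features)
y = x * tt.roll(x, shift=1, axis=1)  # y[:, j] = x[:, j] * x[:, j-1], wrapping around
f = theano.function([x], y)

v = np.array([[1.0, 2.0, 3.0, 4.0]], dtype=theano.config.floatX)
print(f(v))  # [[  4.   2.   6.  12.]]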
Example #4
Source File: cnn_rnn.py    From transfer with MIT License
def get_output_for(self, input, **kwargs):
        def norm_fn(f, mask, label, previous, W_sim):
            # f: inst * class, mask: inst, previous: inst * class, W_sim: class * class
            scores = previous.dimshuffle(0, 1, 'x') + f.dimshuffle(0, 'x', 1) + W_sim.dimshuffle('x', 0, 1)
            if COST:
                scores = scores + COST_CONST * (1.0 - T.extra_ops.to_one_hot(label, self.num_classes).dimshuffle(0, 'x', 1))
            # scores: inst * prev * cur
            scores = theano_logsumexp(scores, axis=1)
            # scores: inst * class
            mask = mask.dimshuffle(0, 'x')
            scores = previous * (1.0 - mask) + scores * mask
            return scores

        f = T.dot(input, self.W)
        # f: inst * time * class

        initial = f[:, 0, :]
        if CRF_INIT:
            initial = initial + self.W_init[0].dimshuffle('x', 0)
        if COST:
            initial = initial + COST_CONST * (1.0 - T.extra_ops.to_one_hot(self.label_input[:, 0], self.num_classes))
        outputs, _ = theano.scan(
            fn=norm_fn,
            sequences=[f.dimshuffle(1, 0, 2)[1:],
                       self.mask_input.dimshuffle(1, 0)[1:],
                       self.label_input.dimshuffle(1, 0)[1:]],
            outputs_info=initial, non_sequences=[self.W_sim], strict=True)
        norm = T.sum(theano_logsumexp(outputs[-1], axis=1))

        f_pot = (f.reshape((-1, f.shape[-1]))[T.arange(f.shape[0] * f.shape[1]), self.label_input.flatten()] * self.mask_input.flatten()).sum()
        if CRF_INIT:
            f_pot += self.W_init[0][self.label_input[:, 0]].sum()

        labels = self.label_input
        # labels: inst * time
        shift_labels = T.roll(labels, -1, axis=1)
        mask = self.mask_input
        # mask: inst * time
        shift_mask = T.roll(mask, -1, axis=1)

        g_pot = (self.W_sim[labels.flatten(), shift_labels.flatten()] * mask.flatten() * shift_mask.flatten()).sum()

        return -(f_pot + g_pot - norm) / f.shape[0]
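Rolling the label matrix by -1 along the time axis aligns each label with its successor, so the advanced indexing W_sim[labels, shift_labels] gathers the transition score of every adjacent pair; multiplying the mask by its own shifted copy drops pairs that reach into padding, including the cyclic wrap-around pair at the end of each row. A standalone sketch with a hypothetical 3-class transition matrix:

import numpy as np
import theano
import theano.tensor as tt

labels = tt.imatrix('labels')  # (inst, time)
mask = tt.matrix('mask')       # (inst, time), 0 on padding
W_sim = tt.matrix('W_sim')     # (class, class) transition scores

shift_labels = tt.roll(labels, -1, axis=1)  # label at t + 1 (wraps at the end)
shift_mask = tt.roll(mask, -1, axis=1)
g_pot = (W_sim[labels.flatten(), shift_labels.flatten()]
         * mask.flatten() * shift_mask.flatten()).sum()

f = theano.function([labels, mask, W_sim], g_pot)
W = np.arange(9, dtype=theano.config.floatX).reshape(3, 3)
y = np.array([[0, 1, 2, 0]], dtype='int32')            # last step is padding
m = np.array([[1, 1, 1, 0]], dtype=theano.config.floatX)
print(f(y, m, W))  # W[0, 1] + W[1, 2] = 6.0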
Example #5
Source File: timesout.py    From treeano with Apache License 2.0
def activation(self, network, in_vw):
        in_var = in_vw.variable
        return in_var * T.roll(in_var, shift=1, axis=1) 
Example #6
Source File: sequence_generators.py    From attention-lvcsr with MIT License
def evaluate(self, application_call, outputs, mask=None, **kwargs):
        # We assume the data has axes (time, batch, features, ...)
        batch_size = outputs.shape[1]

        # Prepare input for the iterative part
        states = dict_subset(kwargs, self._state_names, must_have=False)
        # masks in context are optional (e.g. `attended_mask`)
        contexts = dict_subset(kwargs, self._context_names, must_have=False)
        feedback = self.readout.feedback(outputs)
        inputs = self.fork.apply(feedback, as_dict=True)

        # Run the recurrent network
        results = self.transition.apply(
            mask=mask, return_initial_states=True, as_dict=True,
            **dict_union(inputs, states, contexts))

        # Separate the deliverables. The last states are discarded: they
        # are not used to predict any output symbol. The initial glimpses
        # are discarded because they are not used for prediction.
        # Remember, glimpses are computed _before_ the output stage and
        # states _after_ it.
        states = OrderedDict((name, results[name][:-1]) for name in self._state_names)
        glimpses = OrderedDict((name, results[name][1:]) for name in self._glimpse_names)

        # Compute the cost
        feedback = tensor.roll(feedback, 1, 0)
        feedback = tensor.set_subtensor(
            feedback[0],
            self.readout.feedback(self.readout.initial_outputs(batch_size)))

        # Run the language model
        if self.language_model:
            lm_states = self.language_model.evaluate(
                outputs=outputs, mask=mask, as_dict=True)
            lm_states = {'lm_' + name: value for name, value
                         in lm_states.items()}
        else:
            lm_states = {}

        readouts = self.readout.readout(
            feedback=feedback,
            **dict_union(lm_states, states, glimpses, contexts))
        costs = self.readout.cost(readouts, outputs)
        if mask is not None:
            costs *= mask

        for name, variable in list(glimpses.items()) + list(states.items()):
            application_call.add_auxiliary_variable(
                variable.copy(), name=name)

        # These variables can be used to initialize the states of the
        # next batch with the last states of the current batch.
        for name in self._state_names + self._glimpse_names:
            application_call.add_auxiliary_variable(
                results[name][-1].copy(), name=name+"_final_value")

        return [costs] + list(states.values()) + list(glimpses.values())