Python theano.tensor.ivector() Examples

The following are 30 code examples of theano.tensor.ivector(), drawn from open-source projects. The originating project and source file are listed above each example.
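
Before the project examples, here is a minimal self-contained sketch of the function itself. theano.tensor.ivector() builds a symbolic one-dimensional vector of int32 values that can be used as an input to a compiled Theano function:

import numpy
import theano
import theano.tensor as T

v = T.ivector('v')                  # symbolic 1-D int32 vector
f = theano.function([v], v * 2)     # compile an expression over it
print(f(numpy.array([1, 2, 3], dtype='int32')))  # -> [2 4 6]
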
Example #1
Source File: dnn.py    From DL4H with MIT License
def get_SGD_trainer(self):
        """ Returns a plain SGD minibatch trainer with learning rate as param. """
        batch_x = T.fmatrix('batch_x')
        batch_y = T.ivector('batch_y')
        learning_rate = T.fscalar('lr')  # learning rate
        gparams = T.grad(self.mean_cost, self.params)  # all the gradients
        updates = OrderedDict()
        for param, gparam in zip(self.params, gparams):
            updates[param] = param - gparam * learning_rate

        train_fn = theano.function(inputs=[theano.Param(batch_x),
                                           theano.Param(batch_y),
                                           theano.Param(learning_rate)],
                                   outputs=self.mean_cost,
                                   updates=updates,
                                   givens={self.x: batch_x, self.y: batch_y})

        return train_fn 
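
A hedged usage sketch of the trainer returned above; the `net` instance and the minibatch arrays are assumptions, not part of the original source:

# Hypothetical usage; `net` is an instance of the DNN class this method
# belongs to, and x_batch / y_batch are assumed numpy minibatch arrays.
train_fn = net.get_SGD_trainer()
cost = train_fn(x_batch.astype('float32'),   # batch_x: fmatrix of features
                y_batch.astype('int32'),     # batch_y: ivector of labels
                0.01)                        # lr: fscalar learning rate
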
Example #2
Source File: test_basic.py    From D-VAE with MIT License
def test_csm(self):
        sp_types = {'csc': sp.csc_matrix,
                    'csr': sp.csr_matrix}

        for format in ['csc', 'csr']:
            for dtype in ['float32', 'float64']:
                x = tensor.tensor(dtype=dtype, broadcastable=(False,))
                y = tensor.ivector()
                z = tensor.ivector()
                s = tensor.ivector()
                f = theano.function([x, y, z, s], CSM(format)(x, y, z, s))

                spmat = sp_types[format](random_lil((4, 3), dtype, 3))

                res = f(spmat.data, spmat.indices, spmat.indptr,
                        numpy.asarray(spmat.shape, 'int32'))

                assert numpy.all(res.data == spmat.data)
                assert numpy.all(res.indices == spmat.indices)
                assert numpy.all(res.indptr == spmat.indptr)
                assert numpy.all(res.shape == spmat.shape) 
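
For context, a small sketch (separate from the test, assuming SciPy is available) of why the indices and indptr inputs are declared as ivectors: for matrices of this size SciPy stores them as int32 index arrays.

import numpy
import scipy.sparse as sp

dense = numpy.array([[0, 1, 0], [2, 0, 3]], dtype='float32')
m = sp.csr_matrix(dense)
print(m.data)             # non-zero values      -> the `x` input above
print(m.indices.dtype)    # int32 column indices -> the `y` ivector
print(m.indptr.dtype)     # int32 row pointers   -> the `z` ivector
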
Example #3
Source File: test_basic.py    From D-VAE with MIT License
def test_csm_unsorted(self):
        """
        Test support for gradients of unsorted inputs.
        """
        sp_types = {'csc': sp.csc_matrix,
                    'csr': sp.csr_matrix}

        for format in ['csr', 'csc', ]:
            for dtype in ['float32', 'float64']:
                x = tensor.tensor(dtype=dtype, broadcastable=(False,))
                y = tensor.ivector()
                z = tensor.ivector()
                s = tensor.ivector()
                # Sparse advanced indexing produces unsorted sparse matrices
                a = sparse_random_inputs(format, (4, 3), out_dtype=dtype,
                                         unsorted_indices=True)[1][0]
                # Make sure it's unsorted
                assert not a.has_sorted_indices
                def my_op(x):
                    y = tensor.constant(a.indices)
                    z = tensor.constant(a.indptr)
                    s = tensor.constant(a.shape)
                    return tensor.sum(
                        dense_from_sparse(CSM(format)(x, y, z, s) * a))
                verify_grad_sparse(my_op, [a.data]) 
Example #4
Source File: test_opt.py    From D-VAE with MIT License
def test_local_csm_grad_c():
    raise SkipTest("Opt disabled as it doesn't support unsorted indices")
    if not theano.config.cxx:
        raise SkipTest("G++ not available, so we need to skip this test.")
    data = tensor.vector()
    indices, indptr, shape = (tensor.ivector(), tensor.ivector(),
                              tensor.ivector())
    mode = theano.compile.mode.get_default_mode()

    if theano.config.mode == 'FAST_COMPILE':
        mode = theano.compile.Mode(linker='c|py', optimizer='fast_compile')

    mode = mode.including("specialize", "local_csm_grad_c")
    for CS, cast in [(sparse.CSC, sp.csc_matrix), (sparse.CSR, sp.csr_matrix)]:
        cost = tensor.sum(sparse.DenseFromSparse()(CS(data, indices, indptr, shape)))
        f = theano.function(
            [data, indices, indptr, shape],
            tensor.grad(cost, data),
            mode=mode)
        assert not any(isinstance(node.op, sparse.CSMGrad) for node
                       in f.maker.fgraph.toposort())
        v = cast(random_lil((10, 40),
                            config.floatX, 3))
        f(v.data, v.indices, v.indptr, v.shape) 
Example #5
Source File: test_opt.py    From D-VAE with MIT License
def test_local_csm_properties_csm():
    data = tensor.vector()
    indices, indptr, shape = (tensor.ivector(), tensor.ivector(),
                              tensor.ivector())
    mode = theano.compile.mode.get_default_mode()
    mode = mode.including("specialize", "local_csm_properties_csm")
    for CS, cast in [(sparse.CSC, sp.csc_matrix),
                     (sparse.CSR, sp.csr_matrix)]:
        f = theano.function([data, indices, indptr, shape],
                            sparse.csm_properties(
                                CS(data, indices, indptr, shape)),
                            mode=mode)
        assert not any(
            isinstance(node.op, (sparse.CSM, sparse.CSMProperties))
            for node in f.maker.fgraph.toposort())
        v = cast(random_lil((10, 40),
                            config.floatX, 3))
        f(v.data, v.indices, v.indptr, v.shape) 
Example #6
Source File: test_basic.py    From D-VAE with MIT License
def test_csm_grad(self):
        for sparsetype in ('csr', 'csc'):
            x = tensor.vector()
            y = tensor.ivector()
            z = tensor.ivector()
            s = tensor.ivector()
            call = getattr(sp, sparsetype + '_matrix')
            spm = call(random_lil((300, 400), config.floatX, 5))
            out = tensor.grad(dense_from_sparse(
                CSM(sparsetype)(x, y, z, s)
            ).sum(), x)
            self._compile_and_check([x, y, z, s],
                                    [out],
                                    [spm.data, spm.indices, spm.indptr,
                                     spm.shape],
                                    (CSMGrad, CSMGradC)
                                   ) 
Example #7
Source File: dnn.py    From DL4H with MIT License
def get_adagrad_trainer(self):
        """ Returns an Adagrad (Duchi et al. 2010) trainer using a learning rate.
        """
        batch_x = T.fmatrix('batch_x')
        batch_y = T.ivector('batch_y')
        learning_rate = T.fscalar('lr')  # learning rate
        gparams = T.grad(self.mean_cost, self.params)  # all the gradients
        updates = OrderedDict()
        for accugrad, param, gparam in zip(self._accugrads, self.params, gparams):
            # c.f. Algorithm 1 in the Adadelta paper (Zeiler 2012)
            agrad = accugrad + gparam * gparam
            dx = - (learning_rate / T.sqrt(agrad + self._eps)) * gparam
            updates[param] = param + dx
            updates[accugrad] = agrad

        train_fn = theano.function(inputs=[theano.Param(batch_x), 
            theano.Param(batch_y),
            theano.Param(learning_rate)],
            outputs=self.mean_cost,
            updates=updates,
            givens={self.x: batch_x, self.y: batch_y})

        return train_fn 
Example #8
Source File: dnn.py    From DL4H with MIT License
def get_adadelta_trainer(self):
        """ Returns an Adadelta (Zeiler 2012) trainer using self._rho and
        self._eps params. """
        batch_x = T.fmatrix('batch_x')
        batch_y = T.ivector('batch_y')
        gparams = T.grad(self.mean_cost, self.params)
        updates = OrderedDict()
        for accugrad, accudelta, param, gparam in zip(self._accugrads,
                self._accudeltas, self.params, gparams):
            # c.f. Algorithm 1 in the Adadelta paper (Zeiler 2012)
            agrad = self._rho * accugrad + (1 - self._rho) * gparam * gparam
            dx = - T.sqrt((accudelta + self._eps)
                          / (agrad + self._eps)) * gparam
            updates[accudelta] = (self._rho * accudelta
                                  + (1 - self._rho) * dx * dx)
            updates[param] = param + dx
            updates[accugrad] = agrad

        train_fn = theano.function(inputs=[theano.Param(batch_x),
                                           theano.Param(batch_y)],
                                   outputs=self.mean_cost,
                                   updates=updates,
                                   givens={self.x: batch_x, self.y: batch_y})

        return train_fn 
Example #9
Source File: agent.py    From StockRecommendSystem with MIT License
def __init__(self, seq_len, n_feature):
        import theano.tensor as T
        self.Input = lasagne.layers.InputLayer(shape=(None, seq_len, n_feature))
        self.buildNetwork()
        self.output = lasagne.layers.get_output(self.network)
        self.params = lasagne.layers.get_all_params(self.network, trainable=True)
        self.output_fn = theano.function([self.Input.input_var], self.output)

        fx = T.fvector().astype("float64")
        choices = T.ivector()
        px = self.output[T.arange(self.output.shape[0]), choices]
        log_px = T.log(px)
        cost = -fx.dot(log_px)
        updates = lasagne.updates.adagrad(cost, self.params, 0.0008)
        Input = lasagne.layers.InputLayer(shape=(None, seq_len, n_feature))
        self.train_fn = theano.function([self.Input.input_var, choices, fx], [cost, px, log_px], updates=updates) 
Example #10
Source File: network3.py    From WannaPark with GNU General Public License v3.0
def __init__(self, layers, mini_batch_size):
        """Takes a list of `layers`, describing the network architecture, and
        a value for the `mini_batch_size` to be used during training
        by stochastic gradient descent.

        """
        self.layers = layers
        self.mini_batch_size = mini_batch_size
        self.params = [param for layer in self.layers for param in layer.params]
        self.x = T.matrix("x")
        self.y = T.ivector("y")
        init_layer = self.layers[0]
        init_layer.set_inpt(self.x, self.x, self.mini_batch_size)
        for j in xrange(1, len(self.layers)):
            prev_layer, layer  = self.layers[j-1], self.layers[j]
            layer.set_inpt(
                prev_layer.output, prev_layer.output_dropout, self.mini_batch_size)
        self.output = self.layers[-1].output
        self.output_dropout = self.layers[-1].output_dropout 
Example #11
Source File: fr3dnet_trainer.py    From luna16 with BSD 2-Clause "Simplified" License
def __init__(self):
        metric_names = ['Loss','L2','Accuracy']
        super(Fr3dNetTrainer, self).__init__(metric_names)

        tensor5 = T.TensorType(theano.config.floatX, (False,) * 5)
        input_var = tensor5('inputs')
        target_var = T.ivector('targets')

        logging.info("Defining network")
        net = fr3dnet.define_network(input_var)
        self.network = net
        train_fn, val_fn, l_r = fr3dnet.define_updates(net, input_var, target_var)

        self.train_fn = train_fn
        self.val_fn = val_fn
        self.l_r = l_r 
Example #12
Source File: modelbase.py    From Theano-Lights with MIT License
def __init__(self, id, data, hp):
        self.type = 'LM'
        self.id = id
        self.filename = 'savedmodels\model_'+id+'.pkl'
        self.hp = hp

        self.X = T.imatrix()
        self.Y = T.ivector()
        self.seed_idx = T.iscalar()

        self.X.tag.test_value = np.random.randn(hp.seq_size, hp.batch_size).astype(dtype=np.int32)

        self.data = copy.copy(data)
        for key in ('tr_X', 'va_X', 'te_X', 'tr_Y', 'va_Y', 'te_Y'):
            if key in self.data:
                self.data['len_'+key] = len(self.data[key])
                self.data[key] = shared(self.data[key], borrow=True, dtype=np.int32)
        
        if hp['debug']:
            theano.config.optimizer = 'None'
            theano.config.compute_test_value = 'ignore'
            theano.config.exception_verbosity = 'high' 
Example #13
Source File: test_opt.py    From attention-lvcsr with MIT License
def test_local_csm_properties_csm():
    data = tensor.vector()
    indices, indptr, shape = (tensor.ivector(), tensor.ivector(),
                              tensor.ivector())
    mode = theano.compile.mode.get_default_mode()
    mode = mode.including("specialize", "local_csm_properties_csm")
    for CS, cast in [(sparse.CSC, sp.csc_matrix),
                     (sparse.CSR, sp.csr_matrix)]:
        f = theano.function([data, indices, indptr, shape],
                            sparse.csm_properties(
                                CS(data, indices, indptr, shape)),
                            mode=mode)
        assert not any(
            isinstance(node.op, (sparse.CSM, sparse.CSMProperties))
            for node in f.maker.fgraph.toposort())
        v = cast(random_lil((10, 40),
                            config.floatX, 3))
        f(v.data, v.indices, v.indptr, v.shape) 
Example #14
Source File: test_opt.py    From attention-lvcsr with MIT License
def test_local_csm_grad_c():
    raise SkipTest("Opt disabled as it doesn't support unsorted indices")
    if not theano.config.cxx:
        raise SkipTest("G++ not available, so we need to skip this test.")
    data = tensor.vector()
    indices, indptr, shape = (tensor.ivector(), tensor.ivector(),
                              tensor.ivector())
    mode = theano.compile.mode.get_default_mode()

    if theano.config.mode == 'FAST_COMPILE':
        mode = theano.compile.Mode(linker='c|py', optimizer='fast_compile')

    mode = mode.including("specialize", "local_csm_grad_c")
    for CS, cast in [(sparse.CSC, sp.csc_matrix), (sparse.CSR, sp.csr_matrix)]:
        cost = tensor.sum(sparse.DenseFromSparse()(CS(data, indices, indptr, shape)))
        f = theano.function(
            [data, indices, indptr, shape],
            tensor.grad(cost, data),
            mode=mode)
        assert not any(isinstance(node.op, sparse.CSMGrad) for node
                       in f.maker.fgraph.toposort())
        v = cast(random_lil((10, 40),
                            config.floatX, 3))
        f(v.data, v.indices, v.indptr, v.shape) 
Example #15
Source File: test_basic.py    From attention-lvcsr with MIT License
def test_csm_grad(self):
        for sparsetype in ('csr', 'csc'):
            x = tensor.vector()
            y = tensor.ivector()
            z = tensor.ivector()
            s = tensor.ivector()
            call = getattr(sp, sparsetype + '_matrix')
            spm = call(random_lil((300, 400), config.floatX, 5))
            out = tensor.grad(dense_from_sparse(
                CSM(sparsetype)(x, y, z, s)
            ).sum(), x)
            self._compile_and_check([x, y, z, s],
                                    [out],
                                    [spm.data, spm.indices, spm.indptr,
                                     spm.shape],
                                    (CSMGrad, CSMGradC)
                                   ) 
Example #16
Source File: test_basic.py    From attention-lvcsr with MIT License
def test_csm_unsorted(self):
        """
        Test support for gradients of unsorted inputs.
        """
        sp_types = {'csc': sp.csc_matrix,
                    'csr': sp.csr_matrix}

        for format in ['csr', 'csc', ]:
            for dtype in ['float32', 'float64']:
                x = tensor.tensor(dtype=dtype, broadcastable=(False,))
                y = tensor.ivector()
                z = tensor.ivector()
                s = tensor.ivector()
                # Sparse advanced indexing produces unsorted sparse matrices
                a = sparse_random_inputs(format, (4, 3), out_dtype=dtype,
                                         unsorted_indices=True)[1][0]
                # Make sure it's unsorted
                assert not a.has_sorted_indices
                def my_op(x):
                    y = tensor.constant(a.indices)
                    z = tensor.constant(a.indptr)
                    s = tensor.constant(a.shape)
                    return tensor.sum(
                        dense_from_sparse(CSM(format)(x, y, z, s) * a))
                verify_grad_sparse(my_op, [a.data]) 
Example #17
Source File: test_basic.py    From attention-lvcsr with MIT License
def test_csm(self):
        sp_types = {'csc': sp.csc_matrix,
                    'csr': sp.csr_matrix}

        for format in ['csc', 'csr']:
            for dtype in ['float32', 'float64']:
                x = tensor.tensor(dtype=dtype, broadcastable=(False,))
                y = tensor.ivector()
                z = tensor.ivector()
                s = tensor.ivector()
                f = theano.function([x, y, z, s], CSM(format)(x, y, z, s))

                spmat = sp_types[format](random_lil((4, 3), dtype, 3))

                res = f(spmat.data, spmat.indices, spmat.indptr,
                        numpy.asarray(spmat.shape, 'int32'))

                assert numpy.all(res.data == spmat.data)
                assert numpy.all(res.indices == spmat.indices)
                assert numpy.all(res.indptr == spmat.indptr)
                assert numpy.all(res.shape == spmat.shape) 
Example #18
Source File: test_pfunc.py    From attention-lvcsr with MIT License
def test_givens(self):
        x = shared(0)
        assign = pfunc([], x, givens={x: 3})
        assert assign() == 3
        assert x.get_value(borrow=True) == 0

        y = tensor.ivector()
        f = pfunc([y], (y * x), givens={x: 6})
        assert numpy.all(f([1, 1, 1]) == [6, 6, 6])
        assert x.get_value() == 0

        z = tensor.ivector()
        c = z * y
        f = pfunc([y], (c + 7),
                givens={z: theano._asarray([4, 4, 4], dtype='int32')})
        assert numpy.all(f([1, 1, 1]) == [11, 11, 11])
        assert x.get_value() == 0 
Example #19
Source File: test_stack.py    From spinn with MIT License
def setUp(self):
        if 'gpu' not in theano.config.device:
            raise RuntimeError("Thin stack only defined for GPU usage")

        self.embedding_dim = self.model_dim = 2
        self.vocab_size = 5
        self.batch_size = 2
        self.num_classes = 2

        self.vs = VariableStore()
        self.compose_network = util.TreeLSTMLayer
        self.embedding_proj = IdentityLayer
        self.skip_embeddings = False

        self.X = T.imatrix("X")
        self.transitions = T.imatrix("transitions")
        self.y = T.ivector("y") 
Example #20
Source File: test_basic.py    From attention-lvcsr with MIT License
def test_csm_sparser(self):
        """
        Test support for gradients sparser than the input.
        """
        sp_types = {'csc': sp.csc_matrix,
                    'csr': sp.csr_matrix}

        for format in ['csc', 'csr']:
            for dtype in ['float32', 'float64']:
                x = tensor.tensor(dtype=dtype, broadcastable=(False,))
                y = tensor.ivector()
                z = tensor.ivector()
                s = tensor.ivector()

                a = as_sparse_variable(sp_types[format](random_lil((4, 3),
                                                                   dtype, 1)))

                f = theano.function([x, y, z, s],
                                    tensor.grad(dense_from_sparse(
                                        a * CSM(format)(x, y, z, s)).sum(), x))

                spmat = sp_types[format](random_lil((4, 3), dtype, 3))

                res = f(spmat.data, spmat.indices, spmat.indptr,
                        numpy.asarray(spmat.shape, 'int32'))

                assert len(spmat.data) == len(res) 
Example #21
Source File: dnn.py    From DL4H with MIT License
def get_rmsprop_trainer(self, with_step_adapt=True, nesterov=False):  # TODO Nesterov momentum
        """ Returns an RmsProp (possibly Nesterov) (Sutskever 2013) trainer
        using self._rho, self._eps and self._momentum params. """
        # TODO CHECK
        batch_x = T.fmatrix('batch_x')
        batch_y = T.ivector('batch_y')
        learning_rate = T.fscalar('lr')  # learning rate
        gparams = T.grad(self.mean_cost, self.params)
        updates = OrderedDict()
        for accugrad, avggrad, accudelta, sa, param, gparam in zip(
                self._accugrads, self._avggrads, self._accudeltas,
                self._stepadapts, self.params, gparams):
            acc_grad = self._rho * accugrad + (1 - self._rho) * gparam * gparam
            avg_grad = self._rho * avggrad + (1 - self._rho) * gparam  # this decay/discount (self._rho) should differ from the one of the line above
            ###scaled_grad = gparam / T.sqrt(acc_grad + self._eps)  # original RMSprop gradient scaling
            scaled_grad = gparam / T.sqrt(acc_grad - avg_grad**2 + self._eps)  # Alex Graves' RMSprop variant (divide by a "running stddev" of the updates)
            if with_step_adapt:
                incr = sa * (1. + self._stepadapt_alpha)
                #decr = sa * (1. - self._stepadapt_alpha)
                decr = sa * (1. - 2*self._stepadapt_alpha)
                ###steps = sa * T.switch(accudelta * -gparam >= 0, incr, decr)
                steps = T.clip(T.switch(accudelta * -gparam >= 0, incr, decr), self._eps, 1./self._eps)  # bad overloading of self._eps!
                scaled_grad = steps * scaled_grad
                updates[sa] = steps
            dx = self._momentum * accudelta - learning_rate * scaled_grad
            updates[param] = param + dx
            updates[accugrad] = acc_grad
            updates[avggrad] = avg_grad
            updates[accudelta] = dx

        train_fn = theano.function(inputs=[theano.Param(batch_x),
                                           theano.Param(batch_y),
                                           theano.Param(learning_rate)],
                                   outputs=self.mean_cost,
                                   updates=updates,
                                   givens={self.x: batch_x, self.y: batch_y})

        return train_fn 
Example #22
Source File: kdl_template.py    From SciPy2015 with BSD 3-Clause "New" or "Revised" License
def add_datasets_to_graph(list_of_datasets, list_of_names, graph, strict=True,
                          list_of_test_values=None):
    assert len(list_of_datasets) == len(list_of_names)
    datasets_added = []
    for n, (dataset, name) in enumerate(zip(list_of_datasets, list_of_names)):
        if dataset.dtype != "int32":
            if len(dataset.shape) == 1:
                sym = tensor.vector()
            elif len(dataset.shape) == 2:
                sym = tensor.matrix()
            elif len(dataset.shape) == 3:
                sym = tensor.tensor3()
            else:
                raise ValueError("dataset %s has unsupported shape" % name)
        elif dataset.dtype == "int32":
            if len(dataset.shape) == 1:
                sym = tensor.ivector()
            elif len(dataset.shape) == 2:
                sym = tensor.imatrix()
            elif len(dataset.shape) == 3:
                sym = tensor.itensor3()
            else:
                raise ValueError("dataset %s has unsupported shape" % name)
        else:
            raise ValueError("dataset %s has unsupported dtype %s" % (
                name, dataset.dtype))
        if list_of_test_values is not None:
            sym.tag.test_value = list_of_test_values[n]
        tag_expression(sym, name, dataset.shape)
        datasets_added.append(sym)
    graph["__datasets_added__"] = datasets_added
    return datasets_added 
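
A hypothetical call illustrating the int32 branch above; the arrays and the empty graph dict are assumptions, not part of the original source:

import numpy
graph = {}
X = numpy.random.randn(100, 20).astype('float32')     # 2-D float  -> tensor.matrix()
y = numpy.random.randint(0, 10, 100).astype('int32')  # 1-D int32  -> tensor.ivector()
X_sym, y_sym = add_datasets_to_graph([X, y], ["X", "y"], graph)
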
Example #23
Source File: nn_adagrad_pca.py    From kaggle_otto with BSD 3-Clause "New" or "Revised" License
def create_iter_functions(self, dataset, output_layer, X_tensor_type=T.matrix):
        batch_index = T.iscalar('batch_index')
        X_batch = X_tensor_type('x')
        y_batch = T.ivector('y')

        batch_slice = slice(batch_index * self.batch_size, (batch_index + 1) * self.batch_size)

        objective = Objective(output_layer, loss_function=categorical_crossentropy)

        loss_train = objective.get_loss(X_batch, target=y_batch)
        loss_eval = objective.get_loss(X_batch, target=y_batch, deterministic=True)

        pred = T.argmax(output_layer.get_output(X_batch, deterministic=True), axis=1)
        proba = output_layer.get_output(X_batch, deterministic=True)
        accuracy = T.mean(T.eq(pred, y_batch), dtype=theano.config.floatX)

        all_params = get_all_params(output_layer)
        updates = adagrad(loss_train, all_params, self.lr, self.epsilon)

        iter_train = theano.function(
            [batch_index], loss_train,
            updates=updates,
            givens={
                X_batch: dataset['X_train'][batch_slice],
                y_batch: dataset['y_train'][batch_slice],
            },
            on_unused_input='ignore',
        )

        iter_valid = None
        if self.use_valid:
            iter_valid = theano.function(
                [batch_index], [loss_eval, accuracy, proba],
                givens={
                    X_batch: dataset['X_valid'][batch_slice],
                    y_batch: dataset['y_valid'][batch_slice],
                },
            )

        return dict(train=iter_train, valid=iter_valid) 
Example #24
Source File: iv_rnn_one_hot.py    From pick_a_name with GNU General Public License v3.0
def __theano_build__(self):
        U, V, W = self.U, self.V, self.W
        x = T.ivector('x')
        y = T.ivector('y')

        def forward_prop_step(x_t, s_t_prev, U, V, W):
            s_t = T.tanh(U[:, x_t] + W.dot(s_t_prev))
            o_t = T.nnet.softmax(V.dot(s_t))
            return [o_t[0], s_t]

        [o, s], updates = theano.scan(
            forward_prop_step,
            sequences=x,
            outputs_info=[None, dict(initial=T.zeros(self.hidden_dim))],
            non_sequences=[U, V, W],
            truncate_gradient=self.bptt_truncate,
            strict=True)

        prediction = T.argmax(o, axis=1)
        o_error = T.sum(T.nnet.categorical_crossentropy(o, y))

        # Gradients
        dU = T.grad(o_error, U)
        dV = T.grad(o_error, V)
        dW = T.grad(o_error, W)

        # Assign functions
        self.predict = theano.function([x], o)
        self.predict_class = theano.function([x], prediction)
        self.ce_error = theano.function([x, y], o_error)
        self.bptt = theano.function([x, y], [dU, dV, dW])

        # SGD
        learning_rate = T.scalar('learning_rate')
        self.sgd_step = theano.function([x, y, learning_rate], [],
                                        updates=[(self.U, self.U - learning_rate * dU),
                                                 (self.V, self.V - learning_rate * dV),
                                                 (self.W, self.W - learning_rate * dW)]) 
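
A hypothetical training-step call for the functions compiled above; the model instance and the token-index sequences are assumptions:

# Hypothetical: `model` is an instance of the RNN class built above;
# x and y are int32 token-index sequences matching the ivector inputs.
x = numpy.array([0, 4, 7, 2], dtype='int32')   # input indices
y = numpy.array([4, 7, 2, 1], dtype='int32')   # next-token targets
model.sgd_step(x, y, 0.005)                    # one SGD update
print(model.ce_error(x, y))                    # cross-entropy after the step
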
Example #25
Source File: models.py    From dcnn with MIT License
def __init__(self, parameters, A):
        self.params = parameters

        # Prepare indices input.
        self.var_K = T.tensor3('Apow')
        self.var_X = T.matrix('X')
        self.var_I = T.ivector('I')
        self.var_Y = T.imatrix('Y')

        self.l_in_k = lasagne.layers.InputLayer((None, self.params.num_hops + 1, self.params.num_nodes),
                                                input_var=self.var_K)
        self.l_in_x = lasagne.layers.InputLayer((self.params.num_nodes, self.params.num_features), input_var=self.var_X)
        self.l_indices = lasagne.layers.InputLayer(
            (None,),
            input_var=self.var_I
        )

        self.K = util.A_to_diffusion_kernel(A, self.params.num_hops)

        # Overridable to customize init behavior.
        self._register_model_layers()

        loss_fn = params.loss_map[self.params.loss_fn]
        update_fn = params.update_map[self.params.update_fn]

        prediction = lasagne.layers.get_output(self.l_out)
        self._loss = lasagne.objectives.aggregate(loss_fn(prediction, self.var_Y), mode='mean')
        model_parameters = lasagne.layers.get_all_params(self.l_out)
        self._updates = update_fn(self._loss, model_parameters, learning_rate=self.params.learning_rate)
        if self.params.momentum:
            self._updates = lasagne.updates.apply_momentum(self._updates, model_parameters)

        self.apply_loss_and_update = theano.function([self.var_K, self.var_X, self.var_I, self.var_Y], self._loss,
                                                     updates=self._updates)
        self.apply_loss = theano.function([self.var_K, self.var_X, self.var_I, self.var_Y], self._loss) 
Example #26
Source File: logreg.py    From twitter-sent-dnn with MIT License
def errors(self, y):
        """
        the error rate

        :type y: theano.tensor.ivector
        :param y: the class labels to be compared with
        """
        assert y.ndim == self.pred_y.ndim
        assert y.dtype.startswith('int')

        return T.mean(T.neq(self.pred_y, y)) 
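
A self-contained sketch of the same error-rate computation with concrete int32 labels; the predicted-label vector here is an assumption standing in for self.pred_y:

import numpy
import theano
import theano.tensor as T

pred_y = T.ivector('pred_y')           # stands in for self.pred_y
y = T.ivector('y')                     # true class labels, as in errors()
error_rate = T.mean(T.neq(pred_y, y))  # fraction of mismatched labels
f = theano.function([pred_y, y], error_rate)
print(f(numpy.array([0, 1, 2], dtype='int32'),
        numpy.array([0, 2, 2], dtype='int32')))   # -> 0.333...
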
Example #27
Source File: nn_adagrad_log.py    From kaggle_otto with BSD 3-Clause "New" or "Revised" License
def create_iter_functions(self, dataset, output_layer, X_tensor_type=T.matrix):
        batch_index = T.iscalar('batch_index')
        X_batch = X_tensor_type('x')
        y_batch = T.ivector('y')

        batch_slice = slice(batch_index * self.batch_size, (batch_index + 1) * self.batch_size)

        objective = Objective(output_layer, loss_function=categorical_crossentropy)

        loss_train = objective.get_loss(X_batch, target=y_batch)
        loss_eval = objective.get_loss(X_batch, target=y_batch, deterministic=True)

        pred = T.argmax(output_layer.get_output(X_batch, deterministic=True), axis=1)
        proba = output_layer.get_output(X_batch, deterministic=True)
        accuracy = T.mean(T.eq(pred, y_batch), dtype=theano.config.floatX)

        all_params = get_all_params(output_layer)
        updates = adagrad(loss_train, all_params, self.lr, self.rho)

        iter_train = theano.function(
            [batch_index], loss_train,
            updates=updates,
            givens={
                X_batch: dataset['X_train'][batch_slice],
                y_batch: dataset['y_train'][batch_slice],
            },
            on_unused_input='ignore',
        )

        iter_valid = None
        if self.use_valid:
            iter_valid = theano.function(
                [batch_index], [loss_eval, accuracy, proba],
                givens={
                    X_batch: dataset['X_valid'][batch_slice],
                    y_batch: dataset['y_valid'][batch_slice],
                },
            )

        return dict(train=iter_train, valid=iter_valid) 
Example #28
Source File: training_strategy.py    From dcase_task2 with MIT License
def __init__(self, batch_size=100, ini_learning_rate=0.001, max_epochs=100, patience=10, y_tensor_type=T.ivector,
                 L2=1e-4, objective=mean_categorical_crossentropy, adapt_learn_rate=get_constant(),
                 update_function=get_update_adam(), valid_batch_iter=get_batch_iterator(),
                 train_batch_iter=get_batch_iterator(), use_weights=False, samples_per_epoch=None,
                 shuffle_train=True, report_dices=False, refinement_strategy=RefinementStrategy(),
                 best_model_by_accurary=False, debug_mode=False, layer_update_filter=None, report_map=3):
        """
        Constructor
        """
        self.batch_size = batch_size
        self.ini_learning_rate = ini_learning_rate
        self.max_epochs = max_epochs
        self.patience = patience
        self.y_tensor_type = y_tensor_type
        self.L2 = L2
        self.objective = objective
        self.adapt_learn_rate = adapt_learn_rate
        self.update_function = update_function
        self.valid_batch_iter = valid_batch_iter
        self.train_batch_iter = train_batch_iter
        self.use_weights = use_weights
        self.samples_per_epoch = samples_per_epoch
        self.shuffle_train = shuffle_train
        self.report_dices = report_dices
        self.refinement_strategy = refinement_strategy
        self.best_model_by_accurary = best_model_by_accurary
        self.debug_mode = debug_mode
        self.layer_update_filter = layer_update_filter
        self.report_map = report_map 
Example #29
Source File: nn_adagrad_pca.py    From kaggle_otto with BSD 3-Clause "New" or "Revised" License
def create_iter_functions(self, dataset, output_layer, X_tensor_type=T.matrix):
        batch_index = T.iscalar('batch_index')
        X_batch = X_tensor_type('x')
        y_batch = T.ivector('y')

        batch_slice = slice(batch_index * self.batch_size, (batch_index + 1) * self.batch_size)

        objective = Objective(output_layer, loss_function=categorical_crossentropy)

        loss_train = objective.get_loss(X_batch, target=y_batch)
        loss_eval = objective.get_loss(X_batch, target=y_batch, deterministic=True)

        pred = T.argmax(output_layer.get_output(X_batch, deterministic=True), axis=1)
        proba = output_layer.get_output(X_batch, deterministic=True)
        accuracy = T.mean(T.eq(pred, y_batch), dtype=theano.config.floatX)

        all_params = get_all_params(output_layer)
        updates = adagrad(loss_train, all_params, self.lr, self.epsilon)

        iter_train = theano.function(
            [batch_index], loss_train,
            updates=updates,
            givens={
                X_batch: dataset['X_train'][batch_slice],
                y_batch: dataset['y_train'][batch_slice],
            },
            on_unused_input='ignore',
        )

        iter_valid = None
        if self.use_valid:
            iter_valid = theano.function(
                [batch_index], [loss_eval, accuracy, proba],
                givens={
                    X_batch: dataset['X_valid'][batch_slice],
                    y_batch: dataset['y_valid'][batch_slice],
                },
            )

        return dict(train=iter_train, valid=iter_valid) 
Example #30
Source File: kdl_template.py    From SciPy2015 with BSD 3-Clause "New" or "Revised" License
def add_datasets_to_graph(list_of_datasets, list_of_names, graph, strict=True,
                          list_of_test_values=None):
    assert len(list_of_datasets) == len(list_of_names)
    datasets_added = []
    for n, (dataset, name) in enumerate(zip(list_of_datasets, list_of_names)):
        if dataset.dtype != "int32":
            if len(dataset.shape) == 1:
                sym = tensor.vector()
            elif len(dataset.shape) == 2:
                sym = tensor.matrix()
            elif len(dataset.shape) == 3:
                sym = tensor.tensor3()
            else:
                raise ValueError("dataset %s has unsupported shape" % name)
        elif dataset.dtype == "int32":
            if len(dataset.shape) == 1:
                sym = tensor.ivector()
            elif len(dataset.shape) == 2:
                sym = tensor.imatrix()
            elif len(dataset.shape) == 3:
                sym = tensor.itensor3()
            else:
                raise ValueError("dataset %s has unsupported shape" % name)
        else:
            raise ValueError("dataset %s has unsupported dtype %s" % (
                name, dataset.dtype))
        if list_of_test_values is not None:
            sym.tag.test_value = list_of_test_values[n]
        tag_expression(sym, name, dataset.shape)
        datasets_added.append(sym)
    graph["__datasets_added__"] = datasets_added
    return datasets_added