Python theano.tensor.grad() Examples

The following are 30 code examples showing how to use theano.tensor.grad(). They are extracted from open source projects; the project, author, source file, and license are listed above each example.

You may also want to check out the other functions and classes available in the theano.tensor module.
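
As a quick orientation: theano.tensor.grad(cost, wrt) performs symbolic differentiation. Given a scalar expression cost, it returns a new symbolic expression for the derivative of cost with respect to wrt. A minimal sketch (names are illustrative):

import theano
import theano.tensor as T

x = T.dscalar('x')
cost = x ** 2 + 3 * x        # scalar expression to differentiate
g = T.grad(cost, wrt=x)      # symbolic gradient: 2*x + 3
f = theano.function([x], g)
print(f(2.0))                # prints 7.0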

Example 1
Project: gated-graph-transformer-network   Author: hexahedria   File: adam.py    License: MIT License
def Adam(cost, params, lr=0.0002, b1=0.1, b2=0.001, e=1e-8):
    updates = []
    grads = T.grad(cost, params)
    i = theano.shared(np.array(0., theano.config.floatX))
    i_t = i + 1.
    fix1 = 1. - (1. - b1)**i_t
    fix2 = 1. - (1. - b2)**i_t
    lr_t = lr * (T.sqrt(fix2) / fix1)
    for p, g in zip(params, grads):
        m = theano.shared(p.get_value() * 0.)
        v = theano.shared(p.get_value() * 0.)
        m_t = (b1 * g) + ((1. - b1) * m)
        v_t = (b2 * T.sqr(g)) + ((1. - b2) * v)
        g_t = m_t / (T.sqrt(v_t) + e)
        p_t = p - (lr_t * g_t)
        updates.append((m, m_t))
        updates.append((v, v_t))
        updates.append((p, p_t))
    updates.append((i, i_t))
    return updates 
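
A typical way to use these updates is to pass them to theano.function when compiling a training step. A hedged sketch, where X, y, cost, params, and the minibatch arrays are placeholders for whatever the surrounding model defines:

# Hypothetical usage: X, y, cost and params come from the model code.
train = theano.function([X, y], cost, updates=Adam(cost, params, lr=0.0002))
batch_cost = train(X_batch, y_batch)  # one Adam step on a minibatch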
Example 2
Project: Depth-Map-Prediction   Author: hjimce   File: pooling.py    License: GNU General Public License v3.0
def test_cmrnorm():
    from theano.tests.unittest_tools import verify_grad

    xtest = np.random.rand(2,8,3,4)
    xtest = xtest.astype(theano.config.floatX)

    x = T.tensor4('x', dtype=theano.config.floatX)
    x.tag.test_value = xtest

    y = cmrnorm(x, input_shape=xtest.shape[1:])
    f = theano.function([x], y, mode='DEBUG_MODE')
    f(xtest)

    f = theano.function([x], gpu_from_host(T.grad(T.sum(y), wrt=x)),
                        mode='DEBUG_MODE')
    f(xtest)
    theano.printing.debugprint(f)

    verify_grad(lambda x: cmrnorm(x, input_shape=xtest.shape[1:]),
                (xtest,),
                rng=np.random.RandomState(0))

    print('cmrnorm passed') 
Example 3
Project: spinn   Author: stanfordnlp   File: test_stack.py    License: MIT License
def test_speed(self):
        top = self.stack.final_stack[-self.batch_size:]
        cost = self._make_cost(top)
        error_signal = T.grad(cost, top)

        # Build automatic backprop function.
        self.stack.make_backprop_scan(error_signal, [self.y],
                                      compute_embedding_gradients=False)
        f = theano.function(
            [self.X, self.transitions, self.y],
            [cost] + list(self.stack.gradients.values()),
            updates=self.stack.scan_updates + self.stack.bscan_updates)
        theano.printing.debugprint(f.maker.fgraph.outputs[1])

        for t in range(10):
            self._run_batch(f) 
Example 4
Project: spinn   Author: stanfordnlp   File: blocks.py    License: MIT License
def RMSprop(cost, params, lr=0.001, rho=0.9, epsilon=1e-6, grads=None):
    # From:
    # https://github.com/Newmu/Theano-Tutorials/blob/master/4_modern_net.py
    if grads is None:
        grads = T.grad(cost=cost, wrt=params)
    assert len(grads) == len(params)

    updates = []
    for p, g in zip(params, grads):
        acc = theano.shared(np.zeros_like(p.get_value(), dtype=np.float32),
                            name="%s/rms/acc" % p.name)
        acc_new = rho * acc + (1 - rho) * g ** 2
        gradient_scaling = T.sqrt(acc_new + epsilon)
        g = g / gradient_scaling
        updates.append((acc, acc_new))
        updates.append((p, p - lr * g))
    return updates 
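
RMSprop keeps a running mean of squared gradients per parameter and divides each gradient by its root before stepping. The same arithmetic for a single update, written in plain NumPy to make it concrete (values are illustrative):

import numpy as np

p = np.array([1.0, -2.0])    # illustrative parameter vector
g = np.array([0.5, 0.1])     # illustrative gradient
rho, lr, epsilon = 0.9, 0.001, 1e-6

acc = np.zeros_like(p)                  # running mean of squared gradients
acc = rho * acc + (1 - rho) * g ** 2    # accumulate squared gradient
p -= lr * g / np.sqrt(acc + epsilon)    # step scaled by the running RMS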
Example 5
Project: hgru4rec   Author: mquad   File: hgru4rec.py    License: MIT License
def adam(self, param, grad, updates, sample_idx=None, epsilon=1e-6):
        v1 = np.float32(self.decay)
        v2 = np.float32(1.0 - self.decay)
        acc = theano.shared(param.get_value(borrow=False) * 0., borrow=True)
        meang = theano.shared(param.get_value(borrow=False) * 0., borrow=True)
        countt = theano.shared(param.get_value(borrow=False) * 0., borrow=True)
        if sample_idx is None:
            acc_new = v1 * acc + v2 * grad ** 2
            meang_new = v1 * meang + v2 * grad
            countt_new = countt + 1
            updates[acc] = acc_new
            updates[meang] = meang_new
            updates[countt] = countt_new
        else:
            acc_s = acc[sample_idx]
            meang_s = meang[sample_idx]
            countt_s = countt[sample_idx]
            acc_new = v1 * acc_s + v2 * grad ** 2
            meang_new = v1 * meang_s + v2 * grad
            countt_new = countt_s + 1.0
            updates[acc] = T.set_subtensor(acc_s, acc_new)
            updates[meang] = T.set_subtensor(meang_s, meang_new)
            updates[countt] = T.set_subtensor(countt_s, countt_new)
        return (meang_new / (1 - v1 ** countt_new)) / (T.sqrt(acc_new / (1 - v1 ** countt_new)) + epsilon) 
Example 6
Project: hgru4rec   Author: mquad   File: hgru4rec.py    License: MIT License
def adadelta(self, param, grad, updates, sample_idx=None, epsilon=1e-6):
        v1 = np.float32(self.decay)
        v2 = np.float32(1.0 - self.decay)
        acc = theano.shared(param.get_value(borrow=False) * 0., borrow=True)
        upd = theano.shared(param.get_value(borrow=False) * 0., borrow=True)
        if sample_idx is None:
            acc_new = acc + grad ** 2
            updates[acc] = acc_new
            grad = T.sqrt(upd + epsilon) * grad
            upd_new = v1 * upd + v2 * grad ** 2
            updates[upd] = upd_new
        else:
            acc_s = acc[sample_idx]
            acc_new = acc_s + grad ** 2
            updates[acc] = T.set_subtensor(acc_s, acc_new)
            upd_s = upd[sample_idx]
            upd_new = v1 * upd_s + v2 * grad ** 2
            updates[upd] = T.set_subtensor(upd_s, upd_new)
            grad = T.sqrt(upd_s + epsilon) * grad
        gradient_scaling = T.cast(T.sqrt(acc_new + epsilon), theano.config.floatX)
        return grad / gradient_scaling 
Example 7
Project: opt-mmd   Author: djsutherland   File: nn.py    License: BSD 3-Clause "New" or "Revised" License
def adam_updates(params, cost, lr=0.001, mom1=0.9, mom2=0.999):
    updates = []
    grads = T.grad(cost, params)
    t = th.shared(np.cast[th.config.floatX](1.))
    for p, g in zip(params, grads):
        v = th.shared(np.cast[th.config.floatX](p.get_value() * 0.))
        mg = th.shared(np.cast[th.config.floatX](p.get_value() * 0.))
        v_t = mom1*v + (1. - mom1)*g
        mg_t = mom2*mg + (1. - mom2)*T.square(g)
        v_hat = v_t / (1. - mom1 ** t)
        mg_hat = mg_t / (1. - mom2 ** t)
        g_t = v_hat / T.sqrt(mg_hat + 1e-8)
        p_t = p - lr * g_t
        updates.append((v, v_t))
        updates.append((mg, mg_t))
        updates.append((p, p_t))
    updates.append((t, t+1))
    return updates 
Example 8
Project: iGAN   Author: junyanz   File: updates.py    License: MIT License
def __call__(self, params, cost):
        updates = []
        grads = T.grad(cost, params)
        grads = clip_norms(grads, self.clipnorm)
        t = theano.shared(floatX(1.))
        b1_t = self.b1 * self.l**(t - 1)

        for p, g in zip(params, grads):
            g = self.regularizer.gradient_regularize(p, g)
            m = theano.shared(p.get_value() * 0.)
            v = theano.shared(p.get_value() * 0.)

            m_t = b1_t * m + (1 - b1_t) * g
            v_t = self.b2 * v + (1 - self.b2) * g**2
            m_c = m_t / (1 - self.b1**t)
            v_c = v_t / (1 - self.b2**t)
            p_t = p - (self.lr * m_c) / (T.sqrt(v_c) + self.e)
            p_t = self.regularizer.weight_regularize(p_t)
            updates.append((m, m_t))
            updates.append((v, v_t))
            updates.append((p, p_t))
        updates.append((t, t + 1.))
        return updates 
Example 9
Project: iGAN   Author: junyanz   File: updates.py    License: MIT License
def __call__(self, params, cost):
        updates = []
        grads = T.grad(cost, params)
        grads = clip_norms(grads, self.clipnorm)
        for p, g in zip(params, grads):
            g = self.regularizer.gradient_regularize(p, g)

            acc = theano.shared(p.get_value() * 0.)
            acc_delta = theano.shared(p.get_value() * 0.)
            acc_new = self.rho * acc + (1 - self.rho) * g ** 2
            updates.append((acc, acc_new))

            update = g * T.sqrt(acc_delta + self.epsilon) / T.sqrt(acc_new + self.epsilon)
            updated_p = p - self.lr * update
            updated_p = self.regularizer.weight_regularize(updated_p)
            updates.append((p, updated_p))

            acc_delta_new = self.rho * acc_delta + (1 - self.rho) * update ** 2
            updates.append((acc_delta, acc_delta_new))
        return updates 
Example 10
Project: Projects   Author: iamshang1   File: convolutional_nn.py    License: MIT License
def __init__(self,convolutional_layers,feature_maps,filter_shapes,poolsize,feedforward_layers,feedforward_nodes,classes,learning_rate,regularization):
        self.input = T.tensor4()
        self.convolutional_layers = []
        self.convolutional_layers.append(convolutional_layer(self.input,feature_maps[1],feature_maps[0],filter_shapes[0][0],filter_shapes[0][1],poolsize[0]))
        for i in range(1,convolutional_layers):
            self.convolutional_layers.append(convolutional_layer(self.convolutional_layers[i-1].output,feature_maps[i+1],feature_maps[i],filter_shapes[i][0],filter_shapes[i][1],poolsize[i]))
        self.feedforward_layers = []
        self.feedforward_layers.append(feedforward_layer(self.convolutional_layers[-1].output.flatten(2),flattened,feedforward_nodes[0]))  # 'flattened' (the size of the flattened conv output) is computed elsewhere in the original file
        for i in range(1,feedforward_layers):
            self.feedforward_layers.append(feedforward_layer(self.feedforward_layers[i-1].output,feedforward_nodes[i-1],feedforward_nodes[i]))
        self.output_layer = feedforward_layer(self.feedforward_layers[-1].output,feedforward_nodes[-1],classes)
        self.params = []
        for l in self.convolutional_layers + self.feedforward_layers:
            self.params.extend(l.get_params())
        self.params.extend(self.output_layer.get_params())
        self.target = T.matrix()
        self.output = self.output_layer.output
        self.cost = -self.target*T.log(self.output)-(1-self.target)*T.log(1-self.output)
        self.cost = self.cost.mean()
        for i in range(convolutional_layers+feedforward_layers+1):
            self.cost += regularization*(self.params[2*i]**2).mean()
        self.gparams = [T.grad(self.cost, param) for param in self.params]
        self.propogate = theano.function([self.input,self.target],self.cost,updates=[(param,param-learning_rate*gparam) for param,gparam in zip(self.params,self.gparams)],allow_input_downcast=True)
        self.classify = theano.function([self.input],self.output,allow_input_downcast=True) 
Example 11
Project: Projects   Author: iamshang1   File: theano_nn.py    License: MIT License
def __init__(self,classes,hidden_layers,features,nodes_per_hidden_layer,learning_rate,regularization):
        self.hidden_layers = []
        self.hidden_layers.append(layer(features,nodes_per_hidden_layer))
        for i in range(hidden_layers-1):
            self.hidden_layers.append(layer(nodes_per_hidden_layer,nodes_per_hidden_layer))
        self.output_layer = layer(nodes_per_hidden_layer,classes)
        self.params = []
        for l in self.hidden_layers:
            self.params.extend(l.get_params())
        self.params.extend(self.output_layer.get_params())
        self.A = T.matrix()
        self.t = T.matrix()
        self.s = 1/(1+T.exp(-T.dot(self.A,self.params[0])-self.params[1]))
        for i in range(hidden_layers):
            self.s = 1/(1+T.exp(-T.dot(self.s,self.params[2*(i+1)])-self.params[2*(i+1)+1]))
        self.cost = -self.t*T.log(self.s)-(1-self.t)*T.log(1-self.s)
        self.cost = self.cost.mean()
        for i in range(hidden_layers+1):
            self.cost += regularization*(self.params[2*i]**2).mean()
        self.gparams = [T.grad(self.cost, param) for param in self.params]
        self.propogate = theano.function([self.A,self.t],self.cost,updates=[(param,param-learning_rate*gparam) for param,gparam in zip(self.params,self.gparams)],allow_input_downcast=True)
        self.classify = theano.function([self.A],self.s,allow_input_downcast=True) 
Example 12
Project: Projects   Author: iamshang1   File: conv_net.py    License: MIT License
def adam(self, cost, params, lr=0.0002, b1=0.1, b2=0.01, e=1e-8):
        updates = []
        grads = T.grad(cost, params)
        self.i = theano.shared(np.float32(0.))
        i_t = self.i + 1.
        fix1 = 1. - (1. - b1)**i_t
        fix2 = 1. - (1. - b2)**i_t
        lr_t = lr * (T.sqrt(fix2) / fix1)
        for p, g in zip(params, grads):
            self.m = theano.shared(p.get_value() * 0.)
            self.v = theano.shared(p.get_value() * 0.)
            m_t = (b1 * g) + ((1. - b1) * self.m)
            v_t = (b2 * T.sqr(g)) + ((1. - b2) * self.v)
            g_t = m_t / (T.sqrt(v_t) + e)
            p_t = p - (lr_t * g_t)
            updates.append((self.m, m_t))
            updates.append((self.v, v_t))
            updates.append((p, p_t))
        updates.append((self.i, i_t))
        return updates 
Example 13
Project: Projects   Author: iamshang1   File: model1.py    License: MIT License
def adam(self, cost, params, lr=0.0002, b1=0.1, b2=0.01, e=1e-8):
        '''
        adam gradient descent updates
        '''
        updates = []
        grads = T.grad(cost, params)
        self.i = theano.shared(np.float32(0.))
        i_t = self.i + 1.
        fix1 = 1. - (1. - b1)**i_t
        fix2 = 1. - (1. - b2)**i_t
        lr_t = lr * (T.sqrt(fix2) / fix1)
        for p, g in zip(params, grads):
            self.m = theano.shared(p.get_value() * 0.)
            self.v = theano.shared(p.get_value() * 0.)
            m_t = (b1 * g) + ((1. - b1) * self.m)
            v_t = (b2 * T.sqr(g)) + ((1. - b2) * self.v)
            g_t = m_t / (T.sqrt(v_t) + e)
            p_t = p - (lr_t * g_t)
            updates.append((self.m, m_t))
            updates.append((self.v, v_t))
            updates.append((p, p_t))
        updates.append((self.i, i_t))
        return updates

#open previous lowest training cost if it exists 
Example 14
Project: Projects   Author: iamshang1   File: model2.py    License: MIT License
def adam(self, cost, params, lr=0.0002, b1=0.1, b2=0.01, e=1e-8):
        '''
        adam gradient descent updates
        '''
        updates = []
        grads = T.grad(cost, params)
        self.i = theano.shared(np.float32(0.))
        i_t = self.i + 1.
        fix1 = 1. - (1. - b1)**i_t
        fix2 = 1. - (1. - b2)**i_t
        lr_t = lr * (T.sqrt(fix2) / fix1)
        for p, g in zip(params, grads):
            self.m = theano.shared(p.get_value() * 0.)
            self.v = theano.shared(p.get_value() * 0.)
            m_t = (b1 * g) + ((1. - b1) * self.m)
            v_t = (b2 * T.sqr(g)) + ((1. - b2) * self.v)
            g_t = m_t / (T.sqrt(v_t) + e)
            p_t = p - (lr_t * g_t)
            updates.append((self.m, m_t))
            updates.append((self.v, v_t))
            updates.append((p, p_t))
        updates.append((self.i, i_t))
        return updates

#load saved lstm if it exists, else initialize new lstm 
Example 15
Project: Projects   Author: iamshang1   File: lstm_between_subject.py    License: MIT License
def adam(self, cost, params, lr=0.0002, b1=0.1, b2=0.01, e=1e-8):
        '''
        adaptive moment estimation gradient descent
        '''
        updates = []
        grads = T.grad(cost, params)
        self.i = theano.shared(np.float32(0.))
        i_t = self.i + 1.
        fix1 = 1. - (1. - b1)**i_t
        fix2 = 1. - (1. - b2)**i_t
        lr_t = lr * (T.sqrt(fix2) / fix1)
        for p, g in zip(params, grads):
            self.m = theano.shared(p.get_value() * 0.)
            self.v = theano.shared(p.get_value() * 0.)
            m_t = (b1 * g) + ((1. - b1) * self.m)
            v_t = (b2 * T.sqr(g)) + ((1. - b2) * self.v)
            g_t = m_t / (T.sqrt(v_t) + e)
            p_t = p - (lr_t * g_t)
            updates.append((self.m, m_t))
            updates.append((self.v, v_t))
            updates.append((p, p_t))
        updates.append((self.i, i_t))
        return updates

#load data 
Example 16
Project: Projects   Author: iamshang1   File: lstm_within_subject.py    License: MIT License
def adam(self, cost, params, lr=0.0002, b1=0.1, b2=0.01, e=1e-8):
        '''
        adaptive moment estimation gradient descent
        '''
        updates = []
        grads = T.grad(cost, params)
        self.i = theano.shared(np.float32(0.))
        i_t = self.i + 1.
        fix1 = 1. - (1. - b1)**i_t
        fix2 = 1. - (1. - b2)**i_t
        lr_t = lr * (T.sqrt(fix2) / fix1)
        for p, g in zip(params, grads):
            self.m = theano.shared(p.get_value() * 0.)
            self.v = theano.shared(p.get_value() * 0.)
            m_t = (b1 * g) + ((1. - b1) * self.m)
            v_t = (b2 * T.sqr(g)) + ((1. - b2) * self.v)
            g_t = m_t / (T.sqrt(v_t) + e)
            p_t = p - (lr_t * g_t)
            updates.append((self.m, m_t))
            updates.append((self.v, v_t))
            updates.append((p, p_t))
        updates.append((self.i, i_t))
        return updates

#load data 
Example 17
Project: Projects   Author: iamshang1   File: convlstm_between_subject.py    License: MIT License
def adam(self, cost, params, lr=0.0002, b1=0.1, b2=0.01, e=1e-8):
        '''
        adaptive moment estimation gradient descent
        '''
        updates = []
        grads = T.grad(cost, params)
        self.i = theano.shared(np.float32(0.))
        i_t = self.i + 1.
        fix1 = 1. - (1. - b1)**i_t
        fix2 = 1. - (1. - b2)**i_t
        lr_t = lr * (T.sqrt(fix2) / fix1)
        for p, g in zip(params, grads):
            self.m = theano.shared(p.get_value() * 0.)
            self.v = theano.shared(p.get_value() * 0.)
            m_t = (b1 * g) + ((1. - b1) * self.m)
            v_t = (b2 * T.sqr(g)) + ((1. - b2) * self.v)
            g_t = m_t / (T.sqrt(v_t) + e)
            p_t = p - (lr_t * g_t)
            updates.append((self.m, m_t))
            updates.append((self.v, v_t))
            updates.append((p, p_t))
        updates.append((self.i, i_t))
        return updates

#load data 
Example 18
Project: Projects   Author: iamshang1   File: convnet.py    License: MIT License
def adam(self, cost, params, lr=0.0002, b1=0.1, b2=0.01, e=1e-8):
        updates = []
        grads = T.grad(cost, params)
        self.i = theano.shared(np.float32(0.))
        i_t = self.i + 1.
        fix1 = 1. - (1. - b1)**i_t
        fix2 = 1. - (1. - b2)**i_t
        lr_t = lr * (T.sqrt(fix2) / fix1)
        for p, g in zip(params, grads):
            self.m = theano.shared(p.get_value() * 0.)
            self.v = theano.shared(p.get_value() * 0.)
            m_t = (b1 * g) + ((1. - b1) * self.m)
            v_t = (b2 * T.sqr(g)) + ((1. - b2) * self.v)
            g_t = m_t / (T.sqrt(v_t) + e)
            p_t = p - (lr_t * g_t)
            updates.append((self.m, m_t))
            updates.append((self.v, v_t))
            updates.append((p, p_t))
        updates.append((self.i, i_t))
        return updates 
Example 19
Project: Projects   Author: iamshang1   File: conv2d_crossvalidation.py    License: MIT License
def adam(self, cost, params, lr=0.0002, b1=0.1, b2=0.01, e=1e-8):
        updates = []
        grads = T.grad(cost, params)
        self.i = theano.shared(np.float32(0.))
        i_t = self.i + 1.
        fix1 = 1. - (1. - b1)**i_t
        fix2 = 1. - (1. - b2)**i_t
        lr_t = lr * (T.sqrt(fix2) / fix1)
        for p, g in zip(params, grads):
            self.m = theano.shared(p.get_value() * 0.)
            self.v = theano.shared(p.get_value() * 0.)
            m_t = (b1 * g) + ((1. - b1) * self.m)
            v_t = (b2 * T.sqr(g)) + ((1. - b2) * self.v)
            g_t = m_t / (T.sqrt(v_t) + e)
            p_t = p - (lr_t * g_t)
            updates.append((self.m, m_t))
            updates.append((self.v, v_t))
            updates.append((p, p_t))
        updates.append((self.i, i_t))
        return updates 
Example 20
Project: Projects   Author: iamshang1   File: batch_normalization.py    License: MIT License
def adam(self, cost, params, lr=0.0002, b1=0.1, b2=0.01, e=1e-8):
        updates = []
        grads = T.grad(cost, params)
        self.i = theano.shared(np.float32(0.))
        i_t = self.i + 1.
        fix1 = 1. - (1. - b1)**i_t
        fix2 = 1. - (1. - b2)**i_t
        lr_t = lr * (T.sqrt(fix2) / fix1)
        for p, g in zip(params, grads):
            self.m = theano.shared(p.get_value() * 0.)
            self.v = theano.shared(p.get_value() * 0.)
            m_t = (b1 * g) + ((1. - b1) * self.m)
            v_t = (b2 * T.sqr(g)) + ((1. - b2) * self.v)
            g_t = m_t / (T.sqrt(v_t) + e)
            p_t = p - (lr_t * g_t)
            updates.append((self.m, m_t))
            updates.append((self.v, v_t))
            updates.append((p, p_t))
        updates.append((self.i, i_t))
        return updates 
Example 21
Project: Projects   Author: iamshang1   File: rbm_pretraining.py    License: MIT License
def get_cost_updates(self, lr=0.1, persistent=None, k=1):
        pre_sigmoid_ph, ph_mean, ph_sample = self.sample_h_given_v(self.input)
        if persistent is None:
            chain_start = ph_sample
        else:
            chain_start = persistent
        ([pre_sigmoid_nvs,nv_means,nv_samples,pre_sigmoid_nhs,nh_means,nh_samples],updates) = \
            theano.scan(self.gibbs_step, outputs_info=[None, None, None, None, None, chain_start],n_steps=k,name="gibbs_step")
        chain_end = nv_samples[-1]
        cost = T.mean(self.free_energy(self.input)) - T.mean(self.free_energy(chain_end))
        gparams = T.grad(cost, self.params, consider_constant=[chain_end])
        for gparam, param in zip(gparams, self.params):
            updates[param] = param - gparam * T.cast(lr,dtype=theano.config.floatX)
        if persistent:
            updates[persistent] = nh_samples[-1]
            monitoring_cost = self.get_pseudo_likelihood_cost(updates)  
        else:
            monitoring_cost = self.get_reconstruction_cost(updates,pre_sigmoid_nvs[-1])
        return monitoring_cost, updates 
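
Note the consider_constant=[chain_end] argument: it tells T.grad to treat the sampled chain_end as a constant, so no gradient flows back through the Gibbs sampling chain, which is the standard trick for the contrastive-divergence gradient. The flag in isolation, as a minimal sketch:

import theano
import theano.tensor as T

x = T.dscalar('x')
y = x ** 2
# d(x*y)/dx holding y fixed is y = x**2, not the full derivative 3*x**2.
g = T.grad(x * y, wrt=x, consider_constant=[y])
print(theano.function([x], g)(2.0))  # prints 4.0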
Example 22
Project: Projects   Author: iamshang1   File: rbm_pretraining.py    License: MIT License
def adam(self, cost, params, lr=0.0002, b1=0.1, b2=0.01, e=1e-8):
        updates = []
        grads = T.grad(cost, params)
        self.i = theano.shared(np.float32(0.))
        i_t = self.i + 1.
        fix1 = 1. - (1. - b1)**i_t
        fix2 = 1. - (1. - b2)**i_t
        lr_t = lr * (T.sqrt(fix2) / fix1)
        for p, g in zip(params, grads):
            self.m = theano.shared(p.get_value() * 0.)
            self.v = theano.shared(p.get_value() * 0.)
            m_t = (b1 * g) + ((1. - b1) * self.m)
            v_t = (b2 * T.sqr(g)) + ((1. - b2) * self.v)
            g_t = m_t / (T.sqrt(v_t) + e)
            p_t = p - (lr_t * g_t)
            updates.append((self.m, m_t))
            updates.append((self.v, v_t))
            updates.append((p, p_t))
        updates.append((self.i, i_t))
        return updates 
Example 23
Project: Projects   Author: iamshang1   File: residual_gradient_descent.py    License: MIT License
def adam(self, cost, params, lr=0.0002, b1=0.1, b2=0.01, e=1e-8):
        updates = []
        grads = T.grad(cost, params)
        self.i = theano.shared(np.float32(0.))
        i_t = self.i + 1.
        fix1 = 1. - (1. - b1)**i_t
        fix2 = 1. - (1. - b2)**i_t
        lr_t = lr * (T.sqrt(fix2) / fix1)
        for p, g in zip(params, grads):
            self.m = theano.shared(p.get_value() * 0.)
            self.v = theano.shared(p.get_value() * 0.)
            m_t = (b1 * g) + ((1. - b1) * self.m)
            v_t = (b2 * T.sqr(g)) + ((1. - b2) * self.v)
            g_t = m_t / (T.sqrt(v_t) + e)
            p_t = p - (lr_t * g_t)
            updates.append((self.m, m_t))
            updates.append((self.v, v_t))
            updates.append((p, p_t))
        updates.append((self.i, i_t))
        return updates 
Example 24
Project: Projects   Author: iamshang1   File: gradient_descent.py    License: MIT License
def adam(self, cost, params, lr=0.0002, b1=0.1, b2=0.01, e=1e-8):
        updates = []
        grads = T.grad(cost, params)
        self.i = theano.shared(np.float32(0.))
        i_t = self.i + 1.
        fix1 = 1. - (1. - b1)**i_t
        fix2 = 1. - (1. - b2)**i_t
        lr_t = lr * (T.sqrt(fix2) / fix1)
        for p, g in zip(params, grads):
            self.m = theano.shared(p.get_value() * 0.)
            self.v = theano.shared(p.get_value() * 0.)
            m_t = (b1 * g) + ((1. - b1) * self.m)
            v_t = (b2 * T.sqr(g)) + ((1. - b2) * self.v)
            g_t = m_t / (T.sqrt(v_t) + e)
            p_t = p - (lr_t * g_t)
            updates.append((self.m, m_t))
            updates.append((self.v, v_t))
            updates.append((p, p_t))
        updates.append((self.i, i_t))
        return updates 
Example 25
Project: D-VAE   Author: muhanzhang   File: aa.py    License: MIT License
def __init__(self):
        super(M, self).__init__()

        x = T.matrix('x') # input, target
        self.w = module.Member(T.matrix('w')) # weights
        self.a = module.Member(T.vector('a')) # hid bias
        self.b = module.Member(T.vector('b')) # output bias

        self.hid = T.tanh(T.dot(x, self.w) + self.a)
        hid = self.hid

        self.out = T.tanh(T.dot(hid, self.w.T) + self.b)
        out = self.out

        self.err = 0.5 * T.sum((out - x)**2)
        err = self.err

        params = [self.w, self.a, self.b]

        gparams = T.grad(err, params)

        updates = [(p, p - 0.01 * gp) for p, gp in zip(params, gparams)]

        self.step = module.Method([x], err, updates=dict(updates)) 
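
The module/Member API used above comes from a very old Theano release and was later removed. A rough equivalent of the same tied-weight autoencoder training step, written as a sketch with shared variables and theano.function (the shapes are illustrative):

import numpy as np
import theano
import theano.tensor as T

rng = np.random.RandomState(0)
w = theano.shared(rng.randn(5, 3) * 0.1)  # tied weights
a = theano.shared(np.zeros(3))            # hidden bias
b = theano.shared(np.zeros(5))            # output bias

x = T.matrix('x')
hid = T.tanh(T.dot(x, w) + a)
out = T.tanh(T.dot(hid, w.T) + b)
err = 0.5 * T.sum((out - x) ** 2)

gparams = T.grad(err, [w, a, b])
updates = [(p, p - 0.01 * gp) for p, gp in zip([w, a, b], gparams)]
step = theano.function([x], err, updates=updates)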
Example 26
Project: D-VAE   Author: muhanzhang   File: test_opt.py    License: MIT License
def test_local_csm_grad_c():
    raise SkipTest("Opt disabled as it don't support unsorted indices")
    if not theano.config.cxx:
        raise SkipTest("G++ not available, so we need to skip this test.")
    data = tensor.vector()
    indices, indptr, shape = (tensor.ivector(), tensor.ivector(),
                              tensor.ivector())
    mode = theano.compile.mode.get_default_mode()

    if theano.config.mode == 'FAST_COMPILE':
        mode = theano.compile.Mode(linker='c|py', optimizer='fast_compile')

    mode = mode.including("specialize", "local_csm_grad_c")
    for CS, cast in [(sparse.CSC, sp.csc_matrix), (sparse.CSR, sp.csr_matrix)]:
        cost = tensor.sum(sparse.DenseFromSparse()(CS(data, indices, indptr, shape)))
        f = theano.function(
            [data, indices, indptr, shape],
            tensor.grad(cost, data),
            mode=mode)
        assert not any(isinstance(node.op, sparse.CSMGrad) for node
                       in f.maker.fgraph.toposort())
        v = cast(random_lil((10, 40),
                            config.floatX, 3))
        f(v.data, v.indices, v.indptr, v.shape) 
Example 27
Project: D-VAE   Author: muhanzhang   File: test_basic.py    License: MIT License
def test_csm_grad(self):
        for sparsetype in ('csr', 'csc'):
            x = tensor.vector()
            y = tensor.ivector()
            z = tensor.ivector()
            s = tensor.ivector()
            call = getattr(sp, sparsetype + '_matrix')
            spm = call(random_lil((300, 400), config.floatX, 5))
            out = tensor.grad(dense_from_sparse(
                CSM(sparsetype)(x, y, z, s)
            ).sum(), x)
            self._compile_and_check([x, y, z, s],
                                    [out],
                                    [spm.data, spm.indices, spm.indptr,
                                     spm.shape],
                                    (CSMGrad, CSMGradC)
                                   ) 
Example 28
Project: D-VAE   Author: muhanzhang   File: test_nnet.py    License: MIT License
def test_local_softmax_grad_optimization_and_big_input(self):
        """Test the Logsoftmax's grad substitution.

        Check that Log(Softmax(x))'s grad is substituted with Logsoftmax(x)'s
        grad and that the new operation does not explode for big inputs.
        Note that only the grad is checked.
        """
        m = theano.config.mode
        m = theano.compile.get_mode(m)
        m.check_isfinite = False
        # some inputs that are large to make the gradient explode in the non
        # optimized case
        a = numpy.exp(10 * numpy.random.rand(5, 10).astype(theano.config.floatX))

        def myfunc(x):
            sm = tensor.nnet.softmax(x)
            logsm = tensor.log(sm)
            return logsm
        # We set step to 0.1 because for big values we need a big epsilon
        utt.verify_grad(myfunc, [a], eps=0.1, mode=m)
        f = theano.function([], myfunc(a))
        self.assertTrue(hasattr(f.maker.fgraph.outputs[0].tag, 'trace')) 
Example 29
Project: Att-ChemdNER   Author: lingluodlut   File: optimization.py    License: Apache License 2.0
def get_gradients(self, cost, params):
        """
        Compute the gradients, and clip them if required.
        """
        if self.clip is None:
            return T.grad(cost, params)
        else:
            assert self.clip > 0
            return T.grad(
                theano.gradient.grad_clip(cost, -1 * self.clip, self.clip),
                params 
            ) 
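
theano.gradient.grad_clip(x, lower, upper) behaves as the identity in the forward pass but clips the gradient flowing back through that node, so wrapping cost before calling T.grad bounds the gradient entering the backward pass. A minimal sketch of the op's documented behaviour:

import theano
import theano.tensor as T

x = T.dscalar('x')
y = theano.gradient.grad_clip(x, -1.0, 1.0) ** 2
g = T.grad(y, x)
print(theano.function([x], g)(2.0))  # unclipped grad would be 4.0; prints 1.0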
Example 30
Project: Att-ChemdNER   Author: lingluodlut   File: theano_backend.py    License: Apache License 2.0
def gradients(loss, variables):
    return T.grad(loss, variables)