Python theano.shared() Examples

The following are 30 code examples of theano.shared(), collected from open-source projects; each example lists its source file, the project it comes from, and that project's license. You may also want to check out the other available functions and classes of the theano module.
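Before the project examples, here is a minimal, self-contained sketch (not taken from any of the projects below) of the basic theano.shared() workflow: a shared variable keeps its value between function calls and is modified through a function's updates list.

import numpy as np
import theano
import theano.tensor as T

# A shared variable holds persistent state (optionally on the GPU).
state = theano.shared(np.float32(0.), name='state')
inc = T.fscalar('inc')

# `updates` tells the compiled function how to modify the shared variable
# after each call; the function returns the value seen before the update.
accumulate = theano.function([inc], state, updates=[(state, state + inc)])

accumulate(np.float32(1.5))
print(state.get_value())            # 1.5
state.set_value(np.float32(0.))     # reset the stored value directly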
Example #1
Source File: optimization.py    From Att-ChemdNER with Apache License 2.0
def sgd(self, cost, params, constraints={}, lr=0.01):
        """
        Stochastic gradient descent.
        """
        updates = []

        lr = theano.shared(np.float32(lr).astype(floatX))
        gradients = self.get_gradients(cost, params)

        for p, g in zip(params, gradients):
            v = -lr * g
            new_p = p + v
            # apply constraints
            if p in constraints:
                c = constraints[p]
                new_p = c(new_p)
            updates.append((p, new_p))

        return updates 
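The (shared_variable, new_value) pairs returned by sgd() are meant to be handed to theano.function. The following self-contained sketch mirrors that pattern on a tiny least-squares problem; the data, shapes, and learning rate are illustrative and not taken from Att-ChemdNER.

import numpy as np
import theano
import theano.tensor as T

floatX = theano.config.floatX

x = T.matrix('x')
y = T.vector('y')
w = theano.shared(np.zeros(3, dtype=floatX), name='w')

cost = T.mean((T.dot(x, w) - y) ** 2)
lr = np.float32(0.01)
# Same structure as the updates list built above: one (param, new_param) pair.
updates = [(p, p - lr * g) for p, g in zip([w], T.grad(cost, [w]))]

train_step = theano.function([x, y], cost, updates=updates)
loss = train_step(np.random.randn(8, 3).astype(floatX),
                  np.random.randn(8).astype(floatX))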
Example #2
Source File: optimization.py    From Att-ChemdNER with Apache License 2.0
def adadelta(self, cost, params, rho=0.95, epsilon=1e-6, consider_constant=None):
        """
        Adadelta. Based on:
        http://www.matthewzeiler.com/pubs/googleTR2012/googleTR2012.pdf
        """
        rho = theano.shared(np.float32(rho).astype(floatX))
        epsilon = theano.shared(np.float32(epsilon).astype(floatX))

        gradients = self.get_gradients(cost, params, consider_constant)
        accu_gradients = [theano.shared(np.zeros_like(param.get_value(borrow=True)).astype(floatX)) for param in params]
        accu_deltas = [theano.shared(np.zeros_like(param.get_value(borrow=True)).astype(floatX)) for param in params]

        updates = []
        for param, gradient, accu_gradient, accu_delta in zip(params, gradients, accu_gradients, accu_deltas):
            new_accu_gradient = rho * accu_gradient + (1. - rho) * gradient ** 2.
            delta_x = - T.sqrt((accu_delta + epsilon) / (new_accu_gradient + epsilon)) * gradient
            new_accu_delta = rho * accu_delta + (1. - rho) * delta_x ** 2.
            updates.append((accu_gradient, new_accu_gradient))
            updates.append((accu_delta, new_accu_delta))
            updates.append((param, param + delta_x))
        return updates 
Example #3
Source File: net.py    From Depth-Map-Prediction with GNU General Public License v3.0
def _init_params(self, init_W, tie_params):
        (nfilt, fc, fi, fj) = self.filter_shape

        if 'W' not in tie_params:
            if init_W is None:
                w_shape = self.filter_shape
                init_W = self.conf.geteval('init_W')(w_shape).astype(floatX)
            self.W = theano.shared(value=init_W, name='W')
            self.params.append(self.W)

        if self.have_bias and 'b' not in tie_params:
            init_b = self.conf.geteval('init_b', 0)
            nb = nfilt if not self.transpose else fc
            self.b = theano.shared(init_b + np.zeros(nb, dtype=floatX),
                                   name='b')
            self.params.append(self.b)
    # compute the network output
Example #4
Source File: optimization.py    From Att-ChemdNER with Apache License 2.0
def sgdmomentum(self, cost, params, constraints={}, lr=0.01, consider_constant=None, momentum=0.):
        """
        Stochastic gradient descent with momentum. Momentum has to be in [0, 1).
        """
        # Check that the momentum is a correct value
        assert 0 <= momentum < 1

        lr = theano.shared(np.float32(lr).astype(floatX))
        momentum = theano.shared(np.float32(momentum).astype(floatX))

        gradients = self.get_gradients(cost, params)
        velocities = [theano.shared(np.zeros_like(param.get_value(borrow=True)).astype(floatX)) for param in params]

        updates = []
        for param, gradient, velocity in zip(params, gradients, velocities):
            new_velocity = momentum * velocity - lr * gradient
            updates.append((velocity, new_velocity))
            new_p = param + new_velocity
            # apply constraints
            if param in constraints:
                c = constraints[param]
                new_p = c(new_p)
            updates.append((param, new_p))
        return updates 
Example #5
Source File: net.py    From Depth-Map-Prediction with GNU General Public License v3.0
def _init_params(self, init_W, tie_params):
        if 'W' not in tie_params:
            if init_W is None:
                w_shape = (self.ninput, self.noutput)
                init_W = self.conf.geteval('init_W')(w_shape).astype(floatX)
            self.W = theano.shared(value=init_W, name='W')
            self.params.append(self.W)

        if self.have_bias and 'b' not in tie_params:
            nbias = self.noutput if not self.transpose else self.ninput
            init_b = self.conf.geteval('init_b', 0)
            init_b = self.conf.geteval('init_bias', init_b)
            self.bias = theano.shared(init_b + np.zeros(nbias, dtype=floatX),
                                      name='bias')
            self.params.append(self.bias)
    # compute the network output
Example #6
Source File: optimization.py    From Att-ChemdNER with Apache License 2.0
def rmsprop(self, cost, params, lr=0.001, rho=0.9, eps=1e-6, consider_constant=None):
        """
        RMSProp.
        """
        lr = theano.shared(np.float32(lr).astype(floatX))

        gradients = self.get_gradients(cost, params, consider_constant)
        accumulators = [theano.shared(np.zeros_like(p.get_value()).astype(np.float32)) for p in params]

        updates = []

        for param, gradient, accumulator in zip(params, gradients, accumulators):
            new_accumulator = rho * accumulator + (1 - rho) * gradient ** 2
            updates.append((accumulator, new_accumulator))

            new_param = param - lr * gradient / T.sqrt(new_accumulator + eps)
            updates.append((param, new_param))

        return updates 
Example #7
Source File: base_gru.py    From gated-graph-transformer-network with MIT License
def __init__(self, input_width, output_width, activation_shift=0.0, name=None, dropout_keep=1, dropout_input=False, dropout_output=True):
        """
        Params:
            input_width: Width of input
            output_width: Width of the GRU output
            activation_shift: How to shift the biases of the activation
        """
        self._input_width = input_width
        self._output_width = output_width

        prefix = "" if name is None else name + "_"

        self._reset_W = theano.shared(init_params([input_width + output_width, output_width]), prefix+"reset_W")
        self._reset_b = theano.shared(init_params([output_width], shift=1.0), prefix+"reset_b")

        self._update_W = theano.shared(init_params([input_width + output_width, output_width]), prefix+"update_W")
        self._update_b = theano.shared(init_params([output_width], shift=1.0), prefix+"update_b")

        self._activation_W = theano.shared(init_params([input_width + output_width, output_width]), prefix+"activation_W")
        self._activation_b = theano.shared(init_params([output_width], shift=activation_shift), prefix+"activation_b")

        self._dropout_keep = dropout_keep
        self._dropout_input = dropout_input
        self._dropout_output = dropout_output 
Example #8
Source File: strength_weighted_gru.py    From gated-graph-transformer-network with MIT License
def __init__(self, input_width, output_width, activation_shift=0.0, name=None):
        """
        Params:
            input_width: Width of input.
            output_width: Width of the GRU output
            activation_shift: How to shift the biases of the activation
        """
        self._input_width = input_width
        self._output_width = output_width

        prefix = "" if name is None else name + "_"

        self._reset_W = theano.shared(init_params([input_width + output_width, output_width]), prefix+"reset_W")
        self._reset_b = theano.shared(init_params([output_width], shift=1.0), prefix+"reset_b")

        self._update_W = theano.shared(init_params([input_width + output_width, output_width+1]), prefix+"update_W")
        self._update_b = theano.shared(init_params([output_width+1], shift=1.0), prefix+"update_b")

        self._activation_W = theano.shared(init_params([input_width + output_width, output_width]), prefix+"activation_W")
        self._activation_b = theano.shared(init_params([output_width], shift=activation_shift), prefix+"activation_b")

        self._strength_W = theano.shared(init_params([input_width + output_width, 1]), prefix+"strength_W")
        self._strength_b = theano.shared(init_params([1], shift=1.0), prefix+"strength_b") 
Example #9
Source File: optimization.py    From Att-ChemdNER with Apache License 2.0
def adagrad(self, cost, params, lr=1.0, epsilon=1e-6, consider_constant=None):
        """
        Adagrad. Based on http://www.ark.cs.cmu.edu/cdyer/adagrad.pdf
        """
        lr = theano.shared(np.float32(lr).astype(floatX))
        epsilon = theano.shared(np.float32(epsilon).astype(floatX))

        gradients = self.get_gradients(cost, params, consider_constant)
        gsums = [theano.shared(np.zeros_like(param.get_value(borrow=True)).astype(floatX)) for param in params]

        updates = []
        for param, gradient, gsum in zip(params, gradients, gsums):
            new_gsum = gsum + gradient ** 2.
            updates.append((gsum, new_gsum))
            updates.append((param, param - lr * gradient / (T.sqrt(gsum + epsilon))))
        return updates 
Example #10
Source File: adam.py    From gated-graph-transformer-network with MIT License
def Adam(cost, params, lr=0.0002, b1=0.1, b2=0.001, e=1e-8):
    updates = []
    grads = T.grad(cost, params)
    i = theano.shared(np.array(0., theano.config.floatX))
    i_t = i + 1.
    fix1 = 1. - (1. - b1)**i_t
    fix2 = 1. - (1. - b2)**i_t
    lr_t = lr * (T.sqrt(fix2) / fix1)
    for p, g in zip(params, grads):
        m = theano.shared(p.get_value() * 0.)
        v = theano.shared(p.get_value() * 0.)
        m_t = (b1 * g) + ((1. - b1) * m)
        v_t = (b2 * T.sqr(g)) + ((1. - b2) * v)
        g_t = m_t / (T.sqrt(v_t) + e)
        p_t = p - (lr_t * g_t)
        updates.append((m, m_t))
        updates.append((v, v_t))
        updates.append((p, p_t))
    updates.append((i, i_t))
    return updates 
Example #11
Source File: nmt.py    From nmt with BSD 3-Clause "New" or "Revised" License
def rmsprop(lr, tparams, grads, inp, cost):
    zipped_grads = [theano.shared(p.get_value() * numpy.float32(0.), name='%s_grad'%k) for k, p in tparams.iteritems()]
    running_grads = [theano.shared(p.get_value() * numpy.float32(0.), name='%s_rgrad'%k) for k, p in tparams.iteritems()]
    running_grads2 = [theano.shared(p.get_value() * numpy.float32(0.), name='%s_rgrad2'%k) for k, p in tparams.iteritems()]

    zgup = [(zg, g) for zg, g in zip(zipped_grads, grads)]
    rgup = [(rg, 0.95 * rg + 0.05 * g) for rg, g in zip(running_grads, grads)]
    rg2up = [(rg2, 0.95 * rg2 + 0.05 * (g ** 2)) for rg2, g in zip(running_grads2, grads)]

    f_grad_shared = theano.function(inp, cost, updates=zgup+rgup+rg2up, profile=profile)

    updir = [theano.shared(p.get_value() * numpy.float32(0.), name='%s_updir'%k) for k, p in tparams.iteritems()]
    updir_new = [(ud, 0.9 * ud - 1e-4 * zg / tensor.sqrt(rg2 - rg ** 2 + 1e-4)) for ud, zg, rg, rg2 in zip(updir, zipped_grads, running_grads, running_grads2)]
    param_up = [(p, p + udn[1]) for p, udn in zip(itemlist(tparams), updir_new)]
    f_update = theano.function([lr], [], updates=updir_new+param_up, on_unused_input='ignore', profile=profile)

    return f_grad_shared, f_update 
Example #12
Source File: theano_backend.py    From Att-ChemdNER with Apache License 2.0
def variable(value, dtype=None, name=None):
    '''Instantiates a variable and returns it.

    # Arguments
        value: Numpy array, initial value of the tensor.
        dtype: Tensor type.
        name: Optional name string for the tensor.

    # Returns
        A variable instance (with Keras metadata included).
    '''
    if dtype is None:
        dtype = floatx()
    if hasattr(value, 'tocoo'):
        _assert_sparse_module()
        variable = th_sparse_module.as_sparse_variable(value)
    else:
        value = np.asarray(value, dtype=dtype)
        variable = theano.shared(value=value, name=name, strict=False)
    variable._keras_shape = value.shape
    variable._uses_learning_phase = False
    return variable 
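A small usage sketch for the backend helper above (the array contents and names are illustrative): the returned object is an ordinary Theano shared variable carrying the extra Keras metadata set by variable().

w = variable(np.random.random((3, 4)), dtype='float32', name='w')
print(w.get_value().shape)      # (3, 4)
print(w._keras_shape)           # (3, 4)
print(w._uses_learning_phase)   # False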
Example #13
Source File: blocks.py    From spinn with MIT License
def RMSprop(cost, params, lr=0.001, rho=0.9, epsilon=1e-6, grads=None):
    # From:
    # https://github.com/Newmu/Theano-Tutorials/blob/master/4_modern_net.py
    if grads is None:
        grads = T.grad(cost=cost, wrt=params)
    assert len(grads) == len(params)

    updates = []
    for p, g in zip(params, grads):
        acc = theano.shared(np.zeros_like(p.get_value(), dtype=np.float32),
                            name="%s/rms/acc" % p.name)
        acc_new = rho * acc + (1 - rho) * g ** 2
        gradient_scaling = T.sqrt(acc_new + epsilon)
        g = g / gradient_scaling
        updates.append((acc, acc_new))
        updates.append((p, p - lr * g))
    return updates 
Example #14
Source File: hgru4rec.py    From hgru4rec with MIT License
def adam(self, param, grad, updates, sample_idx=None, epsilon=1e-6):
        v1 = np.float32(self.decay)
        v2 = np.float32(1.0 - self.decay)
        acc = theano.shared(param.get_value(borrow=False) * 0., borrow=True)
        meang = theano.shared(param.get_value(borrow=False) * 0., borrow=True)
        countt = theano.shared(param.get_value(borrow=False) * 0., borrow=True)
        if sample_idx is None:
            acc_new = v1 * acc + v2 * grad ** 2
            meang_new = v1 * meang + v2 * grad
            countt_new = countt + 1
            updates[acc] = acc_new
            updates[meang] = meang_new
            updates[countt] = countt_new
        else:
            acc_s = acc[sample_idx]
            meang_s = meang[sample_idx]
            countt_s = countt[sample_idx]
            acc_new = v1 * acc_s + v2 * grad ** 2
            meang_new = v1 * meang_s + v2 * grad
            countt_new = countt_s + 1.0
            updates[acc] = T.set_subtensor(acc_s, acc_new)
            updates[meang] = T.set_subtensor(meang_s, meang_new)
            updates[countt] = T.set_subtensor(countt_s, countt_new)
        return (meang_new / (1 - v1 ** countt_new)) / (T.sqrt(acc_new / (1 - v1 ** countt_new)) + epsilon) 
Example #15
Source File: nmt.py    From nmt with BSD 3-Clause "New" or "Revised" License
def debugging_adadelta(lr, tparams, grads, inp, cost):
    zipped_grads = [theano.shared(p.get_value() * numpy.float32(0.), name='%s_grad'%k) for k, p in tparams.iteritems()]
    running_up2 = [theano.shared(p.get_value() * numpy.float32(0.), name='%s_rup2'%k) for k, p in tparams.iteritems()]
    running_grads2 = [theano.shared(p.get_value() * numpy.float32(0.), name='%s_rgrad2'%k) for k, p in tparams.iteritems()]

    zgup = [(zg, g) for zg, g in zip(zipped_grads, grads)]
    rg2up = [(rg2, 0.95 * rg2 + 0.05 * (g ** 2)) for rg2, g in zip(running_grads2, grads)]

    f_grad_shared = theano.function(inp, cost, updates=zgup+rg2up, profile=profile)
    
    
    updir = [-tensor.sqrt(ru2 + 1e-6) / tensor.sqrt(rg2 + 1e-6) * zg for zg, ru2, rg2 in zip(zipped_grads, running_up2, running_grads2)]
    ru2up = [(ru2, 0.95 * ru2 + 0.05 * (ud ** 2)) for ru2, ud in zip(running_up2, updir)]
    param_up = [(p, p + ud) for p, ud in zip(itemlist(tparams), updir)]

    f_update = theano.function([lr], [], updates=ru2up+param_up, on_unused_input='ignore', profile=profile)

    return f_grad_shared, f_update 
Example #16
Source File: test_recurrent.py    From CAPTCHA-breaking with MIT License
def _runner(layer_class):
    """
    All the recurrent layers share the same interface, so we can run through them with a single
    function.
    """
    for weights in [None, [np.ones((input_dim, output_dim))]]:
        for ret_seq in [True, False]:
            layer = layer_class(input_dim, output_dim, return_sequences=ret_seq, weights=weights)
            layer.input = theano.shared(value=np.ones((nb_samples, timesteps, input_dim)))
            config = layer.get_config()

            for train in [True, False]:
                out = layer.get_output(train).eval()
                # Make sure the output has the desired shape
                if ret_seq:
                    assert(out.shape == (nb_samples, timesteps, output_dim))
                else:
                    assert(out.shape == (nb_samples, output_dim))

                mask = layer.get_output_mask(train) 
Example #17
Source File: test_core.py    From CAPTCHA-breaking with MIT License
def test_connections(self):
        nb_samples = 10
        input_dim = 5
        layer1 = core.Layer()
        layer2 = core.Layer()

        input = np.ones((nb_samples, input_dim))
        layer1.input = theano.shared(value=input)

        # As long as there is no previous layer, an error should be raised.
        for train in [True, False]:
            self.assertRaises(AttributeError, layer2.get_input, train)

        # After connecting, input of layer1 should be passed through
        layer2.set_previous(layer1)
        for train in [True, False]:
            assert_allclose(layer2.get_input(train).eval(), input)
            assert_allclose(layer2.get_output(train).eval(), input) 
Example #18
Source File: layers.py    From DL4MT with BSD 3-Clause "New" or "Revised" License
def shared_dropout_layer(shape, use_noise, trng, value, scaled=True):
    # re-scale dropout at training time, so we don't need to at test time
    if scaled:
        proj = tensor.switch(
            use_noise,
            trng.binomial(shape, p=value, n=1, dtype='float32') / value,
            theano.shared(numpy.float32(1.)))
    else:
        proj = tensor.switch(
            use_noise,
            trng.binomial(shape, p=value, n=1, dtype='float32'),
            theano.shared(numpy.float32(value)))
    return proj


# feedforward layer: affine transformation + point-wise nonlinearity 
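A hedged usage sketch for shared_dropout_layer() above: use_noise is the usual shared flag flipped between training (1.) and decoding (0.); the mask shape and keep-probability below are illustrative, not taken from DL4MT.

import numpy
import theano
from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams

trng = RandomStreams(1234)
use_noise = theano.shared(numpy.float32(1.))
drop_mask = shared_dropout_layer((16, 512), use_noise, trng, 0.5)
# Multiply hidden activations by `drop_mask` during training; at test time:
# use_noise.set_value(numpy.float32(0.))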
Example #19
Source File: updates.py    From iGAN with MIT License
def __call__(self, params, cost):
        updates = []
        grads = T.grad(cost, params)
        grads = clip_norms(grads, self.clipnorm)
        for p, g in zip(params, grads):
            g = self.regularizer.gradient_regularize(p, g)

            acc = theano.shared(p.get_value() * 0.)
            acc_delta = theano.shared(p.get_value() * 0.)
            acc_new = self.rho * acc + (1 - self.rho) * g ** 2
            updates.append((acc, acc_new))

            update = g * T.sqrt(acc_delta + self.epsilon) / T.sqrt(acc_new + self.epsilon)
            updated_p = p - self.lr * update
            updated_p = self.regularizer.weight_regularize(updated_p)
            updates.append((p, updated_p))

            acc_delta_new = self.rho * acc_delta + (1 - self.rho) * update ** 2
            updates.append((acc_delta, acc_delta_new))
        return updates 
Example #20
Source File: updates.py    From iGAN with MIT License
def __call__(self, params, cost):
        updates = []
        grads = T.grad(cost, params)
        grads = clip_norms(grads, self.clipnorm)
        t = theano.shared(floatX(1.))
        b1_t = self.b1 * self.l**(t - 1)

        for p, g in zip(params, grads):
            g = self.regularizer.gradient_regularize(p, g)
            m = theano.shared(p.get_value() * 0.)
            v = theano.shared(p.get_value() * 0.)

            m_t = b1_t * m + (1 - b1_t) * g
            v_t = self.b2 * v + (1 - self.b2) * g**2
            m_c = m_t / (1 - self.b1**t)
            v_c = v_t / (1 - self.b2**t)
            p_t = p - (self.lr * m_c) / (T.sqrt(v_c) + self.e)
            p_t = self.regularizer.weight_regularize(p_t)
            updates.append((m, m_t))
            updates.append((v, v_t))
            updates.append((p, p_t))
        updates.append((t, t + 1.))
        return updates 
Example #21
Source File: nn.py    From opt-mmd with BSD 3-Clause "New" or "Revised" License
def adam_updates(params, cost, lr=0.001, mom1=0.9, mom2=0.999):
    updates = []
    grads = T.grad(cost, params)
    t = th.shared(np.cast[th.config.floatX](1.))
    for p, g in zip(params, grads):
        v = th.shared(np.cast[th.config.floatX](p.get_value() * 0.))
        mg = th.shared(np.cast[th.config.floatX](p.get_value() * 0.))
        v_t = mom1*v + (1. - mom1)*g
        mg_t = mom2*mg + (1. - mom2)*T.square(g)
        v_hat = v_t / (1. - mom1 ** t)
        mg_hat = mg_t / (1. - mom2 ** t)
        g_t = v_hat / T.sqrt(mg_hat + 1e-8)
        p_t = p - lr * g_t
        updates.append((v, v_t))
        updates.append((mg, mg_t))
        updates.append((p, p_t))
    updates.append((t, t+1))
    return updates 
Example #22
Source File: solver.py    From 3D-R2N2 with MIT License
def SGD(lr, params, grads, loss):
    """
    Stochastic Gradient Descent w/ momentum
    """
    momentum = cfg.TRAIN.MOMENTUM
    w_decay = cfg.TRAIN.WEIGHT_DECAY

    updates = []
    for param, grad in zip(params, grads):
        vel = theano.shared(param.val.get_value() * 0.)

        if param.is_bias or w_decay == 0:
            regularized_grad = grad
        else:
            regularized_grad = grad + w_decay * param.val

        param_additive = momentum * vel - lr * regularized_grad
        updates.append((vel, param_additive))
        updates.append((param.val, param.val + param_additive))

    return updates 
Example #23
Source File: hgru4rec.py    From hgru4rec with MIT License
def adadelta(self, param, grad, updates, sample_idx=None, epsilon=1e-6):
        v1 = np.float32(self.decay)
        v2 = np.float32(1.0 - self.decay)
        acc = theano.shared(param.get_value(borrow=False) * 0., borrow=True)
        upd = theano.shared(param.get_value(borrow=False) * 0., borrow=True)
        if sample_idx is None:
            acc_new = acc + grad ** 2
            updates[acc] = acc_new
            grad = T.sqrt(upd + epsilon) * grad
            upd_new = v1 * upd + v2 * grad ** 2
            updates[upd] = upd_new
        else:
            acc_s = acc[sample_idx]
            acc_new = acc_s + grad ** 2
            updates[acc] = T.set_subtensor(acc_s, acc_new)
            upd_s = upd[sample_idx]
            upd_new = v1 * upd_s + v2 * grad ** 2
            updates[upd] = T.set_subtensor(upd_s, upd_new)
            grad = T.sqrt(upd_s + epsilon) * grad
        gradient_scaling = T.cast(T.sqrt(acc_new + epsilon), theano.config.floatX)
        return grad / gradient_scaling 
Example #24
Source File: variable_store.py    From spinn with MIT License
def add_param(self, name, shape, initializer=None, savable=True, trainable=True):
        if not initializer:
            initializer = self.default_initializer

        if name not in self.vars:
            full_name = "%s/%s" % (self.prefix, name)
            if self.logger:
                self.logger.Log(
                    "Created variable " + full_name + " shape: " + str(shape), level=self.logger.DEBUG)
            init_value = initializer(shape).astype(theano.config.floatX)
            self.vars[name] = theano.shared(init_value,
                                            name=full_name)
            if savable:
                self.savable_vars[name] = self.vars[name]
            if trainable:
                self.trainable_vars[name] = self.vars[name]

        return self.vars[name] 
Example #25
Source File: skipthoughts.py    From StackGAN with MIT License
def init_tparams(params):
	"""
	initialize Theano shared variables according to the initial parameters
	"""
	tparams = OrderedDict()
	for kk, pp in params.iteritems():
		tparams[kk] = theano.shared(params[kk], name=kk)
	return tparams 
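A hedged usage sketch for init_tparams() above: params would normally be an OrderedDict of numpy arrays loaded from a saved model; the key and array below are placeholders, not real skip-thoughts weights.

import numpy
from collections import OrderedDict

params = OrderedDict()
params['W_emb'] = numpy.random.randn(100, 64).astype('float32')
tparams = init_tparams(params)
print(tparams['W_emb'].get_value().shape)   # (100, 64)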
Example #26
Source File: 5_convolutional_net.py    From Theano-Tutorials with MIT License
def init_weights(shape):
    return theano.shared(floatX(np.random.randn(*shape) * 0.01)) 
Example #27
Source File: 5_convolutional_net.py    From Theano-Tutorials with MIT License
def RMSprop(cost, params, lr=0.001, rho=0.9, epsilon=1e-6):
    grads = T.grad(cost=cost, wrt=params)
    updates = []
    for p, g in zip(params, grads):
        acc = theano.shared(p.get_value() * 0.)
        acc_new = rho * acc + (1 - rho) * g ** 2
        gradient_scaling = T.sqrt(acc_new + epsilon)
        g = g / gradient_scaling
        updates.append((acc, acc_new))
        updates.append((p, p - lr * g))
    return updates 
Example #28
Source File: optimizers.py    From DL4MT with BSD 3-Clause "New" or "Revised" License
def adadelta(lr, tparams, grads, inp, cost, profile=False):
    zipped_grads = [theano.shared(p.get_value() * numpy.float32(0.),
                                  name='%s_grad' % k)
                    for k, p in tparams.iteritems()]
    running_up2 = [theano.shared(p.get_value() * numpy.float32(0.),
                                 name='%s_rup2' % k)
                   for k, p in tparams.iteritems()]
    running_grads2 = [theano.shared(p.get_value() * numpy.float32(0.),
                                    name='%s_rgrad2' % k)
                      for k, p in tparams.iteritems()]

    zgup = [(zg, g) for zg, g in zip(zipped_grads, grads)]
    rg2up = [(rg2, 0.95 * rg2 + 0.05 * (g ** 2))
             for rg2, g in zip(running_grads2, grads)]

    f_grad_shared = theano.function(inp, cost, updates=zgup+rg2up,
                                    profile=profile)

    updir = [-tensor.sqrt(ru2 + 1e-6) / tensor.sqrt(rg2 + 1e-6) * zg
             for zg, ru2, rg2 in zip(zipped_grads, running_up2,
                                     running_grads2)]
    ru2up = [(ru2, 0.95 * ru2 + 0.05 * (ud ** 2))
             for ru2, ud in zip(running_up2, updir)]
    param_up = [(p, p + ud) for p, ud in zip(itemlist(tparams), updir)]

    f_update = theano.function([lr], [], updates=ru2up+param_up,
                               on_unused_input='ignore', profile=profile)

    return f_grad_shared, f_update 
Example #29
Source File: 4_modern_net.py    From Theano-Tutorials with MIT License
def init_weights(shape):
    return theano.shared(floatX(np.random.randn(*shape) * 0.01)) 
Example #30
Source File: 4_modern_net.py    From Theano-Tutorials with MIT License
def RMSprop(cost, params, lr=0.001, rho=0.9, epsilon=1e-6):
    grads = T.grad(cost=cost, wrt=params)
    updates = []
    for p, g in zip(params, grads):
        acc = theano.shared(p.get_value() * 0.)
        acc_new = rho * acc + (1 - rho) * g ** 2
        gradient_scaling = T.sqrt(acc_new + epsilon)
        g = g / gradient_scaling
        updates.append((acc, acc_new))
        updates.append((p, p - lr * g))
    return updates