Python theano.tensor.square() Examples

The following are 30 code examples of theano.tensor.square(), drawn from open-source projects; the source file and project are noted above each example. You may also want to check out the other available functions and classes of the theano.tensor module.
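As a warm-up before the project examples, here is a minimal self-contained sketch (variable names are illustrative) showing that T.square computes the elementwise square of a tensor:

import numpy as np
import theano
import theano.tensor as T

x = T.vector('x')                            # symbolic input vector
square_fn = theano.function([x], T.square(x))
print(square_fn(np.asarray([1., 2., 3.], dtype=theano.config.floatX)))  # [1. 4. 9.]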
Example #1
Source File: core.py    From modular_rl with MIT License
def __init__(self, net, mixfrac=1.0, maxiter=25):
        EzPickle.__init__(self, net, mixfrac, maxiter)
        self.net = net
        self.mixfrac = mixfrac

        x_nx = net.input
        self.predict = theano.function([x_nx], net.output, **FNOPTS)

        ypred_ny = net.output
        ytarg_ny = T.matrix("ytarg")
        var_list = net.trainable_weights
        l2 = 1e-3 * T.add(*[T.square(v).sum() for v in var_list])
        N = x_nx.shape[0]
        mse = T.sum(T.square(ytarg_ny - ypred_ny))/N
        symb_args = [x_nx, ytarg_ny]
        loss = mse + l2
        self.opt = LbfgsOptimizer(loss, var_list, symb_args, maxiter=maxiter, extra_losses={"mse":mse, "l2":l2}) 
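The loss assembled above is ordinary L2-regularized least squares. Written out, with N the batch size and v ranging over the trainable weights:

    loss = \frac{1}{N}\sum_{i,j}\bigl(\hat{y}_{ij} - y_{ij}\bigr)^2 \;+\; 10^{-3}\sum_{v}\lVert v\rVert_2^2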
Example #2
Source File: nn.py    From opt-mmd with BSD 3-Clause "New" or "Revised" License
def get_output_for(self, input, deterministic=False, **kwargs):
        if deterministic:
            norm_features = (input-self.avg_batch_mean.dimshuffle(*self.dimshuffle_args)) / T.sqrt(1e-6 + self.avg_batch_var).dimshuffle(*self.dimshuffle_args)
        else:
            batch_mean = T.mean(input,axis=self.axes_to_sum).flatten()
            centered_input = input-batch_mean.dimshuffle(*self.dimshuffle_args)
            batch_var = T.mean(T.square(centered_input),axis=self.axes_to_sum).flatten()
            batch_stdv = T.sqrt(1e-6 + batch_var)
            norm_features = centered_input / batch_stdv.dimshuffle(*self.dimshuffle_args)

            # BN updates
            new_m = 0.9*self.avg_batch_mean + 0.1*batch_mean
            new_v = 0.9*self.avg_batch_var + T.cast((0.1*input.shape[0])/(input.shape[0]-1),th.config.floatX)*batch_var
            self.bn_updates = [(self.avg_batch_mean, new_m), (self.avg_batch_var, new_v)]

        if hasattr(self, 'g'):
            activation = norm_features*self.g.dimshuffle(*self.dimshuffle_args)
        else:
            activation = norm_features
        if hasattr(self, 'b'):
            activation += self.b.dimshuffle(*self.dimshuffle_args)

        return self.nonlinearity(activation) 
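The two updates collected in bn_updates maintain exponential moving averages of the batch statistics for use at test time; the N/(N-1) factor on the variance term is Bessel's correction, turning the biased batch variance into an unbiased estimate:

    m_{new} = 0.9\,m + 0.1\,\mu_B, \qquad v_{new} = 0.9\,v + 0.1\,\tfrac{N}{N-1}\,\sigma_B^2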
Example #3
Source File: nn.py    From GELUs with MIT License
def adam_updates(params, cost, lr=0.001, mom1=0.9, mom2=0.999):
    updates = []
    grads = T.grad(cost, params)
    t = th.shared(np.cast[th.config.floatX](1.))
    for p, g in zip(params, grads):
        v = th.shared(np.cast[th.config.floatX](p.get_value() * 0.))
        mg = th.shared(np.cast[th.config.floatX](p.get_value() * 0.))
        v_t = mom1*v + (1. - mom1)*g
        mg_t = mom2*mg + (1. - mom2)*T.square(g)
        v_hat = v_t / (1. - mom1 ** t)
        mg_hat = mg_t / (1. - mom2 ** t)
        g_t = v_hat / T.sqrt(mg_hat + 1e-8)
        p_t = p - lr * g_t
        updates.append((v, v_t))
        updates.append((mg, mg_t))
        updates.append((p, p_t))
    updates.append((t, t+1))
    return updates 
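A hypothetical usage sketch of adam_updates (the linear model, W, x, and y below are illustrative, not from the project):

import numpy as np
import theano as th
import theano.tensor as T

W = th.shared(np.zeros((3, 2), dtype=th.config.floatX), name='W')
x = T.matrix('x')
y = T.matrix('y')
cost = T.mean(T.square(T.dot(x, W) - y))   # squared-error cost
train = th.function([x, y], cost, updates=adam_updates([W], cost, lr=1e-3))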
Example #4
Source File: nn.py    From deligan with MIT License
def adam_updates(params, cost, lr=0.001, mom1=0.9, mom2=0.999):
    updates = []
    grads = T.grad(cost, params)
    t = th.shared(np.cast[th.config.floatX](1.))
    for p, g in zip(params, grads):
        v = th.shared(np.cast[th.config.floatX](p.get_value() * 0.))
        mg = th.shared(np.cast[th.config.floatX](p.get_value() * 0.))
        v_t = mom1*v + (1. - mom1)*g
        mg_t = mom2*mg + (1. - mom2)*T.square(g)
        v_hat = v_t / (1. - mom1 ** t)
        mg_hat = mg_t / (1. - mom2 ** t)
        g_t = v_hat / T.sqrt(mg_hat + 1e-8)
        p_t = p - lr * g_t
        updates.append((v, v_t))
        updates.append((mg, mg_t))
        updates.append((p, p_t))
    updates.append((t, t+1))
    return updates 
Example #5
Source File: nn.py    From opt-mmd with BSD 3-Clause "New" or "Revised" License
def adam_updates(params, cost, lr=0.001, mom1=0.9, mom2=0.999):
    updates = []
    grads = T.grad(cost, params)
    t = th.shared(np.cast[th.config.floatX](1.))
    for p, g in zip(params, grads):
        v = th.shared(np.cast[th.config.floatX](p.get_value() * 0.))
        mg = th.shared(np.cast[th.config.floatX](p.get_value() * 0.))
        v_t = mom1*v + (1. - mom1)*g
        mg_t = mom2*mg + (1. - mom2)*T.square(g)
        v_hat = v_t / (1. - mom1 ** t)
        mg_hat = mg_t / (1. - mom2 ** t)
        g_t = v_hat / T.sqrt(mg_hat + 1e-8)
        p_t = p - lr * g_t
        updates.append((v, v_t))
        updates.append((mg, mg_t))
        updates.append((p, p_t))
    updates.append((t, t+1))
    return updates 
Example #6
Source File: mujoco_costs.py    From adversarial-policies with MIT License
def __init__(self):
        def f(x, u, i, terminal):
            # x: (batch_size, 10), concatenation of qpos & qvel
            # Gym reward is forward_movement - control_cost.
            #
            # Gym computes the forward movement by taking a difference before
            # and after MuJoCo's step. A MuJoCo step involves numerical
            # integration on qvel. We can't step MuJoCo here, so use qvel
            # directly; this should give the same first-order result.
            forward_movement = x[..., 5]  # qvel[0]

            # We calculate the quadratic control penalty as in Gym.
            if terminal:
                control_penalty = 0
            else:
                control_penalty = 1e-4 * T.square(u).sum(axis=-1)

            cost = -forward_movement + control_penalty
            return cost

        super().__init__(f, state_size=10, action_size=2) 
Example #7
Source File: mujoco_costs.py    From adversarial-policies with MIT License
def __init__(self):
        def f(x, u, i, terminal):
            if terminal:
                ctrl_cost = T.zeros_like(x[..., 0])
            else:
                ctrl_cost = T.square(u).sum(axis=-1)

            # x: (batch_size, 8)
            # x[..., 0:4]: qpos
            # x[..., 4:8]: qvel, time derivatives of qpos, not used in the cost.
            theta = x[..., 0]  # qpos[0]: angle of joint 0
            phi = x[..., 1]  # qpos[1]: angle of joint 1
            target_xpos = x[..., 2:4]  # qpos[2:4], target x & y coordinate
            body1_xpos = 0.1 * T.stack([T.cos(theta), T.sin(theta)], axis=1)
            tip_xpos_incr = 0.11 * T.stack([T.cos(phi), T.sin(phi)], axis=1)
            tip_xpos = body1_xpos + tip_xpos_incr
            delta = tip_xpos - target_xpos

            state_cost = T.sqrt(T.sum(delta * delta, axis=-1))
            cost = state_cost + ctrl_cost

            return cost

        super().__init__(f, state_size=8, action_size=2) 
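The state cost is the planar distance from the arm's fingertip to the target. Following the code, the fingertip position is recovered from the two joint angles by forward kinematics with link lengths 0.1 and 0.11:

    \text{tip} = 0.1\,(\cos\theta,\ \sin\theta) + 0.11\,(\cos\varphi,\ \sin\varphi), \qquad \text{state\_cost} = \lVert \text{tip} - \text{target} \rVert_2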
Example #8
Source File: mujoco_costs.py    From adversarial-policies with MIT License
def __init__(self):
        def f(x, u, i, terminal):
            if terminal:
                ctrl_cost = T.zeros_like(x[..., 0])
            else:
                ctrl_cost = T.square(u).sum(axis=-1)  # penalize large control

            # x: (batch_size, 4), concatenation of qpos & qvel
            angle = x[..., 1]  # pendulum rotation
            ang_cost = angle * angle  # penalize large angles
            vel = x[..., 2:4]
            vel_cost = T.square(vel).sum(axis=-1)  # penalize large velocities

            # Try and keep the pendulum as upright as possible,
            # without too rapid movement.
            cost = ang_cost + 1e-1 * vel_cost + 1e-1 * ctrl_cost
            return cost

        super().__init__(f, state_size=4, action_size=1) 
Example #9
Source File: nn.py    From opt-mmd with BSD 3-Clause "New" or "Revised" License
def get_output_for(self, input, init=False, deterministic=False, **kwargs):
        if input.ndim > 2:
            # if the input has more than two dimensions, flatten it into a
            # batch of feature vectors.
            input = input.flatten(2)

        activation = T.dot(input, self.W)

        if init:
            ma = T.mean(activation, axis=0)
            activation -= ma.dimshuffle('x',0)
            stdv = T.sqrt(T.mean(T.square(activation),axis=0))
            activation /= stdv.dimshuffle('x',0)
            self.init_updates = [(self.weight_scale, self.weight_scale/stdv), (self.b, -ma/stdv)]
        else:
            activation += self.b.dimshuffle('x', 0)

        return self.nonlinearity(activation) 
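On the init pass, the layer standardizes its own pre-activations a = xW: it measures the per-unit batch mean \mu(a) and standard deviation \sigma(a) and folds them into the parameters, so every output unit starts with roughly zero mean and unit variance (the data-dependent initialization used with weight normalization):

    \text{weight\_scale} \leftarrow \text{weight\_scale}/\sigma(a), \qquad b \leftarrow -\mu(a)/\sigma(a)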
Example #10
Source File: nn.py    From deligan with MIT License
def get_output_for(self, input, deterministic=False, **kwargs):
        if deterministic:
            norm_features = (input-self.avg_batch_mean.dimshuffle(*self.dimshuffle_args)) / T.sqrt(1e-6 + self.avg_batch_var).dimshuffle(*self.dimshuffle_args)
        else:
            batch_mean = T.mean(input,axis=self.axes_to_sum).flatten()
            centered_input = input-batch_mean.dimshuffle(*self.dimshuffle_args)
            batch_var = T.mean(T.square(centered_input),axis=self.axes_to_sum).flatten()
            batch_stdv = T.sqrt(1e-6 + batch_var)
            norm_features = centered_input / batch_stdv.dimshuffle(*self.dimshuffle_args)

            # BN updates
            new_m = 0.9*self.avg_batch_mean + 0.1*batch_mean
            new_v = 0.9*self.avg_batch_var + T.cast((0.1*input.shape[0])/(input.shape[0]-1),th.config.floatX)*batch_var
            self.bn_updates = [(self.avg_batch_mean, new_m), (self.avg_batch_var, new_v)]

        if hasattr(self, 'g'):
            activation = norm_features*self.g.dimshuffle(*self.dimshuffle_args)
        else:
            activation = norm_features
        if hasattr(self, 'b'):
            activation += self.b.dimshuffle(*self.dimshuffle_args)

        return self.nonlinearity(activation) 
Example #11
Source File: nn.py    From deligan with MIT License
def get_output_for(self, input, init=False, deterministic=False, **kwargs):
        if input.ndim > 2:
            # if the input has more than two dimensions, flatten it into a
            # batch of feature vectors.
            input = input.flatten(2)

        activation = T.dot(input, self.W)

        if init:
            ma = T.mean(activation, axis=0)
            activation -= ma.dimshuffle('x',0)
            stdv = T.sqrt(T.mean(T.square(activation),axis=0))
            activation /= stdv.dimshuffle('x',0)
            self.init_updates = [(self.weight_scale, self.weight_scale/stdv), (self.b, -ma/stdv)]
        else:
            activation += self.b.dimshuffle('x', 0)

        return self.nonlinearity(activation) 
Example #12
Source File: nn.py    From deligan with MIT License
def adam_updates(params, cost, lr=0.001, mom1=0.9, mom2=0.999):
    updates = []
    grads = T.grad(cost, params)
    t = th.shared(np.cast[th.config.floatX](1.))
    for p, g in zip(params, grads):
        v = th.shared(np.cast[th.config.floatX](p.get_value() * 0.))
        mg = th.shared(np.cast[th.config.floatX](p.get_value() * 0.))
        v_t = mom1*v + (1. - mom1)*g
        mg_t = mom2*mg + (1. - mom2)*T.square(g)
        v_hat = v_t / (1. - mom1 ** t)
        mg_hat = mg_t / (1. - mom2 ** t)
        g_t = v_hat / T.sqrt(mg_hat + 1e-8)
        p_t = p - lr * g_t
        updates.append((v, v_t))
        updates.append((mg, mg_t))
        updates.append((p, p_t))
    updates.append((t, t+1))
    return updates 
Example #13
Source File: nn.py    From deligan with MIT License
def get_output_for(self, input, deterministic=False, **kwargs):
        if deterministic:
            norm_features = (input-self.avg_batch_mean.dimshuffle(*self.dimshuffle_args)) / T.sqrt(1e-6 + self.avg_batch_var).dimshuffle(*self.dimshuffle_args)
        else:
            batch_mean = T.mean(input,axis=self.axes_to_sum).flatten()
            centered_input = input-batch_mean.dimshuffle(*self.dimshuffle_args)
            batch_var = T.mean(T.square(centered_input),axis=self.axes_to_sum).flatten()
            batch_stdv = T.sqrt(1e-6 + batch_var)
            norm_features = centered_input / batch_stdv.dimshuffle(*self.dimshuffle_args)

            # BN updates
            new_m = 0.9*self.avg_batch_mean + 0.1*batch_mean
            new_v = 0.9*self.avg_batch_var + T.cast((0.1*input.shape[0])/(input.shape[0]-1),th.config.floatX)*batch_var
            self.bn_updates = [(self.avg_batch_mean, new_m), (self.avg_batch_var, new_v)]

        if hasattr(self, 'g'):
            activation = norm_features*self.g.dimshuffle(*self.dimshuffle_args)
        else:
            activation = norm_features
        if hasattr(self, 'b'):
            activation += self.b.dimshuffle(*self.dimshuffle_args)

        return self.nonlinearity(activation) 
Example #14
Source File: nn.py    From deligan with MIT License
def l2normalize(layer, train_scale=True):
    W_param = layer.W
    s = W_param.get_value().shape
    if len(s)==4:
        axes_to_sum = (1,2,3)
        dimshuffle_args = [0,'x','x','x']
        k = s[0]
    else:
        axes_to_sum = 0
        dimshuffle_args = ['x',0]
        k = s[1]
    layer.W_scale = layer.add_param(lasagne.init.Constant(1.),
                          (k,), name="W_scale", trainable=train_scale, regularizable=False)
    layer.W = W_param * (layer.W_scale/T.sqrt(1e-6 + T.sum(T.square(W_param),axis=axes_to_sum))).dimshuffle(*dimshuffle_args)
    return layer

# fully connected layer with weight normalization 
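l2normalize above reparameterizes the layer in the weight-normalization style: the weight vector of each output unit k is rescaled so that its norm is controlled by the learned scalar s_k (= W_scale[k]), with the 1e-6 term guarding against division by zero:

    W_k \leftarrow s_k \, \frac{W_k}{\sqrt{10^{-6} + \lVert W_k \rVert_2^2}}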
Example #15
Source File: nn.py    From deligan with MIT License
def get_output_for(self, input, init=False, deterministic=False, **kwargs):
        if input.ndim > 2:
            # if the input has more than two dimensions, flatten it into a
            # batch of feature vectors.
            input = input.flatten(2)

        activation = T.dot(input, self.W)

        if init:
            ma = T.mean(activation, axis=0)
            activation -= ma.dimshuffle('x',0)
            stdv = T.sqrt(T.mean(T.square(activation),axis=0))
            activation /= stdv.dimshuffle('x',0)
            self.init_updates = [(self.weight_scale, self.weight_scale/stdv), (self.b, -ma/stdv)]
        else:
            activation += self.b.dimshuffle('x', 0)

        return self.nonlinearity(activation) 
Example #16
Source File: nn.py    From weightnorm with MIT License
def adam_updates(params, cost, lr=0.001, mom1=0.9, mom2=0.999):
    updates = []
    grads = T.grad(cost, params)
    t = th.shared(np.cast[th.config.floatX](1.))
    for p, g in zip(params, grads):
        v = th.shared(np.cast[th.config.floatX](p.get_value() * 0.))
        mg = th.shared(np.cast[th.config.floatX](p.get_value() * 0.))
        v_t = mom1*v + (1. - mom1)*g
        mg_t = mom2*mg + (1. - mom2)*T.square(g)
        v_hat = v_t / (1. - mom1 ** t)
        mg_hat = mg_t / (1. - mom2 ** t)
        g_t = v_hat / T.sqrt(mg_hat + 1e-8)
        p_t = p - lr * g_t
        updates.append((v, v_t))
        updates.append((mg, mg_t))
        updates.append((p, p_t))
    updates.append((t, t+1))
    return updates 
Example #17
Source File: nn.py    From weightnorm with MIT License
def get_output_for(self, input, deterministic=False, **kwargs):
        if deterministic:
            norm_features = (input-self.avg_batch_mean.dimshuffle(*self.dimshuffle_args)) / T.sqrt(1e-6 + self.avg_batch_var).dimshuffle(*self.dimshuffle_args)
        else:
            batch_mean = T.mean(input,axis=self.axes_to_sum).flatten()
            centered_input = input-batch_mean.dimshuffle(*self.dimshuffle_args)
            batch_var = T.mean(T.square(centered_input),axis=self.axes_to_sum).flatten()
            batch_stdv = T.sqrt(1e-6 + batch_var)
            norm_features = centered_input / batch_stdv.dimshuffle(*self.dimshuffle_args)
            
            # BN updates
            new_m = 0.9*self.avg_batch_mean + 0.1*batch_mean
            new_v = 0.9*self.avg_batch_var + T.cast((0.1*input.shape[0])/(input.shape[0]-1.), th.config.floatX)*batch_var
            self.bn_updates = [(self.avg_batch_mean, new_m), (self.avg_batch_var, new_v)]
            
        if hasattr(self, 'g'):
            activation = norm_features*self.g.dimshuffle(*self.dimshuffle_args)
        else:
            activation = norm_features
        if hasattr(self, 'b'):
            activation += self.b.dimshuffle(*self.dimshuffle_args)
            
        return self.nonlinearity(activation) 
Example #18
Source File: theano_utils.py    From pysaliency with MIT License
def __init__(self, input, centerbias=None, alpha=1.0):
        self.input = input
        if centerbias is None:
            centerbias = np.ones(12)
        self.alpha = theano.shared(value=np.array(alpha).astype(theano.config.floatX), name='alpha')
        self.centerbias_ys = theano.shared(value=np.array(centerbias, dtype=theano.config.floatX), name='centerbias_ys')
        self.centerbias_xs = theano.shared(value=np.linspace(0, 1, len(centerbias), dtype=theano.config.floatX), name='centerbias_xs')

        height = T.cast(input.shape[0], theano.config.floatX)
        width = T.cast(input.shape[1], theano.config.floatX)
        x_coords = (T.arange(width) - 0.5*width) / (0.5*width)
        y_coords = (T.arange(height) - 0.5*height) / (0.5*height) + 0.0001  # avoid exact zeros: the gradient of sqrt is undefined at 0

        x_coords = x_coords.dimshuffle('x', 0)
        y_coords = y_coords.dimshuffle(0, 'x')

        dists = T.sqrt(T.square(x_coords) + self.alpha*T.square(y_coords))
        self.max_dist = T.sqrt(1 + self.alpha)
        self.dists = dists/self.max_dist

        self.factors = nonlinearity(self.dists, self.centerbias_xs, self.centerbias_ys, len(centerbias))

        apply_centerbias = T.gt(self.centerbias_ys.shape[0], 2)
        self.output = ifelse(apply_centerbias, self.input*self.factors, self.input)
        self.params = [self.centerbias_ys, self.alpha] 
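The distance map built here is an anisotropic radial coordinate over normalized image coordinates, scaled so the corners map to at most 1:

    d(x, y) = \frac{\sqrt{x^2 + \alpha\,y^2}}{\sqrt{1 + \alpha}}, \qquad x, y \in [-1, 1]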
Example #19
Source File: extras.py    From deep-learning-models with MIT License
def get_output_for(self, input, deterministic=False, **kwargs):
        if deterministic:
            norm_features = (input-self.avg_batch_mean.dimshuffle(*self.dimshuffle_args)) / T.sqrt(1e-6 + self.avg_batch_var).dimshuffle(*self.dimshuffle_args)
        else:
            batch_mean = T.mean(input,axis=self.axes_to_sum).flatten()
            centered_input = input-batch_mean.dimshuffle(*self.dimshuffle_args)
            batch_var = T.mean(T.square(centered_input),axis=self.axes_to_sum).flatten()
            batch_stdv = T.sqrt(1e-6 + batch_var)
            norm_features = centered_input / batch_stdv.dimshuffle(*self.dimshuffle_args)

            # BN updates
            new_m = 0.9*self.avg_batch_mean + 0.1*batch_mean
            new_v = 0.9*self.avg_batch_var + T.cast((0.1*input.shape[0])/(input.shape[0]-1),th.config.floatX)*batch_var
            self.bn_updates = [(self.avg_batch_mean, new_m), (self.avg_batch_var, new_v)]

        if hasattr(self, 'g'):
            activation = norm_features*self.g.dimshuffle(*self.dimshuffle_args)
        else:
            activation = norm_features
        if hasattr(self, 'b'):
            activation += self.b.dimshuffle(*self.dimshuffle_args)

        return self.nonlinearity(activation) 
Example #20
Source File: objectives.py    From kusanagi with MIT License
def gaussian_log_likelihood(targets, pred_mean, pred_std=None):
    ''' Computes the log likelihood of the targets under Gaussian
        distributed predictions, assuming diagonal covariances.
        Constant (-0.5*log(2*pi)) terms are dropped.
    '''
    delta = pred_mean - targets
    # note that if pred_std (the noise) is a 1xD vector, broadcasting rules apply
    if pred_std is not None:
        # sum over output dimensions
        lml = -tt.square(delta/pred_std).sum(-1)*0.5 - tt.log(pred_std).sum(-1)
    else:
        # sum over output dimensions
        lml = -tt.square(delta).sum(-1)*0.5

    # sum over all examples
    return lml.sum() 
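Up to the additive constant the code drops (the -\tfrac{1}{2}\log 2\pi terms), this is the log density of the targets under a diagonal Gaussian, summed over output dimensions d and then over examples:

    \log p(t \mid \mu, \sigma) = -\tfrac{1}{2}\sum_d \Bigl(\frac{\mu_d - t_d}{\sigma_d}\Bigr)^2 - \sum_d \log\sigma_d + \text{const}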
Example #21
Source File: steingan_celeba.py    From SteinGAN with MIT License
def rbf_kernel(X0):
    XY = T.dot(X0, X0.transpose())
    x2 = T.reshape(T.sum(T.square(X0), axis=1), (X0.shape[0], 1))
    X2e = T.repeat(x2, X0.shape[0], axis=1)
    H = T.sub(T.add(X2e, X2e.transpose()), 2 * XY)
    
    V = H.flatten()
    
    # median of the pairwise squared distances
    h = T.switch(T.eq((V.shape[0] % 2), 0),
        # even number of entries: average the two middle values
        T.mean(T.sort(V)[ ((V.shape[0] // 2) - 1) : ((V.shape[0] // 2) + 1) ]),
        # odd number of entries: take the middle value
        T.sort(V)[V.shape[0] // 2])
    
    h = T.sqrt(0.5 * h / T.log(X0.shape[0].astype('float32') + 1.0)) / 2.

    Kxy = T.exp(-H / h ** 2 / 2.0)
    neighbors = T.argsort(H, axis=1)[:, 1]

    return Kxy, neighbors, h 
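H holds the pairwise squared distances \lVert x_i - x_j \rVert^2, and the bandwidth h is a variant of the median heuristic: the median squared distance is scaled by 1/(2\log(n+1)), square-rooted, and halved (the final /2 is a project-specific choice). The resulting kernel is the standard Gaussian RBF:

    K_{ij} = \exp\Bigl(-\frac{\lVert x_i - x_j \rVert^2}{2h^2}\Bigr)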
Example #22
Source File: steingan_lsun.py    From SteinGAN with MIT License
def rbf_kernel(X0):
    XY = T.dot(X0, X0.transpose())
    x2 = T.reshape(T.sum(T.square(X0), axis=1), (X0.shape[0], 1))
    X2e = T.repeat(x2, X0.shape[0], axis=1)
    H = T.sub(T.add(X2e, X2e.transpose()), 2 * XY)
    
    V = H.flatten()
    
    # median of the pairwise squared distances
    h = T.switch(T.eq((V.shape[0] % 2), 0),
        # even number of entries: average the two middle values
        T.mean(T.sort(V)[ ((V.shape[0] // 2) - 1) : ((V.shape[0] // 2) + 1) ]),
        # odd number of entries: take the middle value
        T.sort(V)[V.shape[0] // 2])
    
    h = T.sqrt(0.5 * h / T.log(X0.shape[0].astype('float32') + 1.0)) / 2.

    Kxy = T.exp(-H / h ** 2 / 2.0)
    neighbors = T.argsort(H, axis=1)[:, 1]

    return Kxy, neighbors, h 
Example #23
Source File: nn.py    From imitation with MIT License
def __init__(self, dim, eps=1e-6, init_count=0, init_mean=0., init_meansq=1.):
        '''
        Args:
            dim: dimension of the space of points to be standardized
            eps: small constant to add to denominators to prevent division by 0
            init_count, init_mean, init_meansq: initial values for accumulators

        Note:
            if init_count is 0, then init_mean and init_meansq have no effect beyond
            the first call to update(), which will ignore their values and
            replace them with values from a new batch of data.
        '''
        self._eps = eps
        self._dim = dim
        with variable_scope(type(self).__name__) as self.__varscope:
            self._count = get_variable('count', np.array(float(init_count)), trainable=False)
            self._mean_1_D = get_variable('mean_1_D', np.full((1, self._dim), init_mean), broadcastable=(True,False), trainable=False)
            self._meansq_1_D = get_variable('meansq_1_D', np.full((1, self._dim), init_meansq), broadcastable=(True,False), trainable=False)
        self._stdev_1_D = tensor.sqrt(tensor.nnet.relu(self._meansq_1_D - tensor.square(self._mean_1_D)))
        # The relu ensures the expression inside the square root is nonnegative. Adding
        # self._eps inside the square root might have been the better choice, but it is
        # kept this way to preserve backwards compatibility with existing saved models.

        self.get_mean = self._mean_1_D.get_value
        self.get_stdev = theano.function([], self._stdev_1_D[0,:]) # TODO: return with shape (1,D) 
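The standard deviation is recovered from the two accumulators via the identity \mathrm{Var}[x] = \mathbb{E}[x^2] - \mathbb{E}[x]^2, with the relu clamping any small negative value produced by floating-point error:

    \sigma = \sqrt{\max\bigl(0,\; \mathbb{E}[x^2] - \mathbb{E}[x]^2\bigr)}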
Example #24
Source File: thutil.py    From imitation with MIT License
def adam(cost, params, lr, beta1=0.9, beta2=0.999, eps=1e-8):
    updates = []
    grads = tensor.grad(cost, params); assert len(params) == len(grads)
    t0 = theano.shared(np.array(0., dtype=theano.config.floatX))
    t = t0 + 1
    corr1 = (1 - beta1**t)
    corr2 = (1 - beta2**t)
    alpha = lr * tensor.sqrt(corr2) / corr1
    for p, g in zip(params, grads):
        m = theano.shared(value=np.zeros(p.get_value().shape, dtype=theano.config.floatX), broadcastable=p.broadcastable)
        v = theano.shared(value=np.zeros(p.get_value().shape, dtype=theano.config.floatX), broadcastable=p.broadcastable)
        m_t = beta1 * m + (1 - beta1) * g
        v_t = beta2 * v + (1 - beta2) * tensor.square(g)
        p_t = p - alpha * m_t/(tensor.sqrt(v_t) + eps)
        updates.append((m, m_t))
        updates.append((v, v_t))
        updates.append((p, p_t))
    updates.append((t0, t))
    return updates 
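Unlike the adam_updates variants above, this version folds both bias corrections into a single step size rather than correcting the moment estimates individually; the two forms are algebraically equivalent up to the placement of \epsilon:

    \alpha_t = \eta\,\frac{\sqrt{1-\beta_2^t}}{1-\beta_1^t}, \qquad p \leftarrow p - \alpha_t\,\frac{m_t}{\sqrt{v_t} + \epsilon}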
Example #25
Source File: nn.py    From salgan with MIT License
def adam_updates(params, cost, lr=0.001, mom1=0.9, mom2=0.999):
    updates = []
    grads = T.grad(cost, params)
    t = th.shared(np.cast[th.config.floatX](1.))
    for p, g in zip(params, grads):
        v = th.shared(np.cast[th.config.floatX](p.get_value() * 0.))
        mg = th.shared(np.cast[th.config.floatX](p.get_value() * 0.))
        v_t = mom1*v + (1. - mom1)*g
        mg_t = mom2*mg + (1. - mom2)*T.square(g)
        v_hat = v_t / (1. - mom1 ** t)
        mg_hat = mg_t / (1. - mom2 ** t)
        g_t = v_hat / T.sqrt(mg_hat + 1e-8)
        p_t = p - lr * g_t
        updates.append((v, v_t))
        updates.append((mg, mg_t))
        updates.append((p, p_t))
    updates.append((t, t+1))
    return updates 
Example #26
Source File: nn.py    From salgan with MIT License
def get_output_for(self, input, deterministic=False, **kwargs):
        if deterministic:
            norm_features = (input-self.avg_batch_mean.dimshuffle(*self.dimshuffle_args)) / T.sqrt(1e-6 + self.avg_batch_var).dimshuffle(*self.dimshuffle_args)
        else:
            batch_mean = T.mean(input,axis=self.axes_to_sum).flatten()
            centered_input = input-batch_mean.dimshuffle(*self.dimshuffle_args)
            batch_var = T.mean(T.square(centered_input),axis=self.axes_to_sum).flatten()
            batch_stdv = T.sqrt(1e-6 + batch_var)
            norm_features = centered_input / batch_stdv.dimshuffle(*self.dimshuffle_args)

            # BN updates
            new_m = 0.9*self.avg_batch_mean + 0.1*batch_mean
            new_v = 0.9*self.avg_batch_var + T.cast((0.1*input.shape[0])/(input.shape[0]-1),th.config.floatX)*batch_var
            self.bn_updates = [(self.avg_batch_mean, new_m), (self.avg_batch_var, new_v)]

        if hasattr(self, 'g'):
            activation = norm_features*self.g.dimshuffle(*self.dimshuffle_args)
        else:
            activation = norm_features
        if hasattr(self, 'b'):
            activation += self.b.dimshuffle(*self.dimshuffle_args)

        return self.nonlinearity(activation) 
Example #27
Source File: nn.py    From salgan with MIT License
def get_output_for(self, input, init=False, deterministic=False, **kwargs):
        if input.ndim > 2:
            # if the input has more than two dimensions, flatten it into a
            # batch of feature vectors.
            input = input.flatten(2)

        activation = T.dot(input, self.W)

        if init:
            ma = T.mean(activation, axis=0)
            activation -= ma.dimshuffle('x',0)
            stdv = T.sqrt(T.mean(T.square(activation),axis=0))
            activation /= stdv.dimshuffle('x',0)
            self.init_updates = [(self.weight_scale, self.weight_scale/stdv), (self.b, -ma/stdv)]
        else:
            activation += self.b.dimshuffle('x', 0)

        return self.nonlinearity(activation) 
Example #28
Source File: theano_utils.py    From pysaliency with MIT License
def __init__(self, input, centerbias=None, alpha=1.0):
        self.input = input
        if centerbias is None:
            centerbias = np.ones(12)
        self.alpha = theano.shared(value=np.array(alpha).astype(theano.config.floatX), name='alpha')
        self.centerbias_ys = theano.shared(value=np.array(centerbias, dtype=theano.config.floatX), name='centerbias_ys')
        self.centerbias_xs = theano.shared(value=np.linspace(0, 1, len(centerbias), dtype=theano.config.floatX), name='centerbias_xs')

        height = T.cast(input.shape[0], theano.config.floatX)
        width = T.cast(input.shape[1], theano.config.floatX)
        x_coords = (T.arange(width) - 0.5*width) / (0.5*width)
        y_coords = (T.arange(height) - 0.5*height) / (0.5*height) + 0.0001  # avoid exact zeros: the gradient of sqrt is undefined at 0

        x_coords = x_coords.dimshuffle('x', 0)
        y_coords = y_coords.dimshuffle(0, 'x')

        dists = T.sqrt(T.square(x_coords) + self.alpha*T.square(y_coords))
        self.max_dist = T.sqrt(1 + self.alpha)
        self.dists = dists/self.max_dist

        self.factors = nonlinearity(self.dists, self.centerbias_xs, self.centerbias_ys, len(centerbias))

        apply_centerbias = T.gt(self.centerbias_ys.shape[0], 2)
        self.output = ifelse(apply_centerbias, self.input+self.factors, self.input)
        self.params = [self.centerbias_ys, self.alpha] 
Example #29
Source File: solver.py    From 3D-R2N2 with MIT License
def ADAM(lr, params, grads, loss, iteration, beta_1=0.9, beta_2=0.999, epsilon=1e-8):
    """
    ADAM update
    """
    t = iteration
    lr_t = lr * T.sqrt(1 - T.pow(beta_2, t)) / (1 - T.pow(beta_1, t))
    w_decay = cfg.TRAIN.WEIGHT_DECAY

    updates = []
    for p, g in zip(params, grads):
        # zero init of the first moment (moving average of the gradient)
        m = theano.shared(p.val.get_value() * 0.)
        # zero init of the second moment (moving average of the squared gradient)
        v = theano.shared(p.val.get_value() * 0.)

        if p.is_bias or w_decay == 0:
            regularized_g = g
        else:
            regularized_g = g + w_decay * p.val

        m_t = (beta_1 * m) + (1 - beta_1) * regularized_g
        v_t = (beta_2 * v) + (1 - beta_2) * T.square(regularized_g)
        p_t = p.val - lr_t * m_t / (T.sqrt(v_t) + epsilon)

        updates.append((m, m_t))
        updates.append((v, v_t))
        updates.append((p.val, p_t))

    return updates 
Example #30
Source File: theano_backend.py    From reading-text-in-the-wild with GNU General Public License v3.0
def square(x):
    return T.sqr(x)
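This backend shim forwards to Theano's native elementwise square op. A quick self-contained check (assuming a standard Theano install) that T.sqr and T.square agree:

import numpy as np
import theano
import theano.tensor as T

x = T.matrix('x')
f = theano.function([x], [T.sqr(x), T.square(x)])
a, b = f(3 * np.ones((2, 2), dtype=theano.config.floatX))
assert np.allclose(a, b)   # both compute the elementwise square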