Python theano.tensor.square() Examples
The following are 30 code examples of theano.tensor.square().
You may also want to check out all available functions and classes of the module theano.tensor.
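Before the per-project examples, here is a minimal sketch (not from any project below) of what T.square does: it adds an elementwise-square node to a symbolic graph, which theano.function then compiles into a callable.

import numpy as np
import theano
import theano.tensor as T

x = T.vector('x')                    # symbolic input
y = T.square(x)                      # elementwise square, same graph as T.sqr(x)
square_fn = theano.function([x], y)  # compile the graph

print(square_fn(np.array([1., -2., 3.], dtype=theano.config.floatX)))
# -> [1. 4. 9.]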
Example #1
Source File: core.py From modular_rl with MIT License
def __init__(self, net, mixfrac=1.0, maxiter=25):
    EzPickle.__init__(self, net, mixfrac, maxiter)
    self.net = net
    self.mixfrac = mixfrac
    x_nx = net.input
    self.predict = theano.function([x_nx], net.output, **FNOPTS)
    ypred_ny = net.output
    ytarg_ny = T.matrix("ytarg")
    var_list = net.trainable_weights
    l2 = 1e-3 * T.add(*[T.square(v).sum() for v in var_list])
    N = x_nx.shape[0]
    mse = T.sum(T.square(ytarg_ny - ypred_ny)) / N
    symb_args = [x_nx, ytarg_ny]
    loss = mse + l2
    self.opt = LbfgsOptimizer(loss, var_list, symb_args, maxiter=maxiter,
                              extra_losses={"mse": mse, "l2": l2})
Example #2
Source File: nn.py From opt-mmd with BSD 3-Clause "New" or "Revised" License
def get_output_for(self, input, deterministic=False, **kwargs):
    if deterministic:
        norm_features = (input - self.avg_batch_mean.dimshuffle(*self.dimshuffle_args)) \
            / T.sqrt(1e-6 + self.avg_batch_var).dimshuffle(*self.dimshuffle_args)
    else:
        batch_mean = T.mean(input, axis=self.axes_to_sum).flatten()
        centered_input = input - batch_mean.dimshuffle(*self.dimshuffle_args)
        batch_var = T.mean(T.square(centered_input), axis=self.axes_to_sum).flatten()
        batch_stdv = T.sqrt(1e-6 + batch_var)
        norm_features = centered_input / batch_stdv.dimshuffle(*self.dimshuffle_args)

        # BN updates
        new_m = 0.9 * self.avg_batch_mean + 0.1 * batch_mean
        new_v = 0.9 * self.avg_batch_var \
            + T.cast((0.1 * input.shape[0]) / (input.shape[0] - 1), th.config.floatX) * batch_var
        self.bn_updates = [(self.avg_batch_mean, new_m), (self.avg_batch_var, new_v)]

    if hasattr(self, 'g'):
        activation = norm_features * self.g.dimshuffle(*self.dimshuffle_args)
    else:
        activation = norm_features
    if hasattr(self, 'b'):
        activation += self.b.dimshuffle(*self.dimshuffle_args)
    return self.nonlinearity(activation)
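A note on the stochastic branch above: T.mean gives the biased (divide-by-N) batch variance, so the running-average update folds Bessel's correction into the blending weight. The T.cast((0.1 * input.shape[0]) / (input.shape[0] - 1), th.config.floatX) factor equals 0.1 * N/(N-1) for batch size N, turning the biased batch variance into an unbiased contribution to avg_batch_var.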
Example #3
Source File: nn.py From GELUs with MIT License
def adam_updates(params, cost, lr=0.001, mom1=0.9, mom2=0.999):
    updates = []
    grads = T.grad(cost, params)
    t = th.shared(np.cast[th.config.floatX](1.))
    for p, g in zip(params, grads):
        v = th.shared(np.cast[th.config.floatX](p.get_value() * 0.))
        mg = th.shared(np.cast[th.config.floatX](p.get_value() * 0.))
        v_t = mom1 * v + (1. - mom1) * g
        mg_t = mom2 * mg + (1. - mom2) * T.square(g)
        v_hat = v_t / (1. - mom1 ** t)
        mg_hat = mg_t / (1. - mom2 ** t)
        g_t = v_hat / T.sqrt(mg_hat + 1e-8)
        p_t = p - lr * g_t
        updates.append((v, v_t))
        updates.append((mg, mg_t))
        updates.append((p, p_t))
    updates.append((t, t + 1))
    return updates
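The function above only builds the update list; here is a hedged sketch of how such a list is typically attached to a compiled training step. The toy model and names below are illustrative, not from the GELUs project:

import numpy as np
import theano as th
import theano.tensor as T

# Toy linear model: minimize mean squared error of x.dot(W) against y.
x = T.matrix('x')
y = T.vector('y')
W = th.shared(np.zeros(3, dtype=th.config.floatX), name='W')
cost = T.mean(T.square(T.dot(x, W) - y))

# theano.function applies each (shared_variable, new_value) pair per call.
train_step = th.function([x, y], cost, updates=adam_updates([W], cost))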
Example #4
Source File: nn.py From deligan with MIT License
def adam_updates(params, cost, lr=0.001, mom1=0.9, mom2=0.999):
    updates = []
    grads = T.grad(cost, params)
    t = th.shared(np.cast[th.config.floatX](1.))
    for p, g in zip(params, grads):
        v = th.shared(np.cast[th.config.floatX](p.get_value() * 0.))
        mg = th.shared(np.cast[th.config.floatX](p.get_value() * 0.))
        v_t = mom1 * v + (1. - mom1) * g
        mg_t = mom2 * mg + (1. - mom2) * T.square(g)
        v_hat = v_t / (1. - mom1 ** t)
        mg_hat = mg_t / (1. - mom2 ** t)
        g_t = v_hat / T.sqrt(mg_hat + 1e-8)
        p_t = p - lr * g_t
        updates.append((v, v_t))
        updates.append((mg, mg_t))
        updates.append((p, p_t))
    updates.append((t, t + 1))
    return updates
Example #5
Source File: nn.py From opt-mmd with BSD 3-Clause "New" or "Revised" License
def adam_updates(params, cost, lr=0.001, mom1=0.9, mom2=0.999):
    updates = []
    grads = T.grad(cost, params)
    t = th.shared(np.cast[th.config.floatX](1.))
    for p, g in zip(params, grads):
        v = th.shared(np.cast[th.config.floatX](p.get_value() * 0.))
        mg = th.shared(np.cast[th.config.floatX](p.get_value() * 0.))
        v_t = mom1 * v + (1. - mom1) * g
        mg_t = mom2 * mg + (1. - mom2) * T.square(g)
        v_hat = v_t / (1. - mom1 ** t)
        mg_hat = mg_t / (1. - mom2 ** t)
        g_t = v_hat / T.sqrt(mg_hat + 1e-8)
        p_t = p - lr * g_t
        updates.append((v, v_t))
        updates.append((mg, mg_t))
        updates.append((p, p_t))
    updates.append((t, t + 1))
    return updates
Example #6
Source File: mujoco_costs.py From adversarial-policies with MIT License
def __init__(self):
    def f(x, u, i, terminal):
        # x: (batch_size, 10), concatenation of qpos & qvel
        # Gym reward is forward_movement - control_cost.
        #
        # Gym computes the forward movement by taking a difference before
        # and after MuJoCo's step. A MuJoCo step involves numerical
        # integration on qvel. We can't step MuJoCo here, so use qvel
        # directly; this should give the same first-order result.
        forward_movement = x[..., 5]  # qvel[0]
        # We calculate the quadratic control penalty as in Gym.
        if terminal:
            control_penalty = 0
        else:
            control_penalty = 1e-4 * T.square(u).sum(axis=-1)
        cost = -forward_movement + control_penalty
        return cost

    super().__init__(f, state_size=10, action_size=2)
Example #7
Source File: mujoco_costs.py From adversarial-policies with MIT License
def __init__(self):
    def f(x, u, i, terminal):
        if terminal:
            ctrl_cost = T.zeros_like(x[..., 0])
        else:
            ctrl_cost = T.square(u).sum(axis=-1)

        # x: (batch_size, 8)
        # x[..., 0:4]: qpos
        # x[..., 4:8]: qvel, time derivatives of qpos, not used in the cost.
        theta = x[..., 0]          # qpos[0]: angle of joint 0
        phi = x[..., 1]            # qpos[1]: angle of joint 1
        target_xpos = x[..., 2:4]  # qpos[2:4], target x & y coordinate
        body1_xpos = 0.1 * T.stack([T.cos(theta), T.sin(theta)], axis=1)
        tip_xpos_incr = 0.11 * T.stack([T.cos(phi), T.sin(phi)], axis=1)
        tip_xpos = body1_xpos + tip_xpos_incr
        delta = tip_xpos - target_xpos

        state_cost = T.sqrt(T.sum(delta * delta, axis=-1))
        cost = state_cost + ctrl_cost
        return cost

    super().__init__(f, state_size=8, action_size=2)
Example #8
Source File: mujoco_costs.py From adversarial-policies with MIT License
def __init__(self):
    def f(x, u, i, terminal):
        if terminal:
            ctrl_cost = T.zeros_like(x[..., 0])
        else:
            ctrl_cost = T.square(u).sum(axis=-1)  # penalize large control

        # x: (batch_size, 4), concatenation of qpos & qvel
        angle = x[..., 1]          # pendulum rotation
        ang_cost = angle * angle   # penalize large angles
        vel = x[..., 2:4]
        vel_cost = T.square(vel).sum(axis=-1)  # penalize large velocities

        # Try and keep the pendulum as upright as possible,
        # without too rapid movement.
        cost = ang_cost + 1e-1 * vel_cost + 1e-1 * ctrl_cost
        return cost

    super().__init__(f, state_size=4, action_size=1)
Example #9
Source File: nn.py From opt-mmd with BSD 3-Clause "New" or "Revised" License
def get_output_for(self, input, init=False, deterministic=False, **kwargs):
    if input.ndim > 2:
        # if the input has more than two dimensions, flatten it into a
        # batch of feature vectors.
        input = input.flatten(2)
    activation = T.dot(input, self.W)
    if init:
        ma = T.mean(activation, axis=0)
        activation -= ma.dimshuffle('x', 0)
        stdv = T.sqrt(T.mean(T.square(activation), axis=0))
        activation /= stdv.dimshuffle('x', 0)
        self.init_updates = [(self.weight_scale, self.weight_scale / stdv),
                             (self.b, -ma / stdv)]
    else:
        activation += self.b.dimshuffle('x', 0)
    return self.nonlinearity(activation)
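A note on the init branch above: this is the data-dependent initialization used with weight normalization. On a first pass with init=True, the layer records updates that divide weight_scale by the batch standard deviation of the pre-activations and set b to cancel their batch mean, so subsequent pre-activations start out approximately zero-mean and unit-variance.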
Example #10
Source File: nn.py From deligan with MIT License
def get_output_for(self, input, deterministic=False, **kwargs):
    if deterministic:
        norm_features = (input - self.avg_batch_mean.dimshuffle(*self.dimshuffle_args)) \
            / T.sqrt(1e-6 + self.avg_batch_var).dimshuffle(*self.dimshuffle_args)
    else:
        batch_mean = T.mean(input, axis=self.axes_to_sum).flatten()
        centered_input = input - batch_mean.dimshuffle(*self.dimshuffle_args)
        batch_var = T.mean(T.square(centered_input), axis=self.axes_to_sum).flatten()
        batch_stdv = T.sqrt(1e-6 + batch_var)
        norm_features = centered_input / batch_stdv.dimshuffle(*self.dimshuffle_args)

        # BN updates
        new_m = 0.9 * self.avg_batch_mean + 0.1 * batch_mean
        new_v = 0.9 * self.avg_batch_var \
            + T.cast((0.1 * input.shape[0]) / (input.shape[0] - 1), th.config.floatX) * batch_var
        self.bn_updates = [(self.avg_batch_mean, new_m), (self.avg_batch_var, new_v)]

    if hasattr(self, 'g'):
        activation = norm_features * self.g.dimshuffle(*self.dimshuffle_args)
    else:
        activation = norm_features
    if hasattr(self, 'b'):
        activation += self.b.dimshuffle(*self.dimshuffle_args)
    return self.nonlinearity(activation)
Example #11
Source File: nn.py From deligan with MIT License
def get_output_for(self, input, init=False, deterministic=False, **kwargs):
    if input.ndim > 2:
        # if the input has more than two dimensions, flatten it into a
        # batch of feature vectors.
        input = input.flatten(2)
    activation = T.dot(input, self.W)
    if init:
        ma = T.mean(activation, axis=0)
        activation -= ma.dimshuffle('x', 0)
        stdv = T.sqrt(T.mean(T.square(activation), axis=0))
        activation /= stdv.dimshuffle('x', 0)
        self.init_updates = [(self.weight_scale, self.weight_scale / stdv),
                             (self.b, -ma / stdv)]
    else:
        activation += self.b.dimshuffle('x', 0)
    return self.nonlinearity(activation)
Example #12
Source File: nn.py From deligan with MIT License
def adam_updates(params, cost, lr=0.001, mom1=0.9, mom2=0.999):
    updates = []
    grads = T.grad(cost, params)
    t = th.shared(np.cast[th.config.floatX](1.))
    for p, g in zip(params, grads):
        v = th.shared(np.cast[th.config.floatX](p.get_value() * 0.))
        mg = th.shared(np.cast[th.config.floatX](p.get_value() * 0.))
        v_t = mom1 * v + (1. - mom1) * g
        mg_t = mom2 * mg + (1. - mom2) * T.square(g)
        v_hat = v_t / (1. - mom1 ** t)
        mg_hat = mg_t / (1. - mom2 ** t)
        g_t = v_hat / T.sqrt(mg_hat + 1e-8)
        p_t = p - lr * g_t
        updates.append((v, v_t))
        updates.append((mg, mg_t))
        updates.append((p, p_t))
    updates.append((t, t + 1))
    return updates
Example #13
Source File: nn.py From deligan with MIT License
def get_output_for(self, input, deterministic=False, **kwargs):
    if deterministic:
        norm_features = (input - self.avg_batch_mean.dimshuffle(*self.dimshuffle_args)) \
            / T.sqrt(1e-6 + self.avg_batch_var).dimshuffle(*self.dimshuffle_args)
    else:
        batch_mean = T.mean(input, axis=self.axes_to_sum).flatten()
        centered_input = input - batch_mean.dimshuffle(*self.dimshuffle_args)
        batch_var = T.mean(T.square(centered_input), axis=self.axes_to_sum).flatten()
        batch_stdv = T.sqrt(1e-6 + batch_var)
        norm_features = centered_input / batch_stdv.dimshuffle(*self.dimshuffle_args)

        # BN updates
        new_m = 0.9 * self.avg_batch_mean + 0.1 * batch_mean
        new_v = 0.9 * self.avg_batch_var \
            + T.cast((0.1 * input.shape[0]) / (input.shape[0] - 1), th.config.floatX) * batch_var
        self.bn_updates = [(self.avg_batch_mean, new_m), (self.avg_batch_var, new_v)]

    if hasattr(self, 'g'):
        activation = norm_features * self.g.dimshuffle(*self.dimshuffle_args)
    else:
        activation = norm_features
    if hasattr(self, 'b'):
        activation += self.b.dimshuffle(*self.dimshuffle_args)
    return self.nonlinearity(activation)
Example #14
Source File: nn.py From deligan with MIT License
def l2normalize(layer, train_scale=True):
    W_param = layer.W
    s = W_param.get_value().shape
    if len(s) == 4:
        axes_to_sum = (1, 2, 3)
        dimshuffle_args = [0, 'x', 'x', 'x']
        k = s[0]
    else:
        axes_to_sum = 0
        dimshuffle_args = ['x', 0]
        k = s[1]
    layer.W_scale = layer.add_param(lasagne.init.Constant(1.), (k,),
                                    name="W_scale", trainable=train_scale,
                                    regularizable=False)
    layer.W = W_param * (layer.W_scale / T.sqrt(1e-6 + T.sum(T.square(W_param),
                                                             axis=axes_to_sum))).dimshuffle(*dimshuffle_args)
    return layer

# fully connected layer with weight normalization
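A hedged usage sketch for the helper above: wrapping a stock Lasagne layer so its weight matrix is rescaled to a learnable multiple of unit L2 norm per output unit. The layer sizes are illustrative, not from the deligan code:

import lasagne

l_in = lasagne.layers.InputLayer((None, 784))
# l2normalize replaces l_dense.W with W * W_scale / ||W||_2 (per column).
l_dense = l2normalize(lasagne.layers.DenseLayer(l_in, num_units=256))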
Example #15
Source File: nn.py From deligan with MIT License
def get_output_for(self, input, init=False, deterministic=False, **kwargs):
    if input.ndim > 2:
        # if the input has more than two dimensions, flatten it into a
        # batch of feature vectors.
        input = input.flatten(2)
    activation = T.dot(input, self.W)
    if init:
        ma = T.mean(activation, axis=0)
        activation -= ma.dimshuffle('x', 0)
        stdv = T.sqrt(T.mean(T.square(activation), axis=0))
        activation /= stdv.dimshuffle('x', 0)
        self.init_updates = [(self.weight_scale, self.weight_scale / stdv),
                             (self.b, -ma / stdv)]
    else:
        activation += self.b.dimshuffle('x', 0)
    return self.nonlinearity(activation)
Example #16
Source File: nn.py From weightnorm with MIT License
def adam_updates(params, cost, lr=0.001, mom1=0.9, mom2=0.999):
    updates = []
    grads = T.grad(cost, params)
    t = th.shared(np.cast[th.config.floatX](1.))
    for p, g in zip(params, grads):
        v = th.shared(np.cast[th.config.floatX](p.get_value() * 0.))
        mg = th.shared(np.cast[th.config.floatX](p.get_value() * 0.))
        v_t = mom1 * v + (1. - mom1) * g
        mg_t = mom2 * mg + (1. - mom2) * T.square(g)
        v_hat = v_t / (1. - mom1 ** t)
        mg_hat = mg_t / (1. - mom2 ** t)
        g_t = v_hat / T.sqrt(mg_hat + 1e-8)
        p_t = p - lr * g_t
        updates.append((v, v_t))
        updates.append((mg, mg_t))
        updates.append((p, p_t))
    updates.append((t, t + 1))
    return updates
Example #17
Source File: nn.py From weightnorm with MIT License
def get_output_for(self, input, deterministic=False, **kwargs):
    if deterministic:
        norm_features = (input - self.avg_batch_mean.dimshuffle(*self.dimshuffle_args)) \
            / T.sqrt(1e-6 + self.avg_batch_var).dimshuffle(*self.dimshuffle_args)
    else:
        batch_mean = T.mean(input, axis=self.axes_to_sum).flatten()
        centered_input = input - batch_mean.dimshuffle(*self.dimshuffle_args)
        batch_var = T.mean(T.square(centered_input), axis=self.axes_to_sum).flatten()
        batch_stdv = T.sqrt(1e-6 + batch_var)
        norm_features = centered_input / batch_stdv.dimshuffle(*self.dimshuffle_args)

        # BN updates
        new_m = 0.9 * self.avg_batch_mean + 0.1 * batch_mean
        new_v = 0.9 * self.avg_batch_var \
            + T.cast((0.1 * input.shape[0]) / (input.shape[0] - 1.), th.config.floatX) * batch_var
        self.bn_updates = [(self.avg_batch_mean, new_m), (self.avg_batch_var, new_v)]

    if hasattr(self, 'g'):
        activation = norm_features * self.g.dimshuffle(*self.dimshuffle_args)
    else:
        activation = norm_features
    if hasattr(self, 'b'):
        activation += self.b.dimshuffle(*self.dimshuffle_args)
    return self.nonlinearity(activation)
Example #18
Source File: theano_utils.py From pysaliency with MIT License
def __init__(self, input, centerbias=None, alpha=1.0):
    self.input = input
    if centerbias is None:
        centerbias = np.ones(12)
    self.alpha = theano.shared(value=np.array(alpha).astype(theano.config.floatX), name='alpha')
    self.centerbias_ys = theano.shared(value=np.array(centerbias, dtype=theano.config.floatX), name='centerbias_ys')
    self.centerbias_xs = theano.shared(value=np.linspace(0, 1, len(centerbias), dtype=theano.config.floatX), name='centerbias_xs')

    height = T.cast(input.shape[0], theano.config.floatX)
    width = T.cast(input.shape[1], theano.config.floatX)
    x_coords = (T.arange(width) - 0.5 * width) / (0.5 * width)
    y_coords = (T.arange(height) - 0.5 * height) / (0.5 * height) + 0.0001  # We cannot have zeros in there because of grad

    x_coords = x_coords.dimshuffle('x', 0)
    y_coords = y_coords.dimshuffle(0, 'x')

    dists = T.sqrt(T.square(x_coords) + self.alpha * T.square(y_coords))
    self.max_dist = T.sqrt(1 + self.alpha)
    self.dists = dists / self.max_dist

    self.factors = nonlinearity(self.dists, self.centerbias_xs, self.centerbias_ys, len(centerbias))

    apply_centerbias = T.gt(self.centerbias_ys.shape[0], 2)
    self.output = ifelse(apply_centerbias, self.input * self.factors, self.input)
    self.params = [self.centerbias_ys, self.alpha]
Example #19
Source File: extras.py From deep-learning-models with MIT License
def get_output_for(self, input, deterministic=False, **kwargs):
    if deterministic:
        norm_features = (input - self.avg_batch_mean.dimshuffle(*self.dimshuffle_args)) \
            / T.sqrt(1e-6 + self.avg_batch_var).dimshuffle(*self.dimshuffle_args)
    else:
        batch_mean = T.mean(input, axis=self.axes_to_sum).flatten()
        centered_input = input - batch_mean.dimshuffle(*self.dimshuffle_args)
        batch_var = T.mean(T.square(centered_input), axis=self.axes_to_sum).flatten()
        batch_stdv = T.sqrt(1e-6 + batch_var)
        norm_features = centered_input / batch_stdv.dimshuffle(*self.dimshuffle_args)

        # BN updates
        new_m = 0.9 * self.avg_batch_mean + 0.1 * batch_mean
        new_v = 0.9 * self.avg_batch_var \
            + T.cast((0.1 * input.shape[0]) / (input.shape[0] - 1), th.config.floatX) * batch_var
        self.bn_updates = [(self.avg_batch_mean, new_m), (self.avg_batch_var, new_v)]

    if hasattr(self, 'g'):
        activation = norm_features * self.g.dimshuffle(*self.dimshuffle_args)
    else:
        activation = norm_features
    if hasattr(self, 'b'):
        activation += self.b.dimshuffle(*self.dimshuffle_args)
    return self.nonlinearity(activation)
Example #20
Source File: objectives.py From kusanagi with MIT License
def gaussian_log_likelihood(targets, pred_mean, pred_std=None):
    '''
    Computes the log likelihood for gaussian distributed predictions.
    This assumes diagonal covariances (and omits the constant
    -0.5*D*log(2*pi) term).
    '''
    delta = pred_mean - targets
    # note that if the noise is a 1xD vector, broadcasting rules apply
    if pred_std is not None:
        # sum over output dimensions
        lml = -tt.square(delta / pred_std).sum(-1) * 0.5 - tt.log(pred_std).sum(-1)
    else:
        # sum over output dimensions
        lml = -tt.square(delta).sum(-1) * 0.5

    # sum over all examples
    return lml.sum()
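A quick numeric check of the unit-variance case, compiled with illustrative variable names (tt is theano.tensor, as in the kusanagi source):

import numpy as np
import theano
import theano.tensor as tt

targets = tt.matrix('targets')
mean = tt.matrix('mean')
ll_fn = theano.function([targets, mean],
                        gaussian_log_likelihood(targets, mean))

t = np.zeros((2, 3), dtype=theano.config.floatX)
m = np.ones((2, 3), dtype=theano.config.floatX)
print(ll_fn(t, m))  # -0.5 * (sum of squared errors) = -3.0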
Example #21
Source File: steingan_celeba.py From SteinGAN with MIT License
def rbf_kernel(X0):
    XY = T.dot(X0, X0.transpose())
    x2 = T.reshape(T.sum(T.square(X0), axis=1), (X0.shape[0], 1))
    X2e = T.repeat(x2, X0.shape[0], axis=1)
    H = T.sub(T.add(X2e, X2e.transpose()), 2 * XY)

    V = H.flatten()
    # median distance
    h = T.switch(T.eq((V.shape[0] % 2), 0),
                 # if even vector
                 T.mean(T.sort(V)[((V.shape[0] // 2) - 1):((V.shape[0] // 2) + 1)]),
                 # if odd vector
                 T.sort(V)[V.shape[0] // 2])

    h = T.sqrt(0.5 * h / T.log(X0.shape[0].astype('float32') + 1.0)) / 2.

    Kxy = T.exp(-H / h ** 2 / 2.0)

    neighbors = T.argsort(H, axis=1)[:, 1]

    return Kxy, neighbors, h
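A hedged sketch of evaluating this kernel on a small batch (the data and names are illustrative):

import numpy as np
import theano
import theano.tensor as T

X = T.matrix('X')
kernel_fn = theano.function([X], list(rbf_kernel(X)))

data = np.random.randn(5, 3).astype(theano.config.floatX)
K, nbrs, bandwidth = kernel_fn(data)
# K: 5x5 RBF Gram matrix; nbrs[i]: index of the point closest to i
# (excluding itself); bandwidth: median-heuristic kernel width.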
Example #22
Source File: steingan_lsun.py From SteinGAN with MIT License
def rbf_kernel(X0):
    XY = T.dot(X0, X0.transpose())
    x2 = T.reshape(T.sum(T.square(X0), axis=1), (X0.shape[0], 1))
    X2e = T.repeat(x2, X0.shape[0], axis=1)
    H = T.sub(T.add(X2e, X2e.transpose()), 2 * XY)

    V = H.flatten()
    # median distance
    h = T.switch(T.eq((V.shape[0] % 2), 0),
                 # if even vector
                 T.mean(T.sort(V)[((V.shape[0] // 2) - 1):((V.shape[0] // 2) + 1)]),
                 # if odd vector
                 T.sort(V)[V.shape[0] // 2])

    h = T.sqrt(0.5 * h / T.log(X0.shape[0].astype('float32') + 1.0)) / 2.

    Kxy = T.exp(-H / h ** 2 / 2.0)

    neighbors = T.argsort(H, axis=1)[:, 1]

    return Kxy, neighbors, h
Example #23
Source File: nn.py From imitation with MIT License
def __init__(self, dim, eps=1e-6, init_count=0, init_mean=0., init_meansq=1.):
    '''
    Args:
        dim: dimension of the space of points to be standardized
        eps: small constant to add to denominators to prevent division by 0
        init_count, init_mean, init_meansq: initial values for accumulators

    Note:
        if init_count is 0, then init_mean and init_meansq have no effect beyond
        the first call to update(), which will ignore their values and
        replace them with values from a new batch of data.
    '''
    self._eps = eps
    self._dim = dim
    with variable_scope(type(self).__name__) as self.__varscope:
        self._count = get_variable('count', np.array(float(init_count)), trainable=False)
        self._mean_1_D = get_variable('mean_1_D', np.full((1, self._dim), init_mean),
                                      broadcastable=(True, False), trainable=False)
        self._meansq_1_D = get_variable('meansq_1_D', np.full((1, self._dim), init_meansq),
                                        broadcastable=(True, False), trainable=False)
        self._stdev_1_D = tensor.sqrt(tensor.nnet.relu(self._meansq_1_D - tensor.square(self._mean_1_D)))
        # Relu ensures inside is nonnegative. maybe the better choice would have been to
        # add self._eps inside the square root, but I'm keeping things this way to preserve
        # backwards compatibility with existing saved models.
    self.get_mean = self._mean_1_D.get_value
    self.get_stdev = theano.function([], self._stdev_1_D[0, :])  # TODO: return with shape (1,D)
Example #24
Source File: thutil.py From imitation with MIT License
def adam(cost, params, lr, beta1=0.9, beta2=0.999, eps=1e-8):
    updates = []
    grads = tensor.grad(cost, params); assert len(params) == len(grads)
    t0 = theano.shared(np.array(0., dtype=theano.config.floatX))
    t = t0 + 1
    corr1 = (1 - beta1**t)
    corr2 = (1 - beta2**t)
    alpha = lr * tensor.sqrt(corr2) / corr1
    for p, g in zip(params, grads):
        m = theano.shared(value=np.zeros(p.get_value().shape, dtype=theano.config.floatX),
                          broadcastable=p.broadcastable)
        v = theano.shared(value=np.zeros(p.get_value().shape, dtype=theano.config.floatX),
                          broadcastable=p.broadcastable)
        m_t = beta1 * m + (1 - beta1) * g
        v_t = beta2 * v + (1 - beta2) * tensor.square(g)
        p_t = p - alpha * m_t / (tensor.sqrt(v_t) + eps)
        updates.append((m, m_t))
        updates.append((v, v_t))
        updates.append((p, p_t))
    updates.append((t0, t))
    return updates
Example #25
Source File: nn.py From salgan with MIT License
def adam_updates(params, cost, lr=0.001, mom1=0.9, mom2=0.999):
    updates = []
    grads = T.grad(cost, params)
    t = th.shared(np.cast[th.config.floatX](1.))
    for p, g in zip(params, grads):
        v = th.shared(np.cast[th.config.floatX](p.get_value() * 0.))
        mg = th.shared(np.cast[th.config.floatX](p.get_value() * 0.))
        v_t = mom1 * v + (1. - mom1) * g
        mg_t = mom2 * mg + (1. - mom2) * T.square(g)
        v_hat = v_t / (1. - mom1 ** t)
        mg_hat = mg_t / (1. - mom2 ** t)
        g_t = v_hat / T.sqrt(mg_hat + 1e-8)
        p_t = p - lr * g_t
        updates.append((v, v_t))
        updates.append((mg, mg_t))
        updates.append((p, p_t))
    updates.append((t, t + 1))
    return updates
Example #26
Source File: nn.py From salgan with MIT License
def get_output_for(self, input, deterministic=False, **kwargs):
    if deterministic:
        norm_features = (input - self.avg_batch_mean.dimshuffle(*self.dimshuffle_args)) \
            / T.sqrt(1e-6 + self.avg_batch_var).dimshuffle(*self.dimshuffle_args)
    else:
        batch_mean = T.mean(input, axis=self.axes_to_sum).flatten()
        centered_input = input - batch_mean.dimshuffle(*self.dimshuffle_args)
        batch_var = T.mean(T.square(centered_input), axis=self.axes_to_sum).flatten()
        batch_stdv = T.sqrt(1e-6 + batch_var)
        norm_features = centered_input / batch_stdv.dimshuffle(*self.dimshuffle_args)

        # BN updates
        new_m = 0.9 * self.avg_batch_mean + 0.1 * batch_mean
        new_v = 0.9 * self.avg_batch_var \
            + T.cast((0.1 * input.shape[0]) / (input.shape[0] - 1), th.config.floatX) * batch_var
        self.bn_updates = [(self.avg_batch_mean, new_m), (self.avg_batch_var, new_v)]

    if hasattr(self, 'g'):
        activation = norm_features * self.g.dimshuffle(*self.dimshuffle_args)
    else:
        activation = norm_features
    if hasattr(self, 'b'):
        activation += self.b.dimshuffle(*self.dimshuffle_args)
    return self.nonlinearity(activation)
Example #27
Source File: nn.py From salgan with MIT License
def get_output_for(self, input, init=False, deterministic=False, **kwargs):
    if input.ndim > 2:
        # if the input has more than two dimensions, flatten it into a
        # batch of feature vectors.
        input = input.flatten(2)
    activation = T.dot(input, self.W)
    if init:
        ma = T.mean(activation, axis=0)
        activation -= ma.dimshuffle('x', 0)
        stdv = T.sqrt(T.mean(T.square(activation), axis=0))
        activation /= stdv.dimshuffle('x', 0)
        self.init_updates = [(self.weight_scale, self.weight_scale / stdv),
                             (self.b, -ma / stdv)]
    else:
        activation += self.b.dimshuffle('x', 0)
    return self.nonlinearity(activation)
Example #28
Source File: theano_utils.py From pysaliency with MIT License
def __init__(self, input, centerbias=None, alpha=1.0):
    self.input = input
    if centerbias is None:
        centerbias = np.ones(12)
    self.alpha = theano.shared(value=np.array(alpha).astype(theano.config.floatX), name='alpha')
    self.centerbias_ys = theano.shared(value=np.array(centerbias, dtype=theano.config.floatX), name='centerbias_ys')
    self.centerbias_xs = theano.shared(value=np.linspace(0, 1, len(centerbias), dtype=theano.config.floatX), name='centerbias_xs')

    height = T.cast(input.shape[0], theano.config.floatX)
    width = T.cast(input.shape[1], theano.config.floatX)
    x_coords = (T.arange(width) - 0.5 * width) / (0.5 * width)
    y_coords = (T.arange(height) - 0.5 * height) / (0.5 * height) + 0.0001  # We cannot have zeros in there because of grad

    x_coords = x_coords.dimshuffle('x', 0)
    y_coords = y_coords.dimshuffle(0, 'x')

    dists = T.sqrt(T.square(x_coords) + self.alpha * T.square(y_coords))
    self.max_dist = T.sqrt(1 + self.alpha)
    self.dists = dists / self.max_dist

    self.factors = nonlinearity(self.dists, self.centerbias_xs, self.centerbias_ys, len(centerbias))

    apply_centerbias = T.gt(self.centerbias_ys.shape[0], 2)
    self.output = ifelse(apply_centerbias, self.input + self.factors, self.input)
    self.params = [self.centerbias_ys, self.alpha]
Example #29
Source File: solver.py From 3D-R2N2 with MIT License
def ADAM(lr, params, grads, loss, iteration, beta_1=0.9, beta_2=0.999, epsilon=1e-8):
    """
    ADAM update
    """
    t = iteration
    lr_t = lr * T.sqrt(1 - T.pow(beta_2, t)) / (1 - T.pow(beta_1, t))
    w_decay = cfg.TRAIN.WEIGHT_DECAY

    updates = []
    for p, g in zip(params, grads):
        # zero init of moment
        m = theano.shared(p.val.get_value() * 0.)
        # zero init of velocity
        v = theano.shared(p.val.get_value() * 0.)

        if p.is_bias or w_decay == 0:
            regularized_g = g
        else:
            regularized_g = g + w_decay * p.val

        m_t = (beta_1 * m) + (1 - beta_1) * regularized_g
        v_t = (beta_2 * v) + (1 - beta_2) * T.square(regularized_g)
        p_t = p.val - lr_t * m_t / (T.sqrt(v_t) + epsilon)

        updates.append((m, m_t))
        updates.append((v, v_t))
        updates.append((p.val, p_t))
    return updates
Example #30
Source File: theano_backend.py From reading-text-in-the-wild with GNU General Public License v3.0
def square(x):
    return T.sqr(x)
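T.sqr is Theano's native name for elementwise squaring, and theano.tensor.square is an alias for it, so this Keras backend shim is a direct pass-through. A quick equivalence check, assuming a working Theano install:

import numpy as np
import theano
import theano.tensor as T

x = T.vector('x')
a, b = theano.function([x], [T.sqr(x), T.square(x)])(
    np.array([2., -3.], dtype=theano.config.floatX))
assert np.allclose(a, b)  # both give [4. 9.]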