Python theano.tensor.mean() Examples
The following are 30 code examples of theano.tensor.mean().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module theano.tensor, or try the search function.
Example #1
Source File: train_toy_25G.py From EvolutionaryGAN with MIT License | 6 votes |
def create_G(loss_type=None, discriminator=None, lr=0.0002, b1=0.5, DIM=64):
    """Build the toy generator and compile its training / sampling functions.

    Args:
        loss_type: Generator objective, one of ``'trickLogD'``, ``'minimax'``
            or ``'ls'``.
        discriminator: Lasagne layer whose output is evaluated on the
            generated samples.
        lr: Learning rate handed to ``lasagne.updates.adam``.
        b1: ``beta1`` handed to ``lasagne.updates.adam``.
        DIM: Forwarded to ``models_uncond.build_generator_toy`` as ``nd``.

    Returns:
        A tuple ``(train_g, gen_fn, generator)``: ``train_g`` maps a noise
        matrix to the generator loss and applies the Adam updates, ``gen_fn``
        maps a noise matrix to the deterministic generator output, and
        ``generator`` is the object returned by ``build_generator_toy``.

    Raises:
        ValueError: If ``loss_type`` is not one of the supported objectives.
    """
    # Fail early with a clear message; previously an unsupported value
    # (including the default None) left `generator_loss` unbound.
    if loss_type not in ('trickLogD', 'minimax', 'ls'):
        raise ValueError(
            "loss_type must be 'trickLogD', 'minimax' or 'ls', got %r"
            % (loss_type,))

    noise = T.matrix('noise')
    generator = models_uncond.build_generator_toy(noise, nd=DIM)
    Tgimgs = lasagne.layers.get_output(generator)
    Tfake_out = lasagne.layers.get_output(discriminator, Tgimgs)

    if loss_type == 'trickLogD':
        generator_loss = lasagne.objectives.binary_crossentropy(Tfake_out, 1).mean()
    elif loss_type == 'minimax':
        generator_loss = -lasagne.objectives.binary_crossentropy(Tfake_out, 0).mean()
    else:  # 'ls'
        generator_loss = T.mean(T.sqr((Tfake_out - 1)))

    generator_params = lasagne.layers.get_all_params(generator, trainable=True)
    updates_g = lasagne.updates.adam(generator_loss, generator_params,
                                     learning_rate=lr, beta1=b1)
    train_g = theano.function([noise], generator_loss, updates=updates_g)
    gen_fn = theano.function([noise],
                             lasagne.layers.get_output(generator, deterministic=True))
    return train_g, gen_fn, generator
Example #2
Source File: mlp_test.py From D-VAE with MIT License | 6 votes |
def negative_log_likelihood(self, y):
    r"""Return the log-probability the model assigns to each correct label.

    The quantity this method is named after is the mean negative
    log-likelihood of the prediction under a given target distribution:

    .. math::

        \frac{1}{|\mathcal{D}|} \mathcal{L} (\theta=\{W,b\}, \mathcal{D}) =
        \frac{1}{|\mathcal{D}|} \sum_{i=0}^{|\mathcal{D}|}
            \log(P(Y=y^{(i)}|x^{(i)}, W,b)) \\
        \ell (\theta=\{W,b\}, \mathcal{D})

    NOTE(review): the expression returned below is neither negated nor
    averaged; it is the vector of per-example log-probabilities. A caller
    that wants the mean negative log-likelihood has to apply the negation
    and the mean itself -- confirm this is intended.

    :type y: theano.tensor.TensorType
    :param y: corresponds to a vector that gives for each example the
              correct label
    """
    # y.shape[0] is (symbolically) the number of rows in y, i.e. the number
    # of examples (call it n) in the minibatch.
    # T.arange(y.shape[0]) is a symbolic vector containing [0, 1, ..., n-1].
    # self.p_y_given_x[T.arange(n), y] therefore picks, for every example i,
    # the entry in row i and column y[i]; T.log is then applied element-wise
    # to that vector.
    return T.log(self.p_y_given_x[T.arange(y.shape[0]), y])
Example #3
Source File: nn.py From opt-mmd with BSD 3-Clause "New" or "Revised" License | 6 votes |
def get_output_for(self, input, init=False, **kwargs):
    """Append cross-example similarity features to the (flattened) input.

    :param input: symbolic input; anything with more than two dimensions is
        flattened to two.
    :param init: when true, additionally rescales/recentres the features
        using the current batch and records the matching parameter updates
        in ``self.init_updates``.
    :return: ``input`` concatenated with the new features along axis 1.
    """
    # Collapse trailing dimensions into a batch of feature vectors.
    if input.ndim > 2:
        input = input.flatten(2)

    activation = T.tensordot(input, self.W, [[1], [0]])

    # Pairwise absolute differences between examples, summed over axis 2.
    pairwise = (activation.dimshuffle(0, 1, 2, 'x')
                - activation.dimshuffle('x', 1, 2, 0))
    # Large constant on the identity positions (presumably so an example's
    # distance to itself does not contribute after exp(-.)).
    identity_offset = 1e6 * T.eye(input.shape[0]).dimshuffle(0, 'x', 1)
    abs_dif = T.sum(abs(pairwise), axis=2) + identity_offset

    if init:
        mean_min_abs_dif = 0.5 * T.mean(T.min(abs_dif, axis=2), axis=0)
        abs_dif /= mean_min_abs_dif.dimshuffle('x', 0, 'x')
        scale_update = (self.log_weight_scale
                        - T.log(mean_min_abs_dif).dimshuffle(0, 'x'))
        self.init_updates = [(self.log_weight_scale, scale_update)]

    f = T.sum(T.exp(-abs_dif), axis=2)

    if init:
        # Centre the features on this batch and store the offset as the bias.
        mf = T.mean(f, axis=0)
        f -= mf.dimshuffle('x', 0)
        self.init_updates.append((self.b, -mf))
    else:
        f += self.b.dimshuffle('x', 0)

    return T.concatenate([input, f], axis=1)
Example #4
Source File: nn.py From opt-mmd with BSD 3-Clause "New" or "Revised" License | 6 votes |
def get_output_for(self, input, deterministic=False, **kwargs):
    """Normalise ``input`` and apply the optional scale, bias and nonlinearity.

    :param input: symbolic input tensor.
    :param deterministic: when true, normalise with the stored
        ``avg_batch_mean`` / ``avg_batch_var``; otherwise use the statistics
        of ``input`` itself and record updates for the stored averages in
        ``self.bn_updates``.
    :return: ``self.nonlinearity`` applied to the normalised activation.
    """
    pattern = self.dimshuffle_args

    if deterministic:
        stored_mean = self.avg_batch_mean.dimshuffle(*pattern)
        stored_stdv = T.sqrt(1e-6 + self.avg_batch_var).dimshuffle(*pattern)
        norm_features = (input - stored_mean) / stored_stdv
    else:
        batch_mean = T.mean(input, axis=self.axes_to_sum).flatten()
        centered_input = input - batch_mean.dimshuffle(*pattern)
        batch_var = T.mean(T.square(centered_input), axis=self.axes_to_sum).flatten()
        batch_stdv = T.sqrt(1e-6 + batch_var)
        norm_features = centered_input / batch_stdv.dimshuffle(*pattern)

        # Running-average updates: 0.9 of the old value plus 0.1 of the new
        # one, with the variance term additionally scaled by n / (n - 1).
        var_weight = T.cast((0.1 * input.shape[0]) / (input.shape[0] - 1),
                            th.config.floatX)
        new_m = 0.9 * self.avg_batch_mean + 0.1 * batch_mean
        new_v = 0.9 * self.avg_batch_var + var_weight * batch_var
        self.bn_updates = [(self.avg_batch_mean, new_m),
                           (self.avg_batch_var, new_v)]

    # Scale and bias are only applied when the layer owns them.
    if hasattr(self, 'g'):
        activation = norm_features * self.g.dimshuffle(*pattern)
    else:
        activation = norm_features
    if hasattr(self, 'b'):
        activation += self.b.dimshuffle(*pattern)

    return self.nonlinearity(activation)
Example #5
Source File: utils_th.py From robust_physical_perturbations with MIT License | 6 votes |
def model_loss(y, model, mean=True):
    """
    Define loss of Theano graph
    :param y: correct labels
    :param model: output of the model; a softmax is applied to it unless the
                  string form of ``model`` already mentions "softmax"
    :param mean: if truthy, reduce the per-sample loss with ``T.mean``
    :return: return mean of loss if True, otherwise return vector with per
             sample loss
    """
    warnings.warn("CleverHans support for Theano is deprecated and "
                  "will be dropped on 2017-11-08.")

    # Treat the input as logits when no softmax shows up in its string form.
    probabilities = model
    if "softmax" not in str(model).lower():
        probabilities = T.nnet.softmax(model)

    per_sample = T.nnet.categorical_crossentropy(probabilities, y)
    return T.mean(per_sample) if mean else per_sample
Example #6
Source File: layers.py From 3D-R2N2 with MIT License | 6 votes |
def __init__(self, prev_layer, n_out, params=None, bias=True):
    """Set up the layer's output shape and its weight / bias parameters.

    :param prev_layer: forwarded to the parent constructor.
    :param n_out: size of the last output dimension.
    :param params: optional ``[W, b]`` to reuse instead of creating new
        ``Weight`` objects (``b`` is only read when ``bias`` is true).
    :param bias: whether the layer has a bias parameter.
    """
    super().__init__(prev_layer)
    self._bias = bias

    # Output shape equals the input shape with the last entry replaced.
    out_shape = list(self._input_shape)
    out_shape[-1] = n_out
    self._output_shape = tuple(out_shape)

    if params is not None:
        self.W = params[0]
        if bias:
            self.b = params[1]
    else:
        # Weight shape: every input dimension but the first, then n_out.
        self._W_shape = tuple(self._input_shape[1:]) + (n_out,)
        self.W = Weight(self._W_shape, is_bias=False)
        if bias:
            self.b = Weight(self._output_shape[1:], is_bias=True,
                            mean=0.1, filler='constant')

    # parameters of the model
    self.params = [self.W, self.b] if bias else [self.W]
Example #7
Source File: layers.py From 3D-R2N2 with MIT License | 6 votes |
def __init__(self, prev_layer, n_out, params=None, bias=True):
    """Create (or reuse) an ``(n_in, n_out)`` weight and an optional bias.

    :param prev_layer: forwarded to the parent constructor.
    :param n_out: size of the last output dimension.
    :param params: optional ``[W, b]`` to reuse instead of creating new
        ``Weight`` objects (``b`` is only read when ``bias`` is true).
    :param bias: whether the layer has a bias parameter.
    """
    super().__init__(prev_layer)
    self._bias = bias
    n_in = self._input_shape[-1]

    if params is not None:
        self.W = params[0]
        if bias:
            self.b = params[1]
    else:
        self.W = Weight((n_in, n_out), is_bias=False)
        if bias:
            self.b = Weight((n_out,), is_bias=True, mean=0.1, filler='constant')

    # parameters of the model
    self.params = [self.W, self.b] if bias else [self.W]

    # Same leading dimensions as the input, last one replaced by n_out.
    self._output_shape = [self._input_shape[0],
                          *self._input_shape[1:-1],
                          n_out]
Example #8
Source File: utils_th.py From robust_physical_perturbations with MIT License | 6 votes |
def model_loss(y, model, mean=True):
    """
    Define loss of Theano graph
    :param y: correct labels
    :param model: output of the model; a softmax is applied to it unless the
                  string form of ``model`` already mentions "softmax"
    :param mean: if truthy, reduce the per-sample loss with ``T.mean``
    :return: return mean of loss if True, otherwise return vector with per
             sample loss
    """
    warnings.warn("CleverHans support for Theano is deprecated and "
                  "will be dropped on 2017-11-08.")

    # Treat the input as logits when no softmax shows up in its string form.
    probabilities = model
    if "softmax" not in str(model).lower():
        probabilities = T.nnet.softmax(model)

    per_sample = T.nnet.categorical_crossentropy(probabilities, y)
    return T.mean(per_sample) if mean else per_sample
Example #9
Source File: logistic_sgd.py From deeplearn_hsi with BSD 2-Clause "Simplified" License | 6 votes |
def errors(self, y):
    """Return the fraction of minibatch examples that were misclassified.

    This is the zero-one loss averaged over the minibatch.

    :type y: theano.tensor.TensorType
    :param y: corresponds to a vector that gives for each example the
              correct label
    :raises TypeError: if ``y`` and ``self.y_pred`` differ in ``ndim``
    :raises NotImplementedError: if ``y.dtype`` does not start with ``int``
    """
    # y must have the same number of dimensions as the predictions
    if y.ndim != self.y_pred.ndim:
        raise TypeError(
            'y should have the same shape as self.y_pred',
            ('y', y.type, 'y_pred', self.y_pred.type)
        )

    # only integer label types are supported
    if not y.dtype.startswith('int'):
        raise NotImplementedError()

    # T.neq yields 1 where prediction and label differ and 0 elsewhere,
    # so its mean is the error rate
    return T.mean(T.neq(self.y_pred, y))
Example #10
Source File: train_face_128.py From EvolutionaryGAN with MIT License | 6 votes |
def create_G(loss_type=None, discriminator=None, lr=0.0002, b1=0.5, ngf=64):
    """Build the 128x128 generator and compile its training / sampling functions.

    Args:
        loss_type: Generator objective, one of ``'trickLogD'``, ``'minimax'``
            or ``'ls'``.
        discriminator: Lasagne layer whose output is evaluated on the
            generated samples.
        lr: Learning rate handed to ``lasagne.updates.adam``.
        b1: ``beta1`` handed to ``lasagne.updates.adam``.
        ngf: Forwarded to ``models_uncond.build_generator_128``.

    Returns:
        A tuple ``(train_g, gen_fn, generator)``: ``train_g`` maps a noise
        matrix to the generator loss and applies the Adam updates, ``gen_fn``
        maps a noise matrix to the deterministic generator output, and
        ``generator`` is the object returned by ``build_generator_128``.

    Raises:
        ValueError: If ``loss_type`` is not one of the supported objectives.
    """
    # Fail early with a clear message; previously an unsupported value
    # (including the default None) left `generator_loss` unbound.
    if loss_type not in ('trickLogD', 'minimax', 'ls'):
        raise ValueError(
            "loss_type must be 'trickLogD', 'minimax' or 'ls', got %r"
            % (loss_type,))

    noise = T.matrix('noise')
    generator = models_uncond.build_generator_128(noise, ngf=ngf)
    Tgimgs = lasagne.layers.get_output(generator)
    Tfake_out = lasagne.layers.get_output(discriminator, Tgimgs)

    if loss_type == 'trickLogD':
        generator_loss = lasagne.objectives.binary_crossentropy(Tfake_out, 1).mean()
    elif loss_type == 'minimax':
        generator_loss = -lasagne.objectives.binary_crossentropy(Tfake_out, 0).mean()
    else:  # 'ls'
        generator_loss = T.mean(T.sqr((Tfake_out - 1)))

    generator_params = lasagne.layers.get_all_params(generator, trainable=True)
    updates_g = lasagne.updates.adam(generator_loss, generator_params,
                                     learning_rate=lr, beta1=b1)
    train_g = theano.function([noise], generator_loss, updates=updates_g)
    gen_fn = theano.function([noise],
                             lasagne.layers.get_output(generator, deterministic=True))
    return train_g, gen_fn, generator
Example #11
Source File: train_bedroom_64.py From EvolutionaryGAN with MIT License | 6 votes |
def create_G(loss_type=None, discriminator=None, lr=0.0002, b1=0.5, ngf=64):
    """Build the 64x64 generator and compile its training / sampling functions.

    Args:
        loss_type: Generator objective, one of ``'trickLogD'``, ``'minimax'``
            or ``'ls'``.
        discriminator: Lasagne layer whose output is evaluated on the
            generated samples.
        lr: Learning rate handed to ``lasagne.updates.adam``.
        b1: ``beta1`` handed to ``lasagne.updates.adam``.
        ngf: Forwarded to ``models_uncond.build_generator_64``.

    Returns:
        A tuple ``(train_g, gen_fn, generator)``: ``train_g`` maps a noise
        matrix to the generator loss and applies the Adam updates, ``gen_fn``
        maps a noise matrix to the deterministic generator output, and
        ``generator`` is the object returned by ``build_generator_64``.

    Raises:
        ValueError: If ``loss_type`` is not one of the supported objectives.
    """
    # Fail early with a clear message; previously an unsupported value
    # (including the default None) left `generator_loss` unbound.
    if loss_type not in ('trickLogD', 'minimax', 'ls'):
        raise ValueError(
            "loss_type must be 'trickLogD', 'minimax' or 'ls', got %r"
            % (loss_type,))

    noise = T.matrix('noise')
    generator = models_uncond.build_generator_64(noise, ngf=ngf)
    Tgimgs = lasagne.layers.get_output(generator)
    Tfake_out = lasagne.layers.get_output(discriminator, Tgimgs)

    if loss_type == 'trickLogD':
        generator_loss = lasagne.objectives.binary_crossentropy(Tfake_out, 1).mean()
    elif loss_type == 'minimax':
        generator_loss = -lasagne.objectives.binary_crossentropy(Tfake_out, 0).mean()
    else:  # 'ls'
        generator_loss = T.mean(T.sqr((Tfake_out - 1)))

    generator_params = lasagne.layers.get_all_params(generator, trainable=True)
    updates_g = lasagne.updates.adam(generator_loss, generator_params,
                                     learning_rate=lr, beta1=b1)
    train_g = theano.function([noise], generator_loss, updates=updates_g)
    gen_fn = theano.function([noise],
                             lasagne.layers.get_output(generator, deterministic=True))
    return train_g, gen_fn, generator
Example #12
Source File: train_toy_8G.py From EvolutionaryGAN with MIT License | 6 votes |
def create_G(loss_type=None, discriminator=None, lr=0.0002, b1=0.5, DIM=64):
    """Build the toy generator and compile its training / sampling functions.

    Args:
        loss_type: Generator objective, one of ``'trickLogD'``, ``'minimax'``
            or ``'ls'``.
        discriminator: Lasagne layer whose output is evaluated on the
            generated samples.
        lr: Learning rate handed to ``lasagne.updates.adam``.
        b1: ``beta1`` handed to ``lasagne.updates.adam``.
        DIM: Forwarded to ``models_uncond.build_generator_toy`` as ``nd``.

    Returns:
        A tuple ``(train_g, gen_fn, generator)``: ``train_g`` maps a noise
        matrix to the generator loss and applies the Adam updates, ``gen_fn``
        maps a noise matrix to the deterministic generator output, and
        ``generator`` is the object returned by ``build_generator_toy``.

    Raises:
        ValueError: If ``loss_type`` is not one of the supported objectives.
    """
    # Fail early with a clear message; previously an unsupported value
    # (including the default None) left `generator_loss` unbound.
    if loss_type not in ('trickLogD', 'minimax', 'ls'):
        raise ValueError(
            "loss_type must be 'trickLogD', 'minimax' or 'ls', got %r"
            % (loss_type,))

    noise = T.matrix('noise')
    generator = models_uncond.build_generator_toy(noise, nd=DIM)
    Tgimgs = lasagne.layers.get_output(generator)
    Tfake_out = lasagne.layers.get_output(discriminator, Tgimgs)

    if loss_type == 'trickLogD':
        generator_loss = lasagne.objectives.binary_crossentropy(Tfake_out, 1).mean()
    elif loss_type == 'minimax':
        generator_loss = -lasagne.objectives.binary_crossentropy(Tfake_out, 0).mean()
    else:  # 'ls'
        generator_loss = T.mean(T.sqr((Tfake_out - 1)))

    generator_params = lasagne.layers.get_all_params(generator, trainable=True)
    updates_g = lasagne.updates.adam(generator_loss, generator_params,
                                     learning_rate=lr, beta1=b1)
    train_g = theano.function([noise], generator_loss, updates=updates_g)
    gen_fn = theano.function([noise],
                             lasagne.layers.get_output(generator, deterministic=True))
    return train_g, gen_fn, generator
Example #13
Source File: nn.py From opt-mmd with BSD 3-Clause "New" or "Revised" License | 6 votes |
def get_output_for(self, input, init=False, deterministic=False, **kwargs):
    """Apply the linear map, then either the bias or a data-dependent init.

    :param input: symbolic input; anything with more than two dimensions is
        flattened to two.
    :param init: when true, standardise the activation per column using the
        current batch and record the matching parameter updates in
        ``self.init_updates`` instead of adding the bias.
    :param deterministic: accepted for interface compatibility; not read here.
    :return: ``self.nonlinearity`` applied to the activation.
    """
    # Collapse trailing dimensions into a batch of feature vectors.
    if input.ndim > 2:
        input = input.flatten(2)

    activation = T.dot(input, self.W)

    if not init:
        activation += self.b.dimshuffle('x', 0)
    else:
        # Centre, then divide by the root-mean-square of the centred values.
        column_mean = T.mean(activation, axis=0)
        activation -= column_mean.dimshuffle('x', 0)
        column_std = T.sqrt(T.mean(T.square(activation), axis=0))
        activation /= column_std.dimshuffle('x', 0)
        self.init_updates = [
            (self.weight_scale, self.weight_scale / column_std),
            (self.b, -column_mean / column_std),
        ]

    return self.nonlinearity(activation)
Example #14
Source File: metrics.py From ntm-one-shot with MIT License | 6 votes |
def accuracy_instance(predictions, targets, n=(1, 2, 3, 4, 5, 10),
                      nb_classes=5, nb_samples_per_class=10, batch_size=1):
    """Accumulate prediction hits per (batch row, occurrence index of the class).

    Args:
        predictions: Symbolic 2-D tensor; it is transposed with
            ``dimshuffle(1, 0)`` and scanned along the new first axis.
        targets: Symbolic 2-D tensor of labels, transposed the same way.
        n: Not used by this function. The default is an immutable tuple
            rather than a list so that it cannot be shared and mutated
            across calls.
        nb_classes: Number of columns of the per-class counter; also the
            divisor applied before averaging.
        nb_samples_per_class: Number of columns of the accuracy accumulator.
        batch_size: Number of rows of both accumulators.

    Returns:
        ``T.mean(raw_accuracy / nb_classes, axis=0)`` where ``raw_accuracy``
        is the accumulator after the fold.
    """
    accuracy_0 = theano.shared(np.zeros((batch_size, nb_samples_per_class),
                                        dtype=theano.config.floatX))
    indices_0 = theano.shared(np.zeros((batch_size, nb_classes),
                                       dtype=np.int32))
    batch_range = T.arange(batch_size)

    def step_(p, t, acc, idx):
        # idx[b, t[b]] counts how often label t[b] has been seen so far in
        # row b; the hit (p == t) is added at that column of the accumulator.
        acc = T.inc_subtensor(acc[batch_range, idx[batch_range, t]], T.eq(p, t))
        idx = T.inc_subtensor(idx[batch_range, t], 1)
        return (acc, idx)

    (raw_accuracy, _), _ = theano.foldl(
        step_,
        sequences=[predictions.dimshuffle(1, 0), targets.dimshuffle(1, 0)],
        outputs_info=[accuracy_0, indices_0])
    accuracy = T.mean(raw_accuracy / nb_classes, axis=0)

    return accuracy
Example #15
Source File: fat_classifier.py From spinn with MIT License | 6 votes |
def build_cost(logits, targets):
    """
    Build a classification cost function.

    Returns a ``(cost, acc)`` pair: the mean categorical cross-entropy of
    ``softmax(logits)`` against ``targets``, and one minus the mean
    mismatch rate between ``argmax(logits, axis=1)`` and ``targets``.
    """
    # Clip gradients coming from the cost function.
    clip_value = FLAGS.clipping_max_value
    logits = theano.gradient.grad_clip(logits, -1. * clip_value, clip_value)

    costs = T.nnet.categorical_crossentropy(T.nnet.softmax(logits), targets)
    cost = costs.mean()

    pred = T.argmax(logits, axis=1)
    error_rate = T.mean(T.cast(T.neq(pred, targets), theano.config.floatX))
    acc = 1. - error_rate

    return cost, acc
Example #16
Source File: classifier.py From spinn with MIT License | 6 votes |
def build_cost(logits, targets):
    """
    Build a classification cost function.

    Returns a ``(cost, acc)`` pair: the mean categorical cross-entropy of
    ``softmax(logits)`` against ``targets``, and one minus the mean
    mismatch rate between ``argmax(logits, axis=1)`` and ``targets``.
    """
    # Clip gradients coming from the cost function.
    clip_value = FLAGS.clipping_max_value
    logits = theano.gradient.grad_clip(logits, -1. * clip_value, clip_value)

    costs = T.nnet.categorical_crossentropy(T.nnet.softmax(logits), targets)
    cost = costs.mean()

    pred = T.argmax(logits, axis=1)
    error_rate = T.mean(T.cast(T.neq(pred, targets), theano.config.floatX))
    acc = 1. - error_rate

    return cost, acc
Example #17
Source File: rbm_pretraining.py From Projects with MIT License | 6 votes |
def get_cost_updates(self, lr=0.1, persistent=None, k=1):
    """Build the training cost proxy and the parameter update dictionary.

    :param lr: learning rate; cast to ``theano.config.floatX`` before use.
    :param persistent: optional variable used as the start of the Gibbs
        chain and overwritten with the chain's last hidden sample; when
        ``None`` the chain starts from a hidden sample of ``self.input``.
    :param k: number of ``self.gibbs_step`` iterations.
    :return: ``(monitoring_cost, updates)``.
    """
    pre_sigmoid_ph, ph_mean, ph_sample = self.sample_h_given_v(self.input)
    chain_start = ph_sample if persistent is None else persistent

    # Only the last scan output (the hidden sample) is fed back each step.
    scan_outputs, updates = theano.scan(
        self.gibbs_step,
        outputs_info=[None, None, None, None, None, chain_start],
        n_steps=k,
        name="gibbs_step")
    [pre_sigmoid_nvs, nv_means, nv_samples,
     pre_sigmoid_nhs, nh_means, nh_samples] = scan_outputs

    chain_end = nv_samples[-1]
    cost = (T.mean(self.free_energy(self.input))
            - T.mean(self.free_energy(chain_end)))

    # chain_end is treated as a constant when differentiating.
    gparams = T.grad(cost, self.params, consider_constant=[chain_end])
    for gparam, param in zip(gparams, self.params):
        updates[param] = param - gparam * T.cast(lr, dtype=theano.config.floatX)

    if persistent:
        updates[persistent] = nh_samples[-1]
        monitoring_cost = self.get_pseudo_likelihood_cost(updates)
    else:
        monitoring_cost = self.get_reconstruction_cost(updates, pre_sigmoid_nvs[-1])

    return monitoring_cost, updates
Example #18
Source File: theano_backend.py From Att-ChemdNER with Apache License 2.0 | 6 votes |
def batch_normalization(x, mean, var, beta, gamma, epsilon=1e-3):
    '''Apply batch normalization on x given mean, var, beta and gamma.

    Delegates to `_old_batch_normalization` when the installed Theano has
    no `T.nnet.bn.batch_normalization_test`.
    '''
    # TODO remove this if statement when Theano without
    # T.nnet.bn.batch_normalization_test is deprecated
    if not hasattr(T.nnet.bn, 'batch_normalization_test'):
        return _old_batch_normalization(x, mean, var, beta, gamma, epsilon)

    if mean.ndim != 1:
        # reduce over every axis on which `mean` is broadcastable
        reduction_axes = [axis for axis in range(x.ndim)
                          if mean.broadcastable[axis]]
    else:
        # based on TensorFlow's default: normalize along rightmost dimension
        reduction_axes = range(x.ndim - 1)

    return T.nnet.bn.batch_normalization_test(
        x, gamma, beta, mean, var, reduction_axes, epsilon)


# TODO remove this function when Theano without
# T.nnet.bn.batch_normalization_train is deprecated
Example #19
Source File: preprocessing.py From Projects with MIT License | 6 votes |
def __init__(self):
    """Compile the covariance and ZCA-transform Theano functions.

    ``self.sigma`` maps a data matrix to ``X_c.T @ X_c / n_rows`` of the
    column-centred data; ``self.compute_zca`` maps ``(X, u, s, eps)`` to
    the centred data multiplied by ``(u diag(1/sqrt(s + eps)) u.T).T``.
    """
    self._u = None
    self._s = None

    X_in = T.matrix('X_in')
    u = T.matrix('u')
    s = T.vector('s')
    eps = T.scalar('eps')

    centered = X_in - T.mean(X_in, 0)
    covariance = T.dot(centered.T, centered) / centered.shape[0]
    self.sigma = theano.function([X_in], covariance, allow_input_downcast=True)

    inv_sqrt_diag = T.nlinalg.diag(1. / T.sqrt(s + eps))
    whitening = T.dot(T.dot(u, inv_sqrt_diag), u.T)
    whitened = T.dot(centered, whitening.T)
    self.compute_zca = theano.function([X_in, u, s, eps], whitened,
                                       allow_input_downcast=True)
Example #20
Source File: sparse_gp_theano_internal.py From icml18-jtnn with MIT License | 5 votes |
def initialize(self):
    """Initialise inducing points, length scales and posterior parameters.

    Evaluates ``self.input_means``, picks ``self.n_inducing_points`` of its
    rows at random (without replacement) as inducing inputs, sets the log
    length scales from the median pairwise squared distance, and fills the
    posterior parameters ``LParamPost`` / ``mParamPost``.

    Raises:
        AssertionError: if there are fewer input rows than inducing points.
    """
    input_means = np.array(theano.function([], self.input_means)())

    assert input_means.shape[0] >= self.n_inducing_points

    selected_points = np.random.choice(input_means.shape[0],
                                       self.n_inducing_points, replace=False)
    z = input_means[selected_points, :]

    # Median heuristic: every log length scale is set to
    # log(0.5 * (median pairwise squared distance + 1e-3)).
    # (An earlier assignment of zeros to `lls` was always overwritten
    # here and has been removed.)
    M = np.outer(np.sum(input_means**2, 1), np.ones(input_means.shape[0]))
    dist = M - 2 * np.dot(input_means, input_means.T) + M.T
    median_dist = np.median(dist[np.triu_indices(input_means.shape[0], 1)])
    lls = np.log(0.5 * (median_dist + 1e-3)) * np.ones(input_means.shape[1])

    self.lls.set_value(lls.astype(theano.config.floatX))
    self.z.set_value(z.astype(theano.config.floatX))
    self.lsf.set_value(np.zeros(1).astype(theano.config.floatX)[0])

    # We initialize the cavity and the posterior approximation to the prior but with a small random
    # mean so that the outputs are not equal to zero (otherwise the output of the gp will be zero and
    # the next layer will be initialized improperly).
    # NOTE(review): the original comment mentioned reducing the variance of
    # L and m outside the first layer; the code applies a fixed factor of 1.0.
    L = np.random.normal(size=(self.n_inducing_points, self.n_inducing_points)) * 1.0
    m = self.training_targets.get_value()[selected_points, :]

    self.LParamPost.set_value(L.astype(theano.config.floatX))
    self.mParamPost.set_value(m.astype(theano.config.floatX))

# This sets the node for prediction. It basically switches the cavity distribution to be the posterior approximation
# Once set in this state the network cannot be trained any more.
Example #21
Source File: nn.py From GELUs with MIT License | 5 votes |
def fit(self, x):
    """Estimate the ZCA whitening transform, its inverse and the data mean.

    ``x`` is copied and reshaped to ``(x.shape[0], prod(x.shape[1:]))``.
    The results are stored as shared variables in ``self.ZCA_mat``,
    ``self.inv_ZCA_mat`` and ``self.mean``.
    """
    shape = x.shape
    flat = x.copy().reshape((shape[0], np.prod(shape[1:])))

    feature_mean = np.mean(flat, axis=0)
    flat -= feature_mean

    sigma = np.dot(flat.T, flat) / flat.shape[0]
    U, S, _ = linalg.svd(sigma)

    # Scale the singular directions by 1/sqrt(S + reg) to whiten and by
    # sqrt(S + reg) to undo it.
    whiten_scale = np.diag(1. / np.sqrt(S + self.regularization))
    unwhiten_scale = np.diag(np.sqrt(S + self.regularization))
    whiten = np.dot(np.dot(U, whiten_scale), U.T)
    unwhiten = np.dot(np.dot(U, unwhiten_scale), U.T)

    self.ZCA_mat = th.shared(whiten.astype(th.config.floatX))
    self.inv_ZCA_mat = th.shared(unwhiten.astype(th.config.floatX))
    self.mean = th.shared(feature_mean.astype(th.config.floatX))
Example #22
Source File: nn.py From GELUs with MIT License | 5 votes |
def apply(self, x):
    """Whiten ``x`` with the stored mean and ZCA matrix.

    Accepts a numpy array or a Theano ``TensorVariable``; the result has
    the same shape as ``x``.

    :raises NotImplementedError: for any other input type.
    """
    shape = x.shape

    if isinstance(x, np.ndarray):
        flat = x.reshape((shape[0], np.prod(shape[1:])))
        centered = flat - self.mean.get_value()
        return np.dot(centered, self.ZCA_mat.get_value()).reshape(shape)

    if isinstance(x, T.TensorVariable):
        centered = x.flatten(2) - self.mean.dimshuffle('x', 0)
        return T.dot(centered, self.ZCA_mat).reshape(shape)

    raise NotImplementedError("Whitening only implemented for numpy arrays or Theano TensorVariables")
Example #23
Source File: nn.py From GELUs with MIT License | 5 votes |
def invert(self, x):
    """Undo the whitening: multiply by the inverse ZCA matrix, add the mean.

    Accepts a numpy array or a Theano ``TensorVariable``; the result has
    the same shape as ``x``.

    :raises NotImplementedError: for any other input type.
    """
    shape = x.shape

    if isinstance(x, np.ndarray):
        flat = x.reshape((shape[0], np.prod(shape[1:])))
        restored = np.dot(flat, self.inv_ZCA_mat.get_value()) + self.mean.get_value()
        return restored.reshape(shape)

    if isinstance(x, T.TensorVariable):
        restored = T.dot(x.flatten(2), self.inv_ZCA_mat) + self.mean.dimshuffle('x', 0)
        return restored.reshape(shape)

    raise NotImplementedError("Whitening only implemented for numpy arrays or Theano TensorVariables")


# T.nnet.relu has some issues with very large inputs, this is more stable
Example #24
Source File: test_basic_ops.py From D-VAE with MIT License | 5 votes |
def test_elemwise_composite_support_code():
    """
    Regression test: compiling this gradient used to fail at compile time.
    Commit 3d1690fa346103594356ecaeceeb2c6757b45d2b fixed that.
    """
    def zeros_shared(shape, name):
        # float32 zero-filled shared variable built with tcn.shared_constructor
        return tcn.shared_constructor(
            value=numpy.zeros(shape, dtype="float32"), name=name)

    X = zeros_shared((100, 10), 'X')
    W = zeros_shared((10, 1), 'W')
    U = T.dot(X, W)
    Y = zeros_shared((100, 1), 'Y')
    P = T.exp(-(Y - U) ** 2)
    epsilon = numpy.asarray(0.001, dtype="float32")
    NLL = -T.mean(T.log(P + epsilon))  # SupportCodeError
    G = theano.gradient.grad(NLL, wrt=[W])

    # Silence the 1pexp warning while compiling, then restore the setting.
    backup = theano.config.warn.identify_1pexp_bug
    theano.config.warn.identify_1pexp_bug = False
    try:
        f_grad = theano.function(inputs=[], outputs=G, mode=mode_with_gpu)
    finally:
        theano.config.warn.identify_1pexp_bug = backup

    f_grad()

    topo = f_grad.maker.fgraph.toposort()
    n_elemwise = sum(isinstance(node.op, T.Elemwise) for node in topo)
    assert n_elemwise == 1
    # I suspect this was failing in the original branch too
    n_gpu_elemwise = sum(isinstance(node.op, tcn.GpuElemwise) for node in topo)
    assert n_gpu_elemwise == 1
Example #25
Source File: training_deep_model_VIEW_1.01.py From visually-informed-embedding-of-word-VIEW- with BSD 2-Clause "Simplified" License | 5 votes |
def hinge2(y_true, y_pred):
    """Mean hinge loss with ``y_true`` rescaled by ``2 * y_true - 1``.

    The per-element loss ``max(1 - (2 * y_true - 1) * y_pred, 0)`` is
    averaged over axis 1 and then over the remaining elements.
    """
    rescaled_targets = 2. * y_true - 1.
    margins = T.maximum(1. - rescaled_targets * y_pred, 0.)
    per_row = T.mean(margins, axis=1)
    return T.mean(per_row)


# Getting the target:
Example #26
Source File: test_nnet.py From D-VAE with MIT License | 5 votes |
def test_allclose(self):
    """Compare softmax + cross-entropy against the log-softmax formulation."""
    m = theano.compile.get_mode(theano.config.mode)
    m.check_isfinite = False
    x, y = tensor.matrices('xy')

    # regular softmax and crossentropy
    sm = tensor.nnet.softmax(x)
    cm = tensor.nnet.categorical_crossentropy(sm, y)

    # numerically stable log-softmax with crossentropy
    logsm = tensor.nnet.logsoftmax(x)
    sm2 = tensor.exp(logsm)  # just used to show equivalence with sm
    cm2 = -tensor.sum(y * logsm, axis=1)
    grad = tensor.grad(cm2.mean(), x)

    # large softmax inputs
    a = numpy.exp(10 * numpy.random.rand(5, 10).astype(theano.config.floatX))
    # one-hot coded labels
    b = numpy.eye(5, 10).astype(theano.config.floatX)

    # softmax and the exponentiated log-softmax must agree
    f1 = theano.function([x], [sm, sm2])
    sm_, sm2_ = f1(a)
    utt.assert_allclose(sm_, sm2_)

    # both formulations must give the same crossentropy cost
    f2 = theano.function([x, y], [cm, cm2], mode=m)
    cm_, cm2_ = f2(a, b)
    utt.assert_allclose(cm_, cm2_)

    # the gradient of the log-softmax cost must not contain NaNs
    f3 = theano.function([x, y], [grad])
    grad_ = f3(a, b)
    assert numpy.all(~numpy.isnan(grad_))
Example #27
Source File: omniglot.py From ntm-one-shot with MIT License | 5 votes |
def omniglot():
    """Train the memory-augmented network on Omniglot episodes.

    Compiles a training and an accuracy function, then iterates over the
    ``OmniglotGenerator`` until it is exhausted or interrupted. Every 100
    episodes the mean loss and the accumulated accuracies (divided by 100)
    of the window are printed. On ``KeyboardInterrupt`` the elapsed time
    is printed and the function returns.
    """
    input_var = T.tensor3('input')  # input_var has dimensions (batch_size, time, input_dim)
    target_var = T.imatrix('target')  # target_var has dimensions (batch_size, time) (label indices)

    # Load data
    generator = OmniglotGenerator(data_folder='./data/omniglot', batch_size=16,
                                  nb_samples=5, nb_samples_per_class=10,
                                  max_rotation=0., max_shift=0, max_iter=None)

    output_var, output_var_flatten, params = memory_augmented_neural_network(
        input_var, target_var,
        batch_size=generator.batch_size, nb_class=generator.nb_samples,
        memory_shape=(128, 40), controller_size=200, input_size=20 * 20,
        nb_reads=4)

    cost = T.mean(T.nnet.categorical_crossentropy(output_var_flatten,
                                                  target_var.flatten()))
    updates = lasagne.updates.adam(cost, params, learning_rate=1e-3)

    accuracies = accuracy_instance(T.argmax(output_var, axis=2), target_var,
                                   batch_size=generator.batch_size)

    print('Compiling the model...')
    train_fn = theano.function([input_var, target_var], cost, updates=updates)
    accuracy_fn = theano.function([input_var, target_var], accuracies)
    print('Done')

    print('Training...')
    t0 = time.time()
    all_scores, scores, accs = [], [], np.zeros(generator.nb_samples_per_class)
    try:
        for i, (example_input, example_output) in generator:
            score = train_fn(example_input, example_output)
            acc = accuracy_fn(example_input, example_output)
            all_scores.append(score)
            scores.append(score)
            accs += acc
            if i > 0 and not (i % 100):
                # Report the mean over the window collected in `scores`;
                # the original printed only the latest episode's loss even
                # though `scores` is accumulated and reset for this purpose.
                print('Episode %05d: %.6f' % (i, np.mean(scores)))
                print(accs / 100.)
                scores, accs = [], np.zeros(generator.nb_samples_per_class)
    except KeyboardInterrupt:
        # Manual interruption ends training; report the elapsed time.
        print(time.time() - t0)
Example #28
Source File: rbm_pretraining.py From Projects with MIT License | 5 votes |
def get_pseudo_likelihood_cost(self, updates):
    """Build a pseudo-likelihood cost based on flipping one input column.

    :param updates: update dictionary; an entry advancing the shared column
        index (modulo ``self.n_visible``) is added to it in place.
    :return: mean of ``n_visible * log(sigmoid(FE(flipped) - FE(rounded)))``.
    """
    # shared index of the column to flip; it advances on every call of the
    # function compiled with `updates`
    bit_i_idx = theano.shared(value=0, name='bit_i_idx')

    # round the input and compute its free energy
    rounded = T.round(self.input)
    fe_rounded = self.free_energy(rounded)

    # replace column bit_i_idx by one minus its value
    flipped = T.set_subtensor(rounded[:, bit_i_idx], 1 - rounded[:, bit_i_idx])
    fe_flipped = self.free_energy(flipped)

    log_sigmoid = T.log(T.nnet.sigmoid(fe_flipped - fe_rounded))
    cost = T.mean(self.n_visible * log_sigmoid)

    updates[bit_i_idx] = (bit_i_idx + 1) % self.n_visible
    return cost
Example #29
Source File: batch_normalization.py From Projects with MIT License | 5 votes |
def __init__(self,hidden_layers,layer_nodes):
    """Build a sigmoid MLP with batch normalisation after every linear map.

    The network has a 784-unit input, ``hidden_layers`` hidden layers of
    ``layer_nodes`` units and a 10-unit output layer. The constructor also
    compiles ``self.train_f`` (one Adam step on the mean categorical
    cross-entropy) and ``self.predict_f`` (output activations).
    """
    self.input = T.matrix()
    self.target = T.matrix()
    self.W = []
    self.b = []
    self.lin_outputs = []
    self.batch_norms = []
    self.gammas = []
    self.betas = []
    self.activations = []

    def add_layer(layer_input, n_in, n_out):
        # Append one linear -> batch-norm -> sigmoid stage, creating the
        # shared variables in the order W, b, gamma, beta.
        self.W.append(theano.shared(self.ortho_weight(n_in, n_out), borrow=True))
        self.b.append(theano.shared(np.zeros((n_out,), dtype=theano.config.floatX), borrow=True))
        self.gammas.append(theano.shared(value=np.ones((n_out,), dtype=theano.config.floatX)))
        self.betas.append(theano.shared(value=np.zeros((n_out,), dtype=theano.config.floatX)))
        self.lin_outputs.append(T.dot(layer_input, self.W[-1]) + self.b[-1])
        self.batch_norms.append(T.nnet.bn.batch_normalization(
            self.lin_outputs[-1],
            gamma=self.gammas[-1],
            beta=self.betas[-1],
            mean=T.mean(self.lin_outputs[-1], axis=0),
            std=T.sqrt(T.var(self.lin_outputs[-1], axis=0) + 0.00001)))
        self.activations.append(T.nnet.sigmoid(self.batch_norms[-1]))

    # input layer, remaining hidden layers, output layer
    add_layer(self.input, 784, layer_nodes)
    for layer in range(hidden_layers - 1):
        add_layer(self.activations[-1], layer_nodes, layer_nodes)
    add_layer(self.activations[-1], layer_nodes, 10)

    self.cost = T.nnet.categorical_crossentropy(self.activations[-1], self.target).mean()
    self.params = self.W + self.b + self.gammas + self.betas
    self.updates = self.adam(self.cost, self.params)
    self.train_f = theano.function([self.input, self.target], self.cost,
                                   updates=self.updates, allow_input_downcast=True)
    self.predict_f = theano.function([self.input], self.activations[-1],
                                     allow_input_downcast=True)
Example #30
Source File: theano_backend.py From Att-ChemdNER with Apache License 2.0 | 5 votes |
def mean(x, axis=None, keepdims=False):
    '''Mean of a tensor, alongside the specified axis.

    Integer and boolean tensors are averaged with dtype `floatx()`; for
    every other dtype `T.mean` receives `dtype=None`.
    '''
    # bool is available since theano v0.9dev
    needs_float = 'int' in x.dtype or x.dtype == 'bool'
    out_dtype = floatx() if needs_float else None
    return T.mean(x, axis=axis, keepdims=keepdims, dtype=out_dtype)