Python theano.tensor.mean() Examples

The following are 30 code examples of theano.tensor.mean(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module theano.tensor, or try the search function.

Example #1

Source File: train_toy_25G.py From EvolutionaryGAN with MIT License

6 votes

def create_G(loss_type=None, discriminator=None, lr=0.0002, b1=0.5, DIM=64):
    """Build the toy generator and compile its training and sampling functions.

    :param loss_type: generator objective, one of ``'trickLogD'`` (binary
        cross-entropy of the discriminator output on generated samples
        against target 1), ``'minimax'`` (negated binary cross-entropy
        against target 0) or ``'ls'`` (mean squared difference between the
        discriminator output and 1).
    :param discriminator: network handed to ``lasagne.layers.get_output``
        together with the generated samples.
    :param lr: learning rate passed to ``lasagne.updates.adam``.
    :param b1: ``beta1`` passed to ``lasagne.updates.adam``.
    :param DIM: forwarded to ``models_uncond.build_generator_toy`` as ``nd``.
    :return: ``(train_g, gen_fn, generator)``; ``train_g`` takes a noise
        matrix, returns the generator loss and applies the Adam updates,
        ``gen_fn`` maps a noise matrix to the deterministic generator output.
    :raises ValueError: if ``loss_type`` is not one of the supported names.
    """
    # Fail fast: without this check an unsupported value (including the
    # default None) leaves ``generator_loss`` unbound and only surfaces later
    # as an UnboundLocalError.
    if loss_type not in ('trickLogD', 'minimax', 'ls'):
        raise ValueError(
            "Unknown loss_type %r; expected 'trickLogD', 'minimax' or 'ls'"
            % (loss_type,))

    noise = T.matrix('noise')
    generator = models_uncond.build_generator_toy(noise, nd=DIM)
    Tgimgs = lasagne.layers.get_output(generator)
    Tfake_out = lasagne.layers.get_output(discriminator, Tgimgs)

    if loss_type == 'trickLogD':
        generator_loss = lasagne.objectives.binary_crossentropy(Tfake_out, 1).mean()
    elif loss_type == 'minimax':
        generator_loss = -lasagne.objectives.binary_crossentropy(Tfake_out, 0).mean()
    else:  # 'ls'
        generator_loss = T.mean(T.sqr((Tfake_out - 1)))

    generator_params = lasagne.layers.get_all_params(generator, trainable=True)
    updates_g = lasagne.updates.adam(generator_loss, generator_params, learning_rate=lr, beta1=b1)
    train_g = theano.function([noise],
                              generator_loss,
                              updates=updates_g)
    gen_fn = theano.function([noise],
                             lasagne.layers.get_output(generator,
                                                       deterministic=True))
    return train_g, gen_fn, generator

Example #2

Source File: mlp_test.py From D-VAE with MIT License

6 votes

def negative_log_likelihood(self, y):
        r"""Return the mean of the negative log-likelihood of the prediction
        of this model under a given target distribution.

        .. math::

            \frac{1}{|\mathcal{D}|} \mathcal{L} (\theta=\{W,b\}, \mathcal{D}) =
            \frac{1}{|\mathcal{D}|} \sum_{i=0}^{|\mathcal{D}|} \log(P(Y=y^{(i)}|x^{(i)}, W,b)) \\
                \ell (\theta=\{W,b\}, \mathcal{D})

        :type y: theano.tensor.TensorType
        :param y: corresponds to a vector that gives for each example the
                  correct label

        :return: a scalar, the negated mean over the minibatch of the
                 log-probability assigned to each example's correct label

        Note: we use the mean instead of the sum so that
              the learning rate is less dependent on the batch size
        """
        # y.shape[0] is (symbolically) the number of examples n in the minibatch.
        # T.arange(y.shape[0]) is a symbolic vector which will contain [0,1,2,... n-1].
        # self.p_y_given_x[T.arange(y.shape[0]), y] picks, for every example i,
        # the probability assigned to its correct class y[i].
        # Taking the log gives the per-example log-likelihood; the mean across
        # the minibatch is negated so that the returned value is a loss to
        # minimise, as the docstring promises.
        return -T.mean(T.log(self.p_y_given_x[T.arange(y.shape[0]), y]))

Example #3

Source File: nn.py From opt-mmd with BSD 3-Clause "New" or "Revised" License

6 votes

def get_output_for(self, input, init=False, **kwargs):
        """Append cross-sample similarity features to the (flattened) input.

        The input is projected with ``self.W``, absolute differences between
        the projections of every pair of samples are summed over axis 2, and
        ``exp(-distance)`` is summed over the other samples to give one
        feature block per sample. That block is concatenated to ``input``
        along axis 1.

        :param input: symbolic input; flattened to 2 dimensions when it has
            more than two.
        :param init: when true, rescale and recentre the features using the
            statistics of this batch and record the matching parameter
            updates in ``self.init_updates``; otherwise add ``self.b``.
        :return: ``input`` concatenated with the similarity features.
        """
        # Collapse everything past the first axis into one feature axis.
        if input.ndim > 2:
            input = input.flatten(2)

        projected = T.tensordot(input, self.W, [[1], [0]])
        pairwise_gap = abs(projected.dimshuffle(0,1,2,'x') - projected.dimshuffle('x',1,2,0))
        # The large constant on the identity pushes each sample's distance to
        # itself far away so that exp(-distance) contributes (almost) nothing.
        self_mask = 1e6 * T.eye(input.shape[0]).dimshuffle(0,'x',1)
        abs_dif = T.sum(pairwise_gap, axis=2) + self_mask

        if init:
            # Half the mean nearest-neighbour distance serves as the scale.
            scale = 0.5 * T.mean(T.min(abs_dif, axis=2), axis=0)
            abs_dif /= scale.dimshuffle('x',0,'x')
            self.init_updates = [
                (self.log_weight_scale,
                 self.log_weight_scale - T.log(scale).dimshuffle(0,'x')),
            ]

        features = T.sum(T.exp(-abs_dif), axis=2)

        if init:
            feature_mean = T.mean(features, axis=0)
            features -= feature_mean.dimshuffle('x',0)
            self.init_updates.append((self.b, -feature_mean))
        else:
            features += self.b.dimshuffle('x',0)

        return T.concatenate([input, features], axis=1)

Example #4

Source File: nn.py From opt-mmd with BSD 3-Clause "New" or "Revised" License

6 votes

def get_output_for(self, input, deterministic=False, **kwargs):
        """Normalise ``input``, then apply optional scale, shift and nonlinearity.

        :param input: symbolic input tensor.
        :param deterministic: when true, normalise with the stored
            ``self.avg_batch_mean`` / ``self.avg_batch_var``; otherwise use
            the statistics of the current batch and record running-average
            updates in ``self.bn_updates``.
        :return: ``self.nonlinearity`` applied to the normalised activation.
        """
        pattern = self.dimshuffle_args

        if not deterministic:
            batch_mean = T.mean(input, axis=self.axes_to_sum).flatten()
            centered_input = input - batch_mean.dimshuffle(*pattern)
            batch_var = T.mean(T.square(centered_input), axis=self.axes_to_sum).flatten()
            batch_stdv = T.sqrt(1e-6 + batch_var)
            norm_features = centered_input / batch_stdv.dimshuffle(*pattern)

            # BN updates: exponential moving averages with weight 0.1 on the
            # new batch; the variance term is rescaled by n / (n - 1).
            var_weight = T.cast((0.1*input.shape[0])/(input.shape[0]-1), th.config.floatX)
            new_m = 0.9*self.avg_batch_mean + 0.1*batch_mean
            new_v = 0.9*self.avg_batch_var + var_weight*batch_var
            self.bn_updates = [(self.avg_batch_mean, new_m), (self.avg_batch_var, new_v)]
        else:
            stored_mean = self.avg_batch_mean.dimshuffle(*pattern)
            stored_stdv = T.sqrt(1e-6 + self.avg_batch_var).dimshuffle(*pattern)
            norm_features = (input - stored_mean) / stored_stdv

        # The scale ``g`` and shift ``b`` are only applied when the layer has them.
        activation = norm_features
        if hasattr(self, 'g'):
            activation = norm_features*self.g.dimshuffle(*pattern)
        if hasattr(self, 'b'):
            activation += self.b.dimshuffle(*pattern)

        return self.nonlinearity(activation)

Example #5

Source File: utils_th.py From robust_physical_perturbations with MIT License

6 votes

def model_loss(y, model, mean=True):
    """
    Define the categorical cross-entropy loss of a Theano graph
    :param y: correct labels
    :param model: output of the model; a softmax is applied to it first
                  unless "softmax" already appears in ``str(model).lower()``
    :param mean: if True return the mean loss, otherwise the per-sample loss
    :return: return mean of loss if True, otherwise return vector with per
             sample loss
    """
    warnings.warn("CleverHans support for Theano is deprecated and "
                  "will be dropped on 2017-11-08.")

    # The textual representation of the graph is used to decide whether the
    # model output still consists of logits.
    if "softmax" not in str(model).lower():
        model = T.nnet.softmax(model)

    per_sample = T.nnet.categorical_crossentropy(model, y)
    return T.mean(per_sample) if mean else per_sample

Example #6

Source File: layers.py From 3D-R2N2 with MIT License

6 votes

def __init__(self, prev_layer, n_out, params=None, bias=True):
        """Set up output shape and parameters; only the last dimension changes.

        :param prev_layer: layer feeding this one (passed to the base class).
        :param n_out: size of the last output dimension.
        :param params: optional ``[W]`` or ``[W, b]`` to reuse instead of
            creating new ``Weight`` objects.
        :param bias: whether the layer has a bias ``b``.
        """
        super().__init__(prev_layer)
        self._bias = bias

        # Same shape as the input, with the last dimension replaced by n_out.
        out_shape = list(self._input_shape)
        out_shape[-1] = n_out
        self._output_shape = tuple(out_shape)

        if params is not None:
            self.W = params[0]
            if bias:
                self.b = params[1]
        else:
            # W covers every non-batch input dimension plus the output size.
            self._W_shape = tuple(self._input_shape[1:]) + (n_out,)
            self.W = Weight(self._W_shape, is_bias=False)
            if bias:
                self.b = Weight(self._output_shape[1:], is_bias=True, mean=0.1, filler='constant')

        # parameters of the model
        self.params = [self.W, self.b] if bias else [self.W]

Example #7

Source File: layers.py From 3D-R2N2 with MIT License

6 votes

def __init__(self, prev_layer, n_out, params=None, bias=True):
        """Set up an ``(n_in, n_out)`` weight acting on the last input dimension.

        :param prev_layer: layer feeding this one (passed to the base class).
        :param n_out: size of the last output dimension.
        :param params: optional ``[W]`` or ``[W, b]`` to reuse instead of
            creating new ``Weight`` objects.
        :param bias: whether the layer has a bias ``b``.
        """
        super().__init__(prev_layer)
        self._bias = bias
        n_in = self._input_shape[-1]

        if params is not None:
            self.W = params[0]
            if bias:
                self.b = params[1]
        else:
            self.W = Weight((n_in, n_out), is_bias=False)
            if bias:
                self.b = Weight((n_out,), is_bias=True, mean=0.1, filler='constant')

        # parameters of the model
        self.params = [self.W, self.b] if bias else [self.W]

        # Leading dimensions are kept, the last one becomes n_out.
        self._output_shape = [self._input_shape[0], *self._input_shape[1:-1], n_out]

Example #8

Source File: utils_th.py From robust_physical_perturbations with MIT License

6 votes

def model_loss(y, model, mean=True):
    """
    Define the categorical cross-entropy loss of a Theano graph
    :param y: correct labels
    :param model: output of the model; a softmax is applied to it first
                  unless "softmax" already appears in ``str(model).lower()``
    :param mean: if True return the mean loss, otherwise the per-sample loss
    :return: return mean of loss if True, otherwise return vector with per
             sample loss
    """
    warnings.warn("CleverHans support for Theano is deprecated and "
                  "will be dropped on 2017-11-08.")

    # The textual representation of the graph is used to decide whether the
    # model output still consists of logits.
    if "softmax" not in str(model).lower():
        model = T.nnet.softmax(model)

    per_sample = T.nnet.categorical_crossentropy(model, y)
    return T.mean(per_sample) if mean else per_sample

Example #9

Source File: logistic_sgd.py From deeplearn_hsi with BSD 2-Clause "Simplified" License

6 votes

def errors(self, y):
        """Return the zero-one loss over the minibatch: the number of
        misclassified examples divided by the total number of examples.

        :type y: theano.tensor.TensorType
        :param y: corresponds to a vector that gives for each example the
                  correct label
        :raises TypeError: if ``y.ndim`` differs from ``self.y_pred.ndim``
        :raises NotImplementedError: if ``y.dtype`` does not start with
                  ``'int'``
        """
        # Guard: labels and predictions must have the same number of dimensions.
        if y.ndim != self.y_pred.ndim:
            raise TypeError(
                'y should have the same shape as self.y_pred',
                ('y', y.type, 'y_pred', self.y_pred.type)
            )

        # Guard: only integer label types are supported.
        if not y.dtype.startswith('int'):
            raise NotImplementedError()

        # T.neq yields 1 wherever the prediction differs from the label,
        # so the mean is the error rate.
        mistakes = T.neq(self.y_pred, y)
        return T.mean(mistakes)

Example #10

Source File: train_face_128.py From EvolutionaryGAN with MIT License

6 votes

def create_G(loss_type=None, discriminator=None, lr=0.0002, b1=0.5, ngf=64):
    """Build the 128-pixel generator and compile its training and sampling functions.

    :param loss_type: generator objective, one of ``'trickLogD'`` (binary
        cross-entropy of the discriminator output on generated samples
        against target 1), ``'minimax'`` (negated binary cross-entropy
        against target 0) or ``'ls'`` (mean squared difference between the
        discriminator output and 1).
    :param discriminator: network handed to ``lasagne.layers.get_output``
        together with the generated samples.
    :param lr: learning rate passed to ``lasagne.updates.adam``.
    :param b1: ``beta1`` passed to ``lasagne.updates.adam``.
    :param ngf: forwarded to ``models_uncond.build_generator_128``.
    :return: ``(train_g, gen_fn, generator)``; ``train_g`` takes a noise
        matrix, returns the generator loss and applies the Adam updates,
        ``gen_fn`` maps a noise matrix to the deterministic generator output.
    :raises ValueError: if ``loss_type`` is not one of the supported names.
    """
    # Fail fast: without this check an unsupported value (including the
    # default None) leaves ``generator_loss`` unbound and only surfaces later
    # as an UnboundLocalError.
    if loss_type not in ('trickLogD', 'minimax', 'ls'):
        raise ValueError(
            "Unknown loss_type %r; expected 'trickLogD', 'minimax' or 'ls'"
            % (loss_type,))

    noise = T.matrix('noise')
    generator = models_uncond.build_generator_128(noise, ngf=ngf)
    Tgimgs = lasagne.layers.get_output(generator)
    Tfake_out = lasagne.layers.get_output(discriminator, Tgimgs)

    if loss_type == 'trickLogD':
        generator_loss = lasagne.objectives.binary_crossentropy(Tfake_out, 1).mean()
    elif loss_type == 'minimax':
        generator_loss = -lasagne.objectives.binary_crossentropy(Tfake_out, 0).mean()
    else:  # 'ls'
        generator_loss = T.mean(T.sqr((Tfake_out - 1)))

    generator_params = lasagne.layers.get_all_params(generator, trainable=True)
    updates_g = lasagne.updates.adam(generator_loss, generator_params, learning_rate=lr, beta1=b1)
    train_g = theano.function([noise],
                              generator_loss,
                              updates=updates_g)
    gen_fn = theano.function([noise],
                             lasagne.layers.get_output(generator,
                                                       deterministic=True))
    return train_g, gen_fn, generator

Example #11

Source File: train_bedroom_64.py From EvolutionaryGAN with MIT License

6 votes

def create_G(loss_type=None, discriminator=None, lr=0.0002, b1=0.5, ngf=64):
    """Build the 64-pixel generator and compile its training and sampling functions.

    :param loss_type: generator objective, one of ``'trickLogD'`` (binary
        cross-entropy of the discriminator output on generated samples
        against target 1), ``'minimax'`` (negated binary cross-entropy
        against target 0) or ``'ls'`` (mean squared difference between the
        discriminator output and 1).
    :param discriminator: network handed to ``lasagne.layers.get_output``
        together with the generated samples.
    :param lr: learning rate passed to ``lasagne.updates.adam``.
    :param b1: ``beta1`` passed to ``lasagne.updates.adam``.
    :param ngf: forwarded to ``models_uncond.build_generator_64``.
    :return: ``(train_g, gen_fn, generator)``; ``train_g`` takes a noise
        matrix, returns the generator loss and applies the Adam updates,
        ``gen_fn`` maps a noise matrix to the deterministic generator output.
    :raises ValueError: if ``loss_type`` is not one of the supported names.
    """
    # Fail fast: without this check an unsupported value (including the
    # default None) leaves ``generator_loss`` unbound and only surfaces later
    # as an UnboundLocalError.
    if loss_type not in ('trickLogD', 'minimax', 'ls'):
        raise ValueError(
            "Unknown loss_type %r; expected 'trickLogD', 'minimax' or 'ls'"
            % (loss_type,))

    noise = T.matrix('noise')
    generator = models_uncond.build_generator_64(noise, ngf=ngf)
    Tgimgs = lasagne.layers.get_output(generator)
    Tfake_out = lasagne.layers.get_output(discriminator, Tgimgs)

    if loss_type == 'trickLogD':
        generator_loss = lasagne.objectives.binary_crossentropy(Tfake_out, 1).mean()
    elif loss_type == 'minimax':
        generator_loss = -lasagne.objectives.binary_crossentropy(Tfake_out, 0).mean()
    else:  # 'ls'
        generator_loss = T.mean(T.sqr((Tfake_out - 1)))

    generator_params = lasagne.layers.get_all_params(generator, trainable=True)
    updates_g = lasagne.updates.adam(generator_loss, generator_params, learning_rate=lr, beta1=b1)
    train_g = theano.function([noise],
                              generator_loss,
                              updates=updates_g)

    gen_fn = theano.function([noise],
                             lasagne.layers.get_output(generator,
                                                       deterministic=True))

    return train_g, gen_fn, generator

Example #12

Source File: train_toy_8G.py From EvolutionaryGAN with MIT License

6 votes

def create_G(loss_type=None, discriminator=None, lr=0.0002, b1=0.5, DIM=64):
    """Build the toy generator and compile its training and sampling functions.

    :param loss_type: generator objective, one of ``'trickLogD'`` (binary
        cross-entropy of the discriminator output on generated samples
        against target 1), ``'minimax'`` (negated binary cross-entropy
        against target 0) or ``'ls'`` (mean squared difference between the
        discriminator output and 1).
    :param discriminator: network handed to ``lasagne.layers.get_output``
        together with the generated samples.
    :param lr: learning rate passed to ``lasagne.updates.adam``.
    :param b1: ``beta1`` passed to ``lasagne.updates.adam``.
    :param DIM: forwarded to ``models_uncond.build_generator_toy`` as ``nd``.
    :return: ``(train_g, gen_fn, generator)``; ``train_g`` takes a noise
        matrix, returns the generator loss and applies the Adam updates,
        ``gen_fn`` maps a noise matrix to the deterministic generator output.
    :raises ValueError: if ``loss_type`` is not one of the supported names.
    """
    # Fail fast: without this check an unsupported value (including the
    # default None) leaves ``generator_loss`` unbound and only surfaces later
    # as an UnboundLocalError.
    if loss_type not in ('trickLogD', 'minimax', 'ls'):
        raise ValueError(
            "Unknown loss_type %r; expected 'trickLogD', 'minimax' or 'ls'"
            % (loss_type,))

    noise = T.matrix('noise')
    generator = models_uncond.build_generator_toy(noise, nd=DIM)
    Tgimgs = lasagne.layers.get_output(generator)
    Tfake_out = lasagne.layers.get_output(discriminator, Tgimgs)

    if loss_type == 'trickLogD':
        generator_loss = lasagne.objectives.binary_crossentropy(Tfake_out, 1).mean()
    elif loss_type == 'minimax':
        generator_loss = -lasagne.objectives.binary_crossentropy(Tfake_out, 0).mean()
    else:  # 'ls'
        generator_loss = T.mean(T.sqr((Tfake_out - 1)))

    generator_params = lasagne.layers.get_all_params(generator, trainable=True)
    updates_g = lasagne.updates.adam(generator_loss, generator_params, learning_rate=lr, beta1=b1)
    train_g = theano.function([noise],
                              generator_loss,
                              updates=updates_g)
    gen_fn = theano.function([noise],
                             lasagne.layers.get_output(generator,
                                                       deterministic=True))
    return train_g, gen_fn, generator

Example #13

Source File: nn.py From opt-mmd with BSD 3-Clause "New" or "Revised" License

6 votes

def get_output_for(self, input, init=False, deterministic=False, **kwargs):
        """Apply the dense transform and nonlinearity, optionally in init mode.

        :param input: symbolic input; flattened to 2 dimensions when it has
            more than two.
        :param init: when true, standardise the pre-activations with the
            statistics of this batch and record matching updates for
            ``self.weight_scale`` and ``self.b`` in ``self.init_updates``;
            otherwise simply add the bias.
        :param deterministic: accepted for interface compatibility; not used
            in this method.
        :return: ``self.nonlinearity`` applied to the (possibly standardised)
            pre-activation.
        """
        # Collapse everything past the first axis into one feature axis.
        if input.ndim > 2:
            input = input.flatten(2)

        activation = T.dot(input, self.W)

        if not init:
            activation += self.b.dimshuffle('x', 0)
            return self.nonlinearity(activation)

        # Data-dependent initialisation: zero mean and unit RMS per column.
        col_mean = T.mean(activation, axis=0)
        activation -= col_mean.dimshuffle('x',0)
        col_std = T.sqrt(T.mean(T.square(activation),axis=0))
        activation /= col_std.dimshuffle('x',0)
        self.init_updates = [
            (self.weight_scale, self.weight_scale/col_std),
            (self.b, -col_mean/col_std),
        ]
        return self.nonlinearity(activation)

Example #14

Source File: metrics.py From ntm-one-shot with MIT License

6 votes

def accuracy_instance(predictions, targets, n=(1, 2, 3, 4, 5, 10),
        nb_classes=5, nb_samples_per_class=10, batch_size=1):
    """Accuracy as a function of how often each class has been seen so far.

    The sequences are scanned along their second axis. At every step the
    correctness of the prediction is added to the slot indexed by the number
    of times the target class has already occurred in that batch row, and
    that occurrence counter is then incremented.

    :param predictions: symbolic predicted labels; transposed with
        ``dimshuffle(1, 0)`` before the scan.
    :param targets: symbolic target labels, same layout as ``predictions``.
    :param n: not used by this function; kept for interface compatibility.
        The default is an immutable tuple rather than a shared list.
    :param nb_classes: number of classes; sizes the occurrence counters and
        divides the accumulated counts.
    :param nb_samples_per_class: number of occurrence slots tracked.
    :param batch_size: number of batch rows.
    :return: the accumulated correctness divided by ``nb_classes`` and
        averaged over axis 0.
    """
    accuracy_0 = theano.shared(np.zeros((batch_size, nb_samples_per_class),
        dtype=theano.config.floatX))
    indices_0 = theano.shared(np.zeros((batch_size, nb_classes),
        dtype=np.int32))
    batch_range = T.arange(batch_size)

    def step_(p, t, acc, idx):
        # idx[batch_range, t] is the number of earlier occurrences of class t.
        acc = T.inc_subtensor(acc[batch_range, idx[batch_range, t]], T.eq(p, t))
        idx = T.inc_subtensor(idx[batch_range, t], 1)
        return (acc, idx)

    (raw_accuracy, _), _ = theano.foldl(
        step_,
        sequences=[predictions.dimshuffle(1, 0), targets.dimshuffle(1, 0)],
        outputs_info=[accuracy_0, indices_0])
    accuracy = T.mean(raw_accuracy / nb_classes, axis=0)

    return accuracy

Example #15

Source File: fat_classifier.py From spinn with MIT License

6 votes

def build_cost(logits, targets):
    """
    Build a classification cost function.

    Returns a ``(cost, acc)`` pair: the mean categorical cross-entropy of
    ``softmax(logits)`` against ``targets``, and the fraction of rows whose
    arg-max over axis 1 equals the target.
    """
    # Clip gradients coming from the cost function.
    limit = FLAGS.clipping_max_value
    logits = theano.gradient.grad_clip(logits, -1. * limit, limit)

    costs = T.nnet.categorical_crossentropy(T.nnet.softmax(logits), targets)
    cost = costs.mean()

    # Accuracy is one minus the mean of the per-row mismatch indicator.
    mismatches = T.neq(T.argmax(logits, axis=1), targets)
    error_rate = T.mean(T.cast(mismatches, theano.config.floatX))

    return cost, 1. - error_rate

Example #16

Source File: classifier.py From spinn with MIT License

6 votes

def build_cost(logits, targets):
    """
    Build a classification cost function.

    Returns a ``(cost, acc)`` pair: the mean categorical cross-entropy of
    ``softmax(logits)`` against ``targets``, and the fraction of rows whose
    arg-max over axis 1 equals the target.
    """
    # Clip gradients coming from the cost function.
    limit = FLAGS.clipping_max_value
    logits = theano.gradient.grad_clip(logits, -1. * limit, limit)

    costs = T.nnet.categorical_crossentropy(T.nnet.softmax(logits), targets)
    cost = costs.mean()

    # Accuracy is one minus the mean of the per-row mismatch indicator.
    mismatches = T.neq(T.argmax(logits, axis=1), targets)
    error_rate = T.mean(T.cast(mismatches, theano.config.floatX))

    return cost, 1. - error_rate

Example #17

Source File: rbm_pretraining.py From Projects with MIT License

6 votes

def get_cost_updates(self, lr=0.1, persistent=None, k=1):
        """Build the training cost proxy and parameter updates for ``k`` Gibbs steps.

        :param lr: learning rate, cast to ``theano.config.floatX``.
        :param persistent: ``None`` to start the chain from the hidden sample
            computed from ``self.input``; otherwise the variable holding the
            chain state, which is used as the starting point and updated
            with the last hidden sample.
        :param k: number of ``self.gibbs_step`` iterations run by ``theano.scan``.
        :return: ``(monitoring_cost, updates)``; the monitoring cost is the
            pseudo-likelihood cost when ``persistent`` is given and the
            reconstruction cost otherwise.
        """
        pre_sigmoid_ph, ph_mean, ph_sample = self.sample_h_given_v(self.input)
        chain_start = ph_sample if persistent is None else persistent

        # Only the last output (the hidden samples) is fed back into the scan.
        ([pre_sigmoid_nvs, nv_means, nv_samples,
          pre_sigmoid_nhs, nh_means, nh_samples], updates) = theano.scan(
            self.gibbs_step,
            outputs_info=[None, None, None, None, None, chain_start],
            n_steps=k,
            name="gibbs_step")

        chain_end = nv_samples[-1]
        cost = T.mean(self.free_energy(self.input)) - T.mean(self.free_energy(chain_end))
        # The end of the chain is treated as a constant when differentiating.
        gparams = T.grad(cost, self.params, consider_constant=[chain_end])
        for gparam, param in zip(gparams, self.params):
            updates[param] = param - gparam * T.cast(lr, dtype=theano.config.floatX)

        # Use the same ``is None`` test as for ``chain_start`` above so both
        # decisions always agree instead of relying on truthiness here.
        if persistent is not None:
            updates[persistent] = nh_samples[-1]
            monitoring_cost = self.get_pseudo_likelihood_cost(updates)
        else:
            monitoring_cost = self.get_reconstruction_cost(updates, pre_sigmoid_nvs[-1])
        return monitoring_cost, updates

Example #18

Source File: theano_backend.py From Att-ChemdNER with Apache License 2.0

6 votes

def batch_normalization(x, mean, var, beta, gamma, epsilon=1e-3):
    '''Apply batch normalization on x given mean, var, beta and gamma.

    Delegates to `T.nnet.bn.batch_normalization_test` when Theano provides
    it and to `_old_batch_normalization` otherwise.
    '''
    # TODO remove this if statement when Theano without
    # T.nnet.bn.batch_normalization_test is deprecated
    if not hasattr(T.nnet.bn, 'batch_normalization_test'):
        return _old_batch_normalization(x, mean, var, beta, gamma, epsilon)

    if mean.ndim != 1:
        # reduce over every axis along which the mean is broadcastable
        reduction_axes = [axis for axis in range(x.ndim)
                          if mean.broadcastable[axis]]
    else:
        # based on TensorFlow's default: normalize along rightmost dimension
        reduction_axes = range(x.ndim - 1)

    return T.nnet.bn.batch_normalization_test(
        x, gamma, beta, mean, var, reduction_axes, epsilon)


# TODO remove this function when Theano without
# T.nnet.bn.batch_normalization_train is deprecated

Example #19

Source File: preprocessing.py From Projects with MIT License

6 votes

def __init__(self):
        """Compile the Theano functions used for ZCA whitening.

        ``self.sigma`` maps a data matrix to ``X_.T @ X_ / n`` of the
        column-centred data. ``self.compute_zca`` takes the data together
        with ``u``, ``s`` and ``eps`` and returns the centred data multiplied
        by the transpose of ``u @ diag(1 / sqrt(s + eps)) @ u.T``.
        ``self._u`` and ``self._s`` start out as ``None``.
        """
        data = T.matrix('X_in')
        basis = T.matrix('u')
        spectrum = T.vector('s')
        eps = T.scalar('eps')

        centered = data - T.mean(data, 0)
        covariance = T.dot(centered.T, centered) / centered.shape[0]
        self.sigma = theano.function([data], covariance, allow_input_downcast=True)

        inv_sqrt = T.nlinalg.diag(1. / T.sqrt(spectrum + eps))
        whitener = T.dot(T.dot(basis, inv_sqrt), basis.T)
        whitened = T.dot(centered, whitener.T)
        self.compute_zca = theano.function([data, basis, spectrum, eps], whitened,
                                           allow_input_downcast=True)

        self._u = None
        self._s = None
Example #20

Source File: sparse_gp_theano_internal.py From icml18-jtnn with MIT License

5 votes

def initialize(self):
        """Initialise inducing points, length scales and posterior parameters.

        Inducing points are a random subset (without replacement) of the
        evaluated ``self.input_means``. Every log length scale is set to the
        log of half the median pairwise squared distance between the input
        means (plus 1e-3), and the log signal factor ``lsf`` to zero. The
        posterior parameters are set to a standard-normal ``L`` and to the
        training targets at the selected points.
        """
        input_means = np.array(theano.function([], self.input_means)())

        assert input_means.shape[ 0 ] >= self.n_inducing_points

        selected_points = np.random.choice(input_means.shape[ 0 ], self.n_inducing_points, replace = False)
        z = input_means[ selected_points, : ]

        # Pairwise squared Euclidean distances: ||a||^2 - 2 a.b + ||b||^2.
        M = np.outer(np.sum(input_means**2, 1), np.ones(input_means.shape[ 0 ]))
        dist = M - 2 * np.dot(input_means, input_means.T) + M.T

        # Median heuristic over the strict upper triangle (each pair once);
        # the same value is used for every input dimension.
        lls = np.log(0.5 * (np.median(dist[ np.triu_indices(input_means.shape[ 0 ], 1) ]) + 1e-3)) * np.ones(input_means.shape[ 1 ])

        self.lls.set_value(lls.astype(theano.config.floatX))
        self.z.set_value(z.astype(theano.config.floatX))
        self.lsf.set_value(np.zeros(1).astype(theano.config.floatX)[ 0 ])

        # We initialize the cavity and the posterior approximation to the prior but with a small random
        # mean so that the outputs are not equal to zero (otherwise the output of the gp will be zero and
        # the next layer will be initialized improperly).

        L = np.random.normal(size = (self.n_inducing_points, self.n_inducing_points)) * 1.0
        m = self.training_targets.get_value()[ selected_points, : ]

        self.LParamPost.set_value(L.astype(theano.config.floatX))
        self.mParamPost.set_value(m.astype(theano.config.floatX))

    # This sets the node for prediction. It basically switches the cavity distribution to be the posterior approximation
    # Once set in this state the network cannot be trained any more.

Example #21

Source File: nn.py From GELUs with MIT License

5 votes

def fit(self, x):
        """Estimate the ZCA whitening transform of ``x`` and store it.

        ``x`` is copied and flattened to ``(x.shape[0], prod(x.shape[1:]))``.
        The column mean, the whitening matrix and its inverse are stored as
        shared variables in ``self.mean``, ``self.ZCA_mat`` and
        ``self.inv_ZCA_mat``.
        """
        shape = x.shape
        flat = x.copy().reshape((shape[0], np.prod(shape[1:])))

        m = np.mean(flat, axis=0)
        flat -= m
        sigma = np.dot(flat.T, flat) / flat.shape[0]

        U, S, _ = linalg.svd(sigma)
        # ``self.regularization`` is added to the singular values before the
        # square root, keeping the inverse scaling finite.
        whiten = np.dot(np.dot(U, np.diag(1./np.sqrt(S+self.regularization))), U.T)
        unwhiten = np.dot(np.dot(U, np.diag(np.sqrt(S+self.regularization))), U.T)

        self.ZCA_mat = th.shared(whiten.astype(th.config.floatX))
        self.inv_ZCA_mat = th.shared(unwhiten.astype(th.config.floatX))
        self.mean = th.shared(m.astype(th.config.floatX))

Example #22

Source File: nn.py From GELUs with MIT License

5 votes

def apply(self, x):
        """Whiten ``x`` with the stored mean and ZCA matrix.

        ``x`` is flattened to two dimensions, centred with ``self.mean``,
        multiplied by ``self.ZCA_mat`` and reshaped back to ``x.shape``.

        :param x: a numpy array or a Theano ``TensorVariable``.
        :raises NotImplementedError: for any other input type.
        """
        s = x.shape

        if isinstance(x, np.ndarray):
            flat = x.reshape((s[0], np.prod(s[1:])))
            centered = flat - self.mean.get_value()
            return np.dot(centered, self.ZCA_mat.get_value()).reshape(s)

        if isinstance(x, T.TensorVariable):
            centered = x.flatten(2) - self.mean.dimshuffle('x',0)
            return T.dot(centered, self.ZCA_mat).reshape(s)

        raise NotImplementedError("Whitening only implemented for numpy arrays or Theano TensorVariables")

Example #23

Source File: nn.py From GELUs with MIT License

5 votes

def invert(self, x):
        """Undo the whitening using the stored inverse ZCA matrix and mean.

        ``x`` is flattened to two dimensions, multiplied by
        ``self.inv_ZCA_mat``, shifted by ``self.mean`` and reshaped back to
        ``x.shape``.

        :param x: a numpy array or a Theano ``TensorVariable``.
        :raises NotImplementedError: for any other input type.
        """
        s = x.shape

        if isinstance(x, np.ndarray):
            flat = x.reshape((s[0], np.prod(s[1:])))
            restored = np.dot(flat, self.inv_ZCA_mat.get_value()) + self.mean.get_value()
            return restored.reshape(s)

        if isinstance(x, T.TensorVariable):
            restored = T.dot(x.flatten(2), self.inv_ZCA_mat) + self.mean.dimshuffle('x',0)
            return restored.reshape(s)

        raise NotImplementedError("Whitening only implemented for numpy arrays or Theano TensorVariables")

# T.nnet.relu has some issues with very large inputs, this is more stable

Example #24

Source File: test_basic_ops.py From D-VAE with MIT License

5 votes

def test_elemwise_composite_support_code():
    """
    This was generating an error at compile time.
    Commit 3d1690fa346103594356ecaeceeb2c6757b45d2b fixed that.

    Compiles and runs the gradient of a small negative log-likelihood on
    the GPU mode and checks that the compiled graph contains exactly one
    elementwise node.
    """
    def zeros_shared(shape, name):
        # float32 shared variable filled with zeros
        return tcn.shared_constructor(value=numpy.zeros(shape, dtype="float32"),
                                      name=name)

    X = zeros_shared((100, 10), 'X')
    W = zeros_shared((10, 1), 'W')
    U = T.dot(X, W)
    Y = zeros_shared((100, 1), 'Y')
    P = T.exp(-(Y - U) ** 2)
    epsilon = numpy.asarray(0.001, dtype="float32")
    NLL = -T.mean(T.log(P + epsilon))  # SupportCodeError
    G = theano.gradient.grad(NLL, wrt=[W])

    # Silence the 1pexp warning only while compiling, then restore it.
    previous = theano.config.warn.identify_1pexp_bug
    theano.config.warn.identify_1pexp_bug = False
    try:
        f_grad = theano.function(inputs=[], outputs=G, mode=mode_with_gpu)
    finally:
        theano.config.warn.identify_1pexp_bug = previous
    f_grad()

    topo = f_grad.maker.fgraph.toposort()
    n_elemwise = sum(isinstance(node.op, T.Elemwise) for node in topo)
    assert n_elemwise == 1
    # I suspect this was failing in the original branch too
    n_gpu_elemwise = sum(isinstance(node.op, tcn.GpuElemwise) for node in topo)
    assert n_gpu_elemwise == 1

Example #25

Source File: training_deep_model_VIEW_1.01.py From visually-informed-embedding-of-word-VIEW- with BSD 2-Clause "Simplified" License

5 votes

def hinge2(y_true, y_pred):
    """Return the hinge loss averaged over axis 1 and then over the rest.

    Targets are rescaled with ``2 * y_true - 1`` before the margin is
    computed (presumably ``y_true`` holds 0/1 labels -- confirm with caller).
    """
    signed_targets = 2. * y_true - 1.
    margins = T.maximum(1. - signed_targets * y_pred, 0.)
    per_sample = T.mean(margins, axis=1)
    return T.mean(per_sample)


# Getting the target:

Example #26

Source File: test_nnet.py From D-VAE with MIT License

5 votes

def test_allclose(self):
        """Compare softmax + cross-entropy with the log-softmax formulation.

        Checks that both formulations give the same probabilities and the
        same cross-entropy, and that the gradient of the log-softmax cost
        contains no NaN for large inputs.
        """
        mode = theano.compile.get_mode(theano.config.mode)
        mode.check_isfinite = False
        x, y = tensor.matrices('xy')

        # regular softmax and crossentropy
        sm = tensor.nnet.softmax(x)
        cm = tensor.nnet.categorical_crossentropy(sm, y)

        # numerically stable log-softmax with crossentropy
        logsm = tensor.nnet.logsoftmax(x)
        sm2 = tensor.exp(logsm)  # just used to show equivalence with sm
        cm2 = -tensor.sum(y * logsm, axis=1)
        grad = tensor.grad(cm2.mean(), x)

        # large softmax inputs and one-hot coded labels
        a = numpy.exp(10 * numpy.random.rand(5, 10).astype(theano.config.floatX))
        b = numpy.eye(5, 10).astype(theano.config.floatX)

        # show equivalence of softmax and exponentiated numerically stable
        # log-softmax
        softmax_pair = theano.function([x], [sm, sm2])
        sm_, sm2_ = softmax_pair(a)
        utt.assert_allclose(sm_, sm2_)

        # now show that the two versions result in the same crossentropy cost
        # this indicates that the forward function does provide some numerical
        # stability
        cost_pair = theano.function([x, y], [cm, cm2], mode=mode)
        cm_, cm2_ = cost_pair(a, b)
        utt.assert_allclose(cm_, cm2_)

        # now, show that in the standard softmax case the gradients blow up
        # while in the log-softmax case they don't
        grad_fn = theano.function([x, y], [grad])
        grad_ = grad_fn(a, b)
        assert not numpy.any(numpy.isnan(grad_))

Example #27

Source File: omniglot.py From ntm-one-shot with MIT License

5 votes

def omniglot():
    """Train the memory-augmented network on Omniglot episodes.

    Builds the model, compiles a training function (cost with Adam updates)
    and an accuracy function, then iterates over the episode generator.
    Every 100 episodes the mean cost of the episodes since the last report
    and the averaged per-instance accuracies are printed and both windows
    are reset. A ``KeyboardInterrupt`` stops training and prints the elapsed
    time in seconds.
    """
    input_var = T.tensor3('input') # input_var has dimensions (batch_size, time, input_dim)
    target_var = T.imatrix('target') # target_var has dimensions (batch_size, time) (label indices)

    # Load data
    generator = OmniglotGenerator(data_folder='./data/omniglot', batch_size=16,
        nb_samples=5, nb_samples_per_class=10, max_rotation=0., max_shift=0, max_iter=None)

    output_var, output_var_flatten, params = memory_augmented_neural_network(input_var,
        target_var, batch_size=generator.batch_size, nb_class=generator.nb_samples,
        memory_shape=(128, 40), controller_size=200, input_size=20 * 20, nb_reads=4)

    cost = T.mean(T.nnet.categorical_crossentropy(output_var_flatten, target_var.flatten()))
    updates = lasagne.updates.adam(cost, params, learning_rate=1e-3)

    accuracies = accuracy_instance(T.argmax(output_var, axis=2), target_var, batch_size=generator.batch_size)

    print('Compiling the model...')
    train_fn = theano.function([input_var, target_var], cost, updates=updates)
    accuracy_fn = theano.function([input_var, target_var], accuracies)
    print('Done')

    print('Training...')
    t0 = time.time()
    all_scores, scores, accs = [], [], np.zeros(generator.nb_samples_per_class)
    try:
        for i, (example_input, example_output) in generator:
            score = train_fn(example_input, example_output)
            acc = accuracy_fn(example_input, example_output)
            all_scores.append(score)
            scores.append(score)
            accs += acc
            if i > 0 and not (i % 100):
                # Report the mean over the window accumulated in ``scores``
                # (previously only the last episode's cost was printed even
                # though the window was collected and reset).
                print('Episode %05d: %.6f' % (i, np.mean(scores)))
                print(accs / 100.)
                scores, accs = [], np.zeros(generator.nb_samples_per_class)
    except KeyboardInterrupt:
        # Interrupting is the expected way to stop; report the elapsed time.
        print(time.time() - t0)

Example #28

Source File: rbm_pretraining.py From Projects with MIT License

5 votes

def get_pseudo_likelihood_cost(self, updates):
        """Return a stochastic pseudo-likelihood estimate of the cost.

        One column of the rounded input is flipped per call; the index of
        that column lives in a shared variable that ``updates`` advances
        cyclically over ``self.n_visible``.

        :param updates: update dictionary; receives the index increment.
        :return: mean of ``n_visible * log(sigmoid(FE(flipped) - FE(input)))``.
        """
        # index of the bit to flip on this call
        bit_i_idx = theano.shared(value=0, name='bit_i_idx')

        # round the input to the nearest integer and take its free energy
        xi = T.round(self.input)
        fe_xi = self.free_energy(xi)

        # flip column bit_i_idx only and take the free energy again
        flipped_column = 1 - xi[:, bit_i_idx]
        xi_flip = T.set_subtensor(xi[:, bit_i_idx], flipped_column)
        fe_xi_flip = self.free_energy(xi_flip)

        log_prob = T.log(T.nnet.sigmoid(fe_xi_flip - fe_xi))
        cost = T.mean(self.n_visible * log_prob)

        # move on to the next bit, wrapping around at n_visible
        updates[bit_i_idx] = (bit_i_idx + 1) % self.n_visible
        return cost

Example #29

Source File: batch_normalization.py From Projects with MIT License

5 votes

def __init__(self,hidden_layers,layer_nodes):
        """Build a batch-normalised sigmoid MLP and compile its functions.

        The network maps 784 inputs through ``hidden_layers`` layers of
        ``layer_nodes`` units to 10 outputs. Every layer, including the
        output layer, is affine -> batch normalisation (using the statistics
        of the current batch) -> sigmoid.

        :param hidden_layers: number of hidden layers.
        :param layer_nodes: number of units in every hidden layer.
        """
        self.input = T.matrix()
        self.target = T.matrix()
        self.W = []
        self.b = []
        self.lin_outputs = []
        self.batch_norms = []
        self.gammas = []
        self.betas = []
        self.activations = []

        def add_layer(layer_input, n_in, n_out):
            # Create the parameters of one layer and append its affine,
            # batch-normalised and sigmoid outputs to the bookkeeping lists.
            self.W.append(theano.shared(self.ortho_weight(n_in,n_out),borrow=True))
            self.b.append(theano.shared(np.zeros((n_out,), dtype=theano.config.floatX),borrow=True))
            self.gammas.append(theano.shared(value = np.ones((n_out,), dtype=theano.config.floatX)))
            self.betas.append(theano.shared(value = np.zeros((n_out,), dtype=theano.config.floatX)))
            self.lin_outputs.append(T.dot(layer_input,self.W[-1])+self.b[-1])
            # The small constant keeps the standard deviation away from zero.
            self.batch_norms.append(T.nnet.bn.batch_normalization(self.lin_outputs[-1],gamma=self.gammas[-1],beta=self.betas[-1],
                mean=T.mean(self.lin_outputs[-1], axis=0),std=T.sqrt(T.var(self.lin_outputs[-1], axis=0)+0.00001)))
            self.activations.append(T.nnet.sigmoid(self.batch_norms[-1]))

        # input layer, remaining hidden layers, output layer
        add_layer(self.input, 784, layer_nodes)
        for _ in range(hidden_layers-1):
            add_layer(self.activations[-1], layer_nodes, layer_nodes)
        add_layer(self.activations[-1], layer_nodes, 10)

        self.cost = T.nnet.categorical_crossentropy(self.activations[-1],self.target).mean()
        self.params = self.W+self.b+self.gammas+self.betas
        self.updates = self.adam(self.cost,self.params)
        self.train_f = theano.function([self.input,self.target],self.cost,updates=self.updates,allow_input_downcast=True)
        self.predict_f = theano.function([self.input],self.activations[-1],allow_input_downcast=True)

Example #30

Source File: theano_backend.py From Att-ChemdNER with Apache License 2.0

5 votes

def mean(x, axis=None, keepdims=False):
    '''Mean of a tensor, alongside the specified axis.

    Integer and boolean tensors are averaged in `floatx()` precision;
    for every other dtype Theano chooses the output dtype.
    '''
    # bool is available since theano v0.9dev
    needs_float = 'int' in x.dtype or x.dtype == 'bool'
    dtype = floatx() if needs_float else None
    return T.mean(x, axis=axis, keepdims=keepdims, dtype=dtype)