Python theano.tensor.maximum() Examples

The following are 30 code examples of theano.tensor.maximum(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module theano.tensor, or try the search function.
Example #1
Source File:    From Att-ChemdNER with Apache License 2.0 6 votes vote down vote up
def ctc_update_log_p(skip_idxs, zeros, active, log_p_curr, log_p_prev):
    """Fold the previous step's log-probabilities into the current ones.

    The first ``active`` entries of ``log_p_prev`` are exponentiated (after
    subtracting their maximum for numerical stability), propagated one
    position forward and, for the selected skip indices, two positions
    forward, then converted back to log space and added to the leading
    entries of ``log_p_curr``.

    Parameters
    ----------
    skip_idxs : integer tensor
        Candidate positions from which a two-step transition is allowed;
        only those smaller than ``active`` are used.
    zeros : tensor
        Zero-filled buffer sliced to build the accumulator
        (presumably at least as long as ``log_p_curr`` -- TODO confirm).
    active : integer scalar
        Number of leading entries of ``log_p_prev`` that are in use.
    log_p_curr, log_p_prev : tensors
        Current and previous log-probability vectors.

    Returns
    -------
    (active_next, log_p_next)
        The updated active length (int32) and the updated log-probabilities.
    """
    active_skip_idxs = skip_idxs[(skip_idxs < active).nonzero()]
    # The active window grows by one, or far enough to hold the furthest
    # skip target (index + 2, hence length index + 3), but never beyond the
    # length of log_p_curr. The [-1] sentinel keeps T.max defined when no
    # skip index is active.
    furthest_skip = T.max(T.concatenate([active_skip_idxs, [-1]])) + 2 + 1
    active_next = T.cast(
        T.minimum(
            T.maximum(active + 1, furthest_skip),
            log_p_curr.shape[0]),
        'int32')

    # Work in linear space relative to the largest entry to avoid underflow.
    common_factor = T.max(log_p_prev[:active])
    p_prev = T.exp(log_p_prev[:active] - common_factor)
    _p_prev = zeros[:active_next]
    # copy over
    _p_prev = T.set_subtensor(_p_prev[:active], p_prev)
    # previous transitions
    _p_prev = T.inc_subtensor(_p_prev[1:], _p_prev[:-1])
    # skip transitions
    _p_prev = T.inc_subtensor(_p_prev[active_skip_idxs + 2],
                              p_prev[active_skip_idxs])
    updated_log_p_prev = T.log(_p_prev) + common_factor

    log_p_next = T.set_subtensor(
        log_p_curr[:active_next],
        log_p_curr[:active_next] + updated_log_p_prev
    )
    return active_next, log_p_next
Example #2
Source File:    From GELUs with MIT License 6 votes vote down vote up
def adamax_updates(params, cost, lr=0.001, mom1=0.9, mom2=0.999):
    """Build the list of ``(shared_variable, new_value)`` AdaMax-style updates.

    For every parameter two zero-initialised shared accumulators are created:
    ``v`` (momentum of the gradient) and ``mg`` (decayed running maximum of
    the absolute gradient). The parameter moves against the momentum term
    scaled by ``mg``.

    Parameters
    ----------
    params : sequence of shared variables
        Parameters to optimise; each must support ``get_value()``.
    cost : symbolic scalar
        Expression differentiated with respect to ``params``.
    lr : float
        Step size.
    mom1 : float
        Momentum decay; when not positive the raw gradient is used and the
        momentum accumulator is left untouched.
    mom2 : float
        Decay of the running maximum of ``|gradient|``.

    Returns
    -------
    list of (shared variable, expression) pairs.
    """
    updates = []
    grads = T.grad(cost, params)
    for p, g in zip(params, grads):
        mg = th.shared(np.cast[th.config.floatX](p.get_value() * 0.))
        v = th.shared(np.cast[th.config.floatX](p.get_value() * 0.))
        if mom1 > 0:
            v_t = mom1*v + (1. - mom1)*g
            # Persist the momentum; without this update v would stay at zero.
            updates.append((v, v_t))
        else:
            v_t = g
        mg_t = T.maximum(mom2*mg, abs(g))
        # 1e-6 guards against division by zero while mg is still all zeros.
        g_t = v_t / (mg_t + 1e-6)
        p_t = p - lr * g_t
        updates.append((mg, mg_t))
        updates.append((p, p_t))
    return updates
Example #3
Source File:    From Deep_MRI_brain_extraction with MIT License 6 votes vote down vote up
def max_pool_along_channel_axis(sym_input, pool_factor):
    """Max-pool over the third axis (index 2) of ``sym_input``; for 3D conv.

    The axis is split into ``pool_factor`` interleaved strided slices
    (``offset::pool_factor``) and their element-wise maximum is returned.

    Parameters
    ----------
    sym_input : tensor with at least three axes
    pool_factor : int
        Number of interleaved slices to combine. The slices must have equal
        length, i.e. the pooled axis should be divisible by ``pool_factor``.

    Returns
    -------
    The pooled tensor, or ``None`` when ``pool_factor`` is 0.
    """
    pooled = None
    # range (not xrange) keeps the loop valid on both Python 2 and 3.
    for offset in range(pool_factor):
        strided = sym_input[:, :, offset::pool_factor]
        if pooled is None:
            pooled = strided
        else:
            pooled = T.maximum(pooled, strided)
    return pooled
#    Ns, Ts, C, Hs, Ws = 1, 70, 1, 70, 70  -> 70^3
#    Nf, Tf, C, Hf, Wf = 32, 5 , 1, 5 , 5  -> 32 filters of shape 5^3
#    signals = numpy.arange(Ns*Ts*C*Hs*Ws).reshape(Ns, Ts, C, Hs, Ws).astype('float32')
#    filters = numpy.arange(Nf*Tf*C*Hf*Wf).reshape(Nf, Tf, C, Hf, Wf).astype('float32')
# in 3D
#        input:  (1, 70,  3, 70, 70)
#       filters: (32, 5 , 3,  5 , 5)
#    --> output: (1, 66, 32, 66, 66) 
Example #4
Source File:    From dl4ir-webnav with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def compute_emb(x, W):
    """Scan over the first axis of ``x`` and average the embedding rows of ``W``.

    For each slice ``xi`` the rows ``W[xi]`` are gathered, summed over the
    second-to-last axis and divided by the number of entries of ``xi`` that
    differ from -1 (at least 1, to avoid dividing by zero).

    Parameters
    ----------
    x : integer tensor of indices into ``W``; -1 appears to mark padding
        (TODO confirm against the caller).
    W : embedding matrix whose rows have ``prm.dim_emb`` columns.

    Returns
    -------
    The stacked per-step embeddings produced by ``theano.scan``.
    """

    def _step(xi, emb, W):
        # `emb` is the previous scan output; it is only a carrier and is not
        # used in the computation.
        if prm.att_doc:
            new_shape = (xi.shape[0], xi.shape[1], xi.shape[2], prm.dim_emb)
        else:
            new_shape = (xi.shape[0], xi.shape[1], prm.dim_emb)

        out = W[xi.flatten()].reshape(new_shape).sum(-2)
        return out / tensor.maximum(1., tensor.neq(xi,-1).astype('float32').sum(-1, keepdims=True))

    if prm.att_doc:
        emb_init = tensor.alloc(0., x.shape[1], x.shape[2], prm.dim_emb)
    else:
        emb_init = tensor.alloc(0., x.shape[1], prm.dim_emb)

    # NOTE(review): the arguments of this call were truncated in the source;
    # they are reconstructed from the signature of `_step`
    # (sequence item, previous output, non-sequence). Confirm against the
    # original project.
    (embs), scan_updates = theano.scan(_step,
                                       sequences=[x],
                                       outputs_info=[emb_init],
                                       non_sequences=[W])

    return embs
Example #5
Source File:    From Deep_MRI_brain_extraction with MIT License 6 votes vote down vote up
def my_max_pool_2d(sym_input, pool_shape=(2, 2)):
    """Max-pool the last two axes of a 4-axis tensor using only slicing.

    Built purely from strided slices and ``T.maximum`` (no dimshuffling), so
    every step is an ordinary element-wise tensor operation.

    Parameters
    ----------
    sym_input : tensor with four axes
        The last axis is pooled by ``pool_shape[1]`` and the axis before it
        by ``pool_shape[0]``; both should be divisible by their factor so the
        strided slices have equal length.
    pool_shape : pair of ints
        ``(factor for axis 2, factor for axis 3)``.

    Returns
    -------
    The pooled tensor (``None`` if a pooling factor is 0).
    """

    def _strided_max(tensor4, factor, axis):
        # Element-wise maximum of the `factor` interleaved slices along `axis`.
        best = None
        for offset in range(factor):
            index = [slice(None)] * 4
            index[axis] = slice(offset, None, factor)
            candidate = tensor4[tuple(index)]
            if best is None:
                best = candidate
            else:
                best = T.maximum(best, candidate)
        return best

    # Pool the last axis first, then the one before it (same order as the
    # original implementation).
    pooled_last = _strided_max(sym_input, pool_shape[1], 3)
    sym_ret = _strided_max(pooled_last, pool_shape[0], 2)
    return sym_ret
Example #6
Source File:    From D-VAE with MIT License 6 votes vote down vote up
def max_pool_2d_same_size(input, patch_size):
    """
    Takes as input a 4-D tensor. It sets all non maximum values
    of non-overlapping patches of size (patch_size[0],patch_size[1]) to zero,
    keeping only the maximum values. The output has the same dimensions as
    the input.

    Parameters
    ----------
    input : 4-D theano tensor of input images
        Input images. Max pooling will be done over the 2 last dimensions.
    patch_size : tuple of length 2
        Size of the patch (patch height, patch width).
        (2,2) will retain only one non-zero value per patch of 4 values.

    """
    # Pool once, then feed the pooled result to MaxPoolGrad as both the
    # pooling output and the third argument.
    output = Pool(patch_size, True)(input)
    outs = MaxPoolGrad(patch_size, True)(input, output, output)
    return outs
Example #7
Source File:    From iaf with MIT License 6 votes vote down vote up
def AdaMax(w, objective, alpha=.01, beta1=.1, beta2=.001):
    """Build AdaMax updates that move ``w`` along the gradient of ``objective``.

    Note the sign: parameters are updated with ``+ alpha * ...``, i.e. the
    summed objective is increased, not decreased.

    Parameters
    ----------
    w : sequence of shared variables
        Parameters; each must support ``get_value()``.
    objective : symbolic tensor
        Summed before differentiation.
    alpha : float
        Step size.
    beta1 : float
        Weight of the new gradient in the first-moment running average.
    beta2 : float
        Decay applied to the running maximum of ``|gradient|``.

    Returns
    -------
    OrderedDict mapping each shared variable to its update expression.
    """
    # A single pre-joined string prints identically with the Python 2 print
    # statement and the Python 3 print function.
    print(' '.join(str(item) for item in (
        'AdaMax', 'alpha:', alpha, 'beta1:', beta1, 'beta2:', beta2)))
    g = T.grad(objective.sum(), w, disconnected_inputs='warn')
    new = OrderedDict()
    for w_i, g_i in zip(w, g):
        # NaN gradients are not filtered here.
        mom1 = G.sharedf(w_i.get_value() * 0.)
        _max = G.sharedf(w_i.get_value() * 0.)
        new[mom1] = (1-beta1) * mom1 + beta1 * g_i
        # The 1e-8 offset keeps the denominator below strictly positive.
        new[_max] = T.maximum((1-beta2)*_max, abs(g_i) + 1e-8)
        new[w_i] = w_i + alpha * new[mom1] / new[_max]
    return new

# AdaMax that averages over multiple minibatches 
Example #8
Source File:    From GraphicDesignPatternByPython with MIT License 6 votes vote down vote up
def ctc_update_log_p(skip_idxs, zeros, active, log_p_curr, log_p_prev):
    """Fold the previous step's log-probabilities into the current ones.

    The first ``active`` entries of ``log_p_prev`` are exponentiated (after
    subtracting their maximum for numerical stability), propagated one
    position forward and, for the selected skip indices, two positions
    forward, then converted back to log space and added to the leading
    entries of ``log_p_curr``.

    Parameters
    ----------
    skip_idxs : integer tensor
        Candidate positions from which a two-step transition is allowed;
        only those smaller than ``active`` are used.
    zeros : tensor
        Zero-filled buffer sliced to build the accumulator
        (presumably at least as long as ``log_p_curr`` -- TODO confirm).
    active : integer scalar
        Number of leading entries of ``log_p_prev`` that are in use.
    log_p_curr, log_p_prev : tensors
        Current and previous log-probability vectors.

    Returns
    -------
    (active_next, log_p_next)
        The updated active length (int32) and the updated log-probabilities.
    """
    active_skip_idxs = skip_idxs[(skip_idxs < active).nonzero()]
    # The active window grows by one, or far enough to hold the furthest
    # skip target (index + 2, hence length index + 3), but never beyond the
    # length of log_p_curr. The [-1] sentinel keeps T.max defined when no
    # skip index is active.
    furthest_skip = T.max(T.concatenate([active_skip_idxs, [-1]])) + 2 + 1
    active_next = T.cast(
        T.minimum(
            T.maximum(active + 1, furthest_skip),
            log_p_curr.shape[0]),
        'int32')

    # Work in linear space relative to the largest entry to avoid underflow.
    common_factor = T.max(log_p_prev[:active])
    p_prev = T.exp(log_p_prev[:active] - common_factor)
    _p_prev = zeros[:active_next]
    # copy over
    _p_prev = T.set_subtensor(_p_prev[:active], p_prev)
    # previous transitions
    _p_prev = T.inc_subtensor(_p_prev[1:], _p_prev[:-1])
    # skip transitions
    _p_prev = T.inc_subtensor(_p_prev[active_skip_idxs + 2],
                              p_prev[active_skip_idxs])
    updated_log_p_prev = T.log(_p_prev) + common_factor

    log_p_next = T.set_subtensor(
        log_p_curr[:active_next],
        log_p_curr[:active_next] + updated_log_p_prev
    )
    return active_next, log_p_next
Example #9
Source File:    From adversarial with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def get_noise(self, size):
        """Draw a float32 noise sample of the requested shape.

        Parameters
        ----------
        size : int or tuple
            A bare int is treated as a batch size and expanded to
            ``(size, total dimension of the input space)``.

        Returns
        -------
        A sample from ``self.theano_rng`` selected by ``self.noise``:
        ``"uniform"`` on ``[-sqrt(3), sqrt(3)]``, ``"gaussian"`` standard
        normal, or ``"spherical"`` normal rows divided by their L2 norm.

        Raises
        ------
        NotImplementedError
            If ``self.noise`` is none of the supported names.

        Notes
        -----
        When the instance has no ``noise`` attribute it is set to
        ``"gaussian"`` as a side effect.
        """
        # Allow just requesting batch size
        if isinstance(size, int):
            size = (size, self.get_input_space().get_total_dimension())

        if not hasattr(self, 'noise'):
            self.noise = "gaussian"
        if self.noise == "uniform":
            return self.theano_rng.uniform(low=-np.sqrt(3), high=np.sqrt(3), size=size, dtype='float32')
        elif self.noise == "gaussian":
            return self.theano_rng.normal(size=size, dtype='float32')
        elif self.noise == "spherical":
            noise = self.theano_rng.normal(size=size, dtype='float32')
            # The 1e-7 floor on the row norm prevents division by zero.
            noise = noise / T.maximum(1e-7, T.sqrt(T.sqr(noise).sum(axis=1))).dimshuffle(0, 'x')
            return noise
        else:
            raise NotImplementedError(self.noise)
Example #10
Source File:    From weightnorm with MIT License 6 votes vote down vote up
def adamax_updates(params, cost, lr=0.001, mom1=0.9, mom2=0.999):
    """Build the list of ``(shared_variable, new_value)`` AdaMax-style updates.

    For every parameter two zero-initialised shared accumulators are created:
    ``v`` (momentum of the gradient) and ``mg`` (decayed running maximum of
    the absolute gradient). The parameter moves against the momentum term
    scaled by ``mg``.

    Parameters
    ----------
    params : sequence of shared variables
        Parameters to optimise; each must support ``get_value()``.
    cost : symbolic scalar
        Expression differentiated with respect to ``params``.
    lr : float
        Step size.
    mom1 : float
        Momentum decay; when not positive the raw gradient is used and the
        momentum accumulator is left untouched.
    mom2 : float
        Decay of the running maximum of ``|gradient|``.

    Returns
    -------
    list of (shared variable, expression) pairs.
    """
    updates = []
    grads = T.grad(cost, params)
    for p, g in zip(params, grads):
        mg = th.shared(np.cast[th.config.floatX](p.get_value() * 0.))
        v = th.shared(np.cast[th.config.floatX](p.get_value() * 0.))
        if mom1 > 0:
            v_t = mom1*v + (1. - mom1)*g
            # Persist the momentum; without this update v would stay at zero.
            updates.append((v, v_t))
        else:
            v_t = g
        mg_t = T.maximum(mom2*mg, abs(g))
        # 1e-6 guards against division by zero while mg is still all zeros.
        g_t = v_t / (mg_t + 1e-6)
        p_t = p - lr * g_t
        updates.append((mg, mg_t))
        updates.append((p, p_t))
    return updates
Example #11
Source File:    From attention-lvcsr with MIT License 6 votes vote down vote up
def compute_weights(self, energies, attended_mask):
        """Turn attention energies into weights normalised over axis 0.

        Parameters
        ----------
        energies : tensor
            Raw attention energies.
        attended_mask : tensor or None
            Optional mask multiplied into the unnormalised weights.

        Returns
        -------
        ``unnormalized_weights / normalization`` where the unnormalised
        weights depend on ``self.energy_normalizer``: ``'softmax'``
        (exponential of max-shifted energies), ``'logistic'`` (sigmoid) or
        ``'relu'`` (``max(energies / 1000, 0)``).

        Raises
        ------
        Exception
            If ``self.energy_normalizer`` is not one of the names above.
        """
        if self.energy_normalizer == 'softmax':
            logger.debug("Using softmax attention weights normalization")
            # Subtracting the maximum keeps the exponential from overflowing.
            energies = energies - energies.max(axis=0)
            unnormalized_weights = tensor.exp(energies)
        elif self.energy_normalizer == 'logistic':
            logger.debug("Using smoothfocus (logistic sigm) "
                        "attention weights normalization")
            unnormalized_weights = tensor.nnet.sigmoid(energies)
        elif self.energy_normalizer == 'relu':
            logger.debug("Using ReLU attention weights normalization")
            unnormalized_weights = tensor.maximum(energies/1000., 0.0)
        else:
            raise Exception("Unknown energey_normalizer: {}"
                            .format(self.energy_normalizer))

        if attended_mask:
            unnormalized_weights *= attended_mask
            # If mask consists of all zeros use 1 as the normalization
            # coefficient
            normalization = (unnormalized_weights.sum(axis=0) +
                             tensor.all(1 - attended_mask, axis=0))
        else:
            # Without a mask there is no all-zero column to compensate for
            # (and `1 - None` would raise).
            normalization = unnormalized_weights.sum(axis=0)
        return unnormalized_weights / normalization
Example #12
Source File:    From Depth-Map-Prediction with GNU General Public License v3.0 6 votes vote down vote up
def define_cost(self, pred, y0, m0):
        """Masked depth cost between a prediction and a target.

        With ``d`` the masked per-pixel difference and ``n`` the number of
        valid pixels of each sample, the cost is::

            (sum_i n_i * sum(d_i ** 2) - 0.5 * sum_i (sum(d_i)) ** 2)
                / max(sum_i n_i ** 2, 1)

        Parameters
        ----------
        pred : tensor
            Predicted values, reshaped to ``(self.bsize, npix)``.
        y0 : tensor
            Target values, reshaped the same way.
        m0 : tensor
            Validity mask, reshaped the same way
            (presumably 1 for valid pixels, 0 otherwise -- TODO confirm).

        Returns
        -------
        Symbolic scalar cost.
        """
        bsize = self.bsize
        # NOTE(review): the argument of int(...) was truncated in the source
        # ("int([1:]))"). Reconstructed as the product of the non-batch
        # dimensions of y0 using the project's test_shape helper -- confirm
        # against the original project.
        npix = int(np.prod(test_shape(y0)[1:]))
        y0_target = y0.reshape((bsize, npix))
        y0_mask = m0.reshape((bsize, npix))
        pred = pred.reshape((bsize, npix))

        p = pred * y0_mask
        t = y0_target * y0_mask

        d = (p - t)

        nvalid_pix = T.sum(y0_mask, axis=1)
        # max(..., 1) avoids dividing by zero when no pixel is valid.
        depth_cost = (T.sum(nvalid_pix * T.sum(d**2, axis=1))
                         - 0.5*T.sum(T.sum(d, axis=1)**2)) \
                     / T.maximum(T.sum(nvalid_pix**2), 1)

        return depth_cost
Example #13
Source File:    From GraphicDesignPatternByPython with MIT License 5 votes vote down vote up
def l2_normalize(x, axis=None):
    """Divide ``x`` by its L2 norm taken along ``axis``.

    The sum of squares is clamped from below by ``epsilon()`` before the
    square root, so the divisor never reaches zero.
    """
    squared = T.square(x)
    total = T.sum(squared, axis=axis, keepdims=True)
    clamped = T.maximum(total, epsilon())
    return x / T.sqrt(clamped)
Example #14
Source File:    From GraphicDesignPatternByPython with MIT License 5 votes vote down vote up
def maximum(x, y):
    """Return the element-wise maximum of ``x`` and ``y`` via ``T.maximum``."""
    larger = T.maximum(x, y)
    return larger
Example #15
Source File:    From sars_tutorial with MIT License 5 votes vote down vote up
def relu(self, X):
        """Rectified linear unit: element-wise maximum of ``X`` and 0."""
        rectified = T.maximum(X, 0)
        return rectified
Example #16
Source File:    From sars_tutorial with MIT License 5 votes vote down vote up
def relu(self, X):
        """Rectified linear unit: element-wise maximum of ``X`` and 0."""
        rectified = T.maximum(X, 0)
        return rectified
Example #17
Source File:    From Deep_MRI_brain_extraction with MIT License 5 votes vote down vote up
def get_reconstructed_input(self, hidden):
    """ Computes the reconstructed input given the values of the hidden layer

    The hidden activations are convolved with ``self.W_prime`` in 'full'
    border mode, the flattened result is replicated and laid out as
    neighbourhoods, ``neibs2images`` turns those back into an image of shape
    ``self.x.shape``, and a rectified-linear activation is applied after
    adding the bias ``self.b_prime``.
    """
    # Local import: the numpy alias used by the surrounding module is not
    # visible from this function.
    import numpy

    repeated_conv = conv.conv2d(input = hidden, filters = self.W_prime, border_mode='full')

    # NOTE(review): the right-hand operand of `*` was truncated in the
    # source. Reconstructed as the number of elements in one pooling window
    # (attribute name `self.poolsize` is assumed) -- confirm against the
    # original project.
    multiple_conv_out = [repeated_conv.flatten()] * int(numpy.prod(self.poolsize))

    stacked_conv_neibs = T.stack(*multiple_conv_out).T

    # NOTE(review): the middle argument (the neighbourhood shape) was
    # truncated in the source; `self.pl` is an assumed attribute name --
    # confirm against the original project.
    stretch_unpooling_out = theano.sandbox.neighbours.neibs2images(stacked_conv_neibs, self.pl, self.x.shape)

    rectified_linear_activation = lambda x: T.maximum(0.0, x)
    # dimshuffle('x', 0, 'x', 'x') broadcasts the bias over every axis
    # except axis 1.
    return rectified_linear_activation(stretch_unpooling_out + self.b_prime.dimshuffle('x', 0, 'x', 'x'))
Example #18
Source File:    From weightnorm with MIT License 5 votes vote down vote up
def relu(x):
    """Rectified linear unit: element-wise maximum of ``x`` and 0."""
    rectified = T.maximum(x, 0)
    return rectified
Example #19
Source File:    From attention-lvcsr with MIT License 5 votes vote down vote up
def monotonicity_penalty(weights, mask_x=None):
    """Sum the positive step-to-step increases of the cumulative weights.

    The weights are accumulated along axis 2; wherever the cumulative sum
    grows from one index of axis 0 to the next, the increase is counted,
    summed over axis 2, optionally multiplied by ``mask_x[1:]`` and finally
    reduced to a scalar.
    """
    running_totals = tensor.cumsum(weights, axis=2)
    step_increase = running_totals[1:] - running_totals[:-1]
    penalties = tensor.maximum(step_increase, 0).sum(axis=2)
    if not mask_x:
        return penalties.sum()
    penalties *= mask_x[1:]
    return penalties.sum()
Example #20
Source File:    From Deep_MRI_brain_extraction with MIT License 5 votes vote down vote up
def max_pool_along_second_axis(sym_input, pool_factor):
    """Max-pool over the second axis (index 1); for MLP and 2D conv.

    The axis is split into ``pool_factor`` interleaved strided slices
    (``offset::pool_factor``) and their element-wise maximum is returned.

    Parameters
    ----------
    sym_input : tensor with at least two axes
    pool_factor : int
        Number of interleaved slices to combine. The slices must have equal
        length, i.e. the pooled axis should be divisible by ``pool_factor``.

    Returns
    -------
    The pooled tensor, or ``None`` when ``pool_factor`` is 0.
    """
    pooled = None
    # range (not xrange) keeps the loop valid on both Python 2 and 3.
    for offset in range(pool_factor):
        strided = sym_input[:, offset::pool_factor]
        if pooled is None:
            pooled = strided
        else:
            pooled = T.maximum(pooled, strided)
    return pooled
Example #21
Source File:    From dcgan_code with MIT License 5 votes vote down vote up
def Hinge(y_true, y_pred):
    """Mean hinge loss: average of ``max(1 - y_true * y_pred, 0)``."""
    margin = 1. - y_true * y_pred
    clipped = T.maximum(margin, 0.)
    return clipped.mean()
Example #22
Source File:    From dcgan_code with MIT License 5 votes vote down vote up
def SquaredHinge(y_true, y_pred):
    """Mean squared hinge loss: average of ``max(1 - y_true * y_pred, 0) ** 2``."""
    margin = 1. - y_true * y_pred
    clipped = T.maximum(margin, 0.)
    return T.sqr(clipped).mean()
Example #23
Source File:    From iaf with MIT License 5 votes vote down vote up
def AdaMaxAvg2(ws, objective, alpha=.01, beta1=.1, beta2=.001, beta3=0.01, n_accum=1):
    """AdaMax with gradient accumulation and a running average of parameters.

    Gradients are summed over ``n_accum`` calls; the moments, the parameters
    and their running averages are only changed on the call that completes a
    group.

    Parameters
    ----------
    ws : sequence of dict-like parameter groups (name -> shared variable)
    objective : symbolic tensor, summed before differentiation
    alpha : float
        Step size (parameters move along ``+ alpha * ...``).
    beta1 : float
        Weight of the new accumulated gradient in the first moment.
    beta2 : float
        Decay of the running maximum of ``|accumulated gradient|``.
    beta3 : float
        Weight of the new parameter value in its running average.
    n_accum : int
        Number of minibatches to accumulate per parameter update.

    Returns
    -------
    (new, ws_avg)
        ``new`` is an OrderedDict of updates; ``ws_avg`` is a list of dicts
        holding the running-average shared variables, parallel to ``ws``.
    """
    if n_accum == 1:
        # NOTE(review): the AdaMaxAvg shown elsewhere in this listing has the
        # signature (ws, ws_avg, objective, alpha, ...) and returns only the
        # update dict, which does not match this positional call or this
        # function's (new, ws_avg) return value -- confirm against the
        # original project.
        return AdaMaxAvg(ws, objective, alpha, beta1, beta2, beta3)
    # A single pre-joined string prints identically with the Python 2 print
    # statement and the Python 3 print function.
    print(' '.join(str(item) for item in (
        'AdaMax_Avg2', 'alpha:', alpha, 'beta1:', beta1, 'beta2:', beta2,
        'beta3:', beta3, 'n_accum:', n_accum)))
    gs = G.ndict.T_grad(objective.sum(), ws, disconnected_inputs='raise')

    new = OrderedDict()
    from theano.ifelse import ifelse
    it = G.sharedf(0.)
    new[it] = it + 1
    # `reset` starts a fresh gradient sum; `update` marks the last call of a
    # group, when the accumulated gradient is applied.
    reset = T.eq(T.mod(it, n_accum), 0)
    update = T.eq(T.mod(it, n_accum), n_accum-1)
    ws_avg = []
    for j, w_group in enumerate(ws):
        w_avg = {}
        for i in w_group:
            _w = w_group[i]
            _g = gs[j][i]
            # NaN gradients are not filtered here.
            mom1 = G.sharedf(_w.get_value() * 0.)
            _max = G.sharedf(_w.get_value() * 0.)
            w_avg[i] = G.sharedf(_w.get_value())
            g_sum = G.sharedf(_w.get_value() * 0.)
            new[g_sum] = ifelse(reset, _g, g_sum + _g)
            new[mom1] = ifelse(update, (1-beta1) * mom1 + beta1 * new[g_sum], mom1)
            new[_max] = ifelse(update, T.maximum((1-beta2)*_max, abs(new[g_sum]) + 1e-8), _max)
            new[_w] = ifelse(update, _w + alpha * new[mom1] / new[_max], _w)
            new[w_avg[i]] = ifelse(update, beta3 * new[_w] + (1.-beta3) * w_avg[i], w_avg[i])
        ws_avg += [w_avg]
    return new, ws_avg
Example #24
Source File:    From iaf with MIT License 5 votes vote down vote up
def Eve(w, w_avg, f, alpha=.01, beta1=.1, beta2=.001, beta3=0.01, disconnected_inputs='raise'):
    """AdaMax-style updates where the cost is evaluated at look-ahead parameters.

    The cost ``f`` is evaluated at ``w + (1 - beta1) / beta1 * delta`` where
    ``delta`` is the previous step, and its gradient with respect to ``w``
    drives the update.

    Parameters
    ----------
    w : dict-like (name -> shared variable)
        Parameters to update.
    w_avg : dict-like (name -> shared variable)
        Running averages of the parameters, keyed like ``w``.
    f : callable
        Maps a parameter dict to a symbolic cost; its mean is used.
    alpha : float
        Step size (parameters move along ``+ alpha * ...``).
    beta1 : float
        Weight of the new gradient in the first moment.
    beta2 : float
        Decay of the running maximum of ``|gradient|``.
    beta3 : float
        Weight of the current parameter value in its running average.
    disconnected_inputs : str
        Forwarded to the gradient computation ('warn' or 'raise').

    Returns
    -------
    (train_cost, new)
        The symbolic cost and an OrderedDict of updates.
    """
    # A single pre-joined string prints identically with the Python 2 print
    # statement and the Python 3 print function.
    print(' '.join(str(item) for item in (
        'Eve', 'alpha:', alpha, 'beta1:', beta1, 'beta2:', beta2,
        'beta3:', beta3)))

    mom = {}
    _max = {}
    delta = {}
    w_prime = {}
    for i in w:
        mom[i] = G.sharedf(w[i].get_value() * 0.)
        # Starts at 1e-8 rather than 0 so the division below is defined.
        _max[i] = G.sharedf(w[i].get_value() * 0. + 1e-8)
        delta[i] = G.sharedf(w[i].get_value() * 0.)
        w_prime[i] = w[i] + (1-beta1)/beta1 * delta[i]
    train_cost = f(w_prime).mean()
    g = G.ndict.T_grad(train_cost, w, disconnected_inputs=disconnected_inputs)
    new = OrderedDict()
    for i in w:
        new[mom[i]] = (1-beta1) * mom[i] + beta1 * g[i]
        new[_max[i]] = T.maximum((1-beta2)*_max[i], abs(g[i]) + 1e-8)
        new[delta[i]] = alpha * new[mom[i]] / new[_max[i]]
        new[w[i]] = w[i] + new[delta[i]]
    for i in w:
        # The average is built from the pre-update value of w[i].
        new[w_avg[i]] = beta3 * w[i] + (1.-beta3) * w_avg[i]
    return train_cost, new
# AdaMax that keeps running average of parameter
# Accumulates gradient over n_accum minibatches 
Example #25
Source File:    From iaf with MIT License 5 votes vote down vote up
def AdaMaxAvg(ws, ws_avg, objective, alpha=.01, beta1=.1, beta2=.001, update_keys=None, disconnected_inputs='raise'):
    """AdaMax updates with an optional running average of the parameters.

    Parameters
    ----------
    ws : sequence of dict-like parameter groups (name -> shared variable)
    ws_avg : sequence of dict-like groups or None
        Running averages parallel to ``ws``; skipped when ``None``.
    objective : symbolic tensor, summed before differentiation
    alpha : float
        Step size (parameters move along ``+ alpha * ...``).
    beta1 : float
        Weight of the new gradient in the first moment.
    beta2 : float
        Decay of the running maximum of ``|gradient|``; also used as the
        weight of the current parameter value in its running average.
    update_keys : sequence of key collections or None
        Per-group keys to update; defaults to every key of every group.
    disconnected_inputs : str
        Forwarded to the gradient computation ('warn' or 'raise').

    Returns
    -------
    OrderedDict mapping each shared variable to its update expression.
    """
    # A single pre-joined string prints identically with the Python 2 print
    # statement and the Python 3 print function.
    print(' '.join(str(item) for item in (
        'AdaMax_Avg', 'alpha:', alpha, 'beta1:', beta1, 'beta2:', beta2)))
    gs = G.ndict.T_grad(objective.sum(), ws, disconnected_inputs=disconnected_inputs)
    if update_keys is None:
        update_keys = [group.keys() for group in ws]
    new = OrderedDict()
    for j, w_group in enumerate(ws):
        w_avg = ws_avg[j] if ws_avg is not None else None
        for i in update_keys[j]:
            _w = w_group[i]
            _g = gs[j][i]
            # NaN gradients are not filtered here.
            mom1 = G.sharedf(_w.get_value() * 0.)
            # Starts at 1e-8 rather than 0 so the division below is defined.
            _max = G.sharedf(_w.get_value() * 0. + 1e-8)
            new[mom1] = (1-beta1) * mom1 + beta1 * _g
            new[_max] = T.maximum((1-beta2)*_max, abs(_g) + 1e-8)
            new[_w] = _w + alpha * new[mom1] / new[_max]
            if w_avg is not None:
                # The average is built from the pre-update value of _w.
                new[w_avg[i]] = beta2 * _w + (1.-beta2) * w_avg[i]
    return new

# Eve that keeps running average of parameter 
Example #26
Source File:    From iaf with MIT License 5 votes vote down vote up
def AdaMax2(w, objective, alpha=.01, beta1=.1, beta2=.001, n_accum=2):
    """AdaMax that accumulates gradients over ``n_accum`` calls.

    Gradients are summed across calls; the moments and the parameters are
    only changed on the call selected by the ``update`` condition below.

    Parameters
    ----------
    w : sequence of shared variables
        Parameters; each must support ``get_value()``.
    objective : symbolic tensor, summed before differentiation
    alpha : float
        Step size (parameters move along ``+ alpha * ...``).
    beta1 : float
        Weight of the new accumulated gradient in the first moment.
    beta2 : float
        Decay of the running maximum of ``|accumulated gradient|``.
    n_accum : int
        Number of minibatches to accumulate per parameter update.

    Returns
    -------
    OrderedDict mapping each shared variable to its update expression.
    """
    # A single pre-joined string prints identically with the Python 2 print
    # statement and the Python 3 print function.
    print(' '.join(str(item) for item in (
        'AdaMax2', 'alpha:', alpha, 'beta1:', beta1, 'beta2:', beta2,
        'n_accum:', n_accum)))
    g = T.grad(objective.sum(), w, disconnected_inputs='warn')
    new = OrderedDict()
    from theano.ifelse import ifelse
    it = G.sharedf(0.)
    new[it] = it + 1
    # Both conditions are evaluated on the incremented counter.
    reset = T.eq(T.mod(new[it], n_accum), 0)
    update = T.eq(T.mod(new[it], n_accum), n_accum-1)

    for w_i, g_i in zip(w, g):
        mom1 = G.sharedf(w_i.get_value() * 0.)
        _max = G.sharedf(w_i.get_value() * 0.)
        g_sum = G.sharedf(w_i.get_value() * 0.)
        # NaN gradients are not filtered here.
        new[g_sum] = ifelse(reset, g_i, g_sum + g_i)
        new[mom1] = ifelse(update, (1-beta1) * mom1 + beta1 * new[g_sum], mom1)
        new[_max] = ifelse(update, T.maximum((1-beta2)*_max, abs(new[g_sum]) + 1e-8), _max)
        new[w_i] = ifelse(update, w_i + alpha * new[mom1] / new[_max], w_i)
    return new

# AdaMax that keeps running average of parameter 
Example #27
Source File:    From CTC-Connectionist-Temporal-Classification with Apache License 2.0 5 votes vote down vote up
def log_add(a, b):
        """Return ``log(exp(a) + exp(b))`` computed without overflow.

        The larger argument is factored out, so the exponential is only ever
        taken of ``a + b - 2 * max(a, b)``, which is never positive.
        """
        pivot = tensor.maximum(a, b)
        gap = a + b - 2 * pivot
        return (pivot + tensor.log1p(tensor.exp(gap)))
Example #28
Source File:    From OpenDeep with Apache License 2.0 5 votes vote down vote up
def rectifier(x, leaky=0):
    """
    Returns the element-wise rectifier (ReLU) applied to x.

    Parameters
    ----------
    x : tensor
        Symbolic Tensor (or compatible).
    leaky: scalar or tensor
        Slope for negative input, usually between 0 and 1. The default value of 0 will lead to the standard rectifier,
        1 will lead to a linear activation function, and any value in between will give a leaky rectifier.
        A shared variable (broadcastable against x) will result in a parameterized rectifier with learnable slope(s).

    Returns
    -------
    tensor
        Element-wise rectifier: rectifier(x) = max(0,x) applied to `x`.
    """
    # Earlier formulations kept for reference:
    #   T.maximum(as_floatX(0), x)
    #   (x + abs(x)) / as_floatX(2.0)   -- taken from the Lasagne framework,
    #   noted there as faster than lambda x: T.maximum(0, x)
    #   (thanks to @SnippyHolloW for pointing this out).
    return T.nnet.relu(x, alpha=leaky)
Example #29
Source File:    From weightnorm with MIT License 5 votes vote down vote up
def lrelu(x, a=0.1):
    """Leaky rectifier: element-wise maximum of ``x`` and ``a * x``."""
    scaled = a*x
    return T.maximum(x, scaled)
Example #30
Source File:    From attention-lvcsr with MIT License 5 votes vote down vote up
def structured_maximum(x, y):
    """
    Structured elemwise maximum of sparse matrix x by scalar y.

    """
    # see decorator for function body