Python theano.tensor.max() Examples

The following are 30 code examples of theano.tensor.max(). You can go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module theano.tensor, or try the search function.
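Before the project examples, here is a minimal standalone sketch of how theano.tensor.max() is typically compiled and evaluated (variable names are illustrative; assumes Theano and NumPy are installed):

import numpy as np
import theano
import theano.tensor as T

x = T.matrix('x')                          # symbolic 2-D input
f = theano.function([x], [T.max(x, axis=1), T.max(x)])
data = np.array([[1., 5., 3.], [2., 4., 6.]], dtype=theano.config.floatX)
row_max, global_max = f(data)              # per-row maxima [5., 6.] and overall maximum 6.0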
Example #1
Source File: objectives.py    From LasagneNLP with Apache License 2.0
def theano_logsumexp(x, axis=None):
    """
    Compute log(sum(exp(x), axis=axis)) in a numerically stable
    fashion.
    Parameters
    ----------
    x : tensor_like
        A Theano tensor (any dimension will do).
    axis : int or symbolic integer scalar, or None
        Axis over which to perform the summation. If `None` (the
        default), the summation is performed over all axes.
    Returns
    -------
    result : ndarray or scalar
        The result of the log(sum(exp(...))) operation.
    """

    xmax = x.max(axis=axis, keepdims=True)
    xmax_ = x.max(axis=axis)
    return xmax_ + T.log(T.exp(x - xmax).sum(axis=axis)) 
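A brief usage sketch for the function above (a hypothetical call, assuming the function and Theano are importable): the point of subtracting xmax is that the naive log(sum(exp(x))) overflows for large inputs, while this version does not.

import numpy as np
import theano.tensor as T

x = T.dvector('x')
stable = theano_logsumexp(x, axis=0)
print(stable.eval({x: np.array([1000., 1000.])}))   # ~1000.693; naive T.log(T.sum(T.exp(x))) gives inf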
Example #2
Source File: test_opt_uncanonicalize.py    From D-VAE with MIT License
def test_optimization(self):
        # If we use only the max output, we should replace this op with
        # a faster one.
        mode = theano.compile.mode.get_default_mode().including(
            'canonicalize', 'fast_run')

        for axis in [0, 1, -1]:
            data = numpy.asarray(numpy.random.rand(2, 3), dtype=config.floatX)
            n = tensor.matrix()

            f = function([n], tensor.max_and_argmax(n, axis)[0], mode=mode)
            topo = f.maker.fgraph.toposort()
            assert len(topo) == 1
            assert isinstance(topo[0].op, CAReduce)

            f = function([n], tensor.max_and_argmax(n, axis), mode=mode)
            topo = f.maker.fgraph.toposort()
            assert len(topo) == 1
            assert isinstance(topo[0].op, tensor.MaxAndArgmax) 
Example #3
Source File: utils.py    From LSTM-CRF-models with MIT License
def theano_logsumexp(x, axis=None):
    """
    Compute log(sum(exp(x), axis=axis)) in a numerically stable
    fashion.
    Parameters
    ----------
    x : tensor_like
        A Theano tensor (any dimension will do).
    axis : int or symbolic integer scalar, or None
        Axis over which to perform the summation. If `None` (the
        default), the summation is performed over all axes.
    Returns
    -------
    result : ndarray or scalar
        The result of the log(sum(exp(...))) operation.
    """
    xmax = T.max(x,axis=axis, keepdims=True)
    xmax_ = T.max(x,axis=axis)
    return xmax_ + T.log(T.exp(x - xmax).sum(axis=axis)) 
Example #4
Source File: theano_backend.py    From GraphicDesignPatternByPython with MIT License
def ctc_update_log_p(skip_idxs, zeros, active, log_p_curr, log_p_prev):
    active_skip_idxs = skip_idxs[(skip_idxs < active).nonzero()]
    active_next = T.cast(T.minimum(
        T.maximum(
            active + 1,
            T.max(T.concatenate([active_skip_idxs, [-1]])) + 2 + 1
        ), log_p_curr.shape[0]), 'int32')

    common_factor = T.max(log_p_prev[:active])
    p_prev = T.exp(log_p_prev[:active] - common_factor)
    _p_prev = zeros[:active_next]
    # copy over
    _p_prev = T.set_subtensor(_p_prev[:active], p_prev)
    # previous transitions
    _p_prev = T.inc_subtensor(_p_prev[1:], _p_prev[:-1])
    # skip transitions
    _p_prev = T.inc_subtensor(_p_prev[active_skip_idxs + 2], p_prev[active_skip_idxs])
    updated_log_p_prev = T.log(_p_prev) + common_factor

    log_p_next = T.set_subtensor(
        zeros[:active_next],
        log_p_curr[:active_next] + updated_log_p_prev
    )
    return active_next, log_p_next 
Example #5
Source File: test_opt_uncanonicalize.py    From attention-lvcsr with MIT License
def test_optimization(self):
        # If we use only the max output, we should replace this op with
        # a faster one.
        mode = theano.compile.mode.get_default_mode().including(
            'canonicalize', 'fast_run')

        for axis in [0, 1, -1]:
            data = numpy.asarray(numpy.random.rand(2, 3), dtype=config.floatX)
            n = tensor.matrix()

            f = function([n], tensor.max_and_argmax(n, axis)[0], mode=mode)
            topo = f.maker.fgraph.toposort()
            assert len(topo) == 1
            assert isinstance(topo[0].op, CAReduce)

            f = function([n], tensor.max_and_argmax(n, axis), mode=mode)
            topo = f.maker.fgraph.toposort()
            assert len(topo) == 1
            assert isinstance(topo[0].op, tensor.MaxAndArgmax) 
Example #6
Source File: util.py    From gated-graph-transformer-network with MIT License
def reduce_log_sum(tensor, axis=None, guaranteed_finite=False):
    """
    Sum probabilities in the log domain, i.e. return
        log(e^vec[0] + e^vec[1] + ...)
        = log(e^x e^(vec[0]-x) + e^x e^(vec[1]-x) + ...)
        = log(e^x [e^(vec[0]-x) + e^(vec[1]-x) + ...])
        = log(e^x) + log(e^(vec[0]-x) + e^(vec[1]-x) + ...)
        = x + log(e^(vec[0]-x) + e^(vec[1]-x) + ...)
    For numerical stability, we choose x = max(vec)
    Note that if x is -inf, that means all values are -inf,
    so the answer should be -inf. In this case, we choose x = 0.
    """
    maxval = T.max(tensor, axis)
    maxval_full = T.max(tensor, axis, keepdims=True)
    if not guaranteed_finite:
        maxval = T.switch(T.isfinite(maxval), maxval, T.zeros_like(maxval))
        maxval_full = T.switch(T.isfinite(maxval_full), maxval_full, T.zeros_like(maxval_full))
    reduced_sum = T.sum(T.exp(tensor - maxval_full), axis)
    logsum = maxval + T.log(reduced_sum)
    return logsum 
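A short usage sketch (illustrative inputs) showing why the finiteness switch matters: when every entry is -inf, x is forced to 0, so the result is still -inf instead of nan.

import numpy as np
import theano.tensor as T

vec = T.dvector('vec')
expr = reduce_log_sum(vec)
print(expr.eval({vec: np.log(np.array([0.25, 0.75]))}))   # ~0.0, an ordinary log-sum-exp
print(expr.eval({vec: np.array([-np.inf, -np.inf])}))     # -inf rather than nan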
Example #7
Source File: theano_backend.py    From Att-ChemdNER with Apache License 2.0
def ctc_update_log_p(skip_idxs, zeros, active, log_p_curr, log_p_prev):
    active_skip_idxs = skip_idxs[(skip_idxs < active).nonzero()]
    active_next = T.cast(T.minimum(
        T.maximum(
            active + 1,
            T.max(T.concatenate([active_skip_idxs, [-1]])) + 2 + 1
        ), log_p_curr.shape[0]), 'int32')

    common_factor = T.max(log_p_prev[:active])
    p_prev = T.exp(log_p_prev[:active] - common_factor)
    _p_prev = zeros[:active_next]
    # copy over
    _p_prev = T.set_subtensor(_p_prev[:active], p_prev)
    # previous transitions
    _p_prev = T.inc_subtensor(_p_prev[1:], _p_prev[:-1])
    # skip transitions
    _p_prev = T.inc_subtensor(_p_prev[active_skip_idxs + 2], p_prev[active_skip_idxs])
    updated_log_p_prev = T.log(_p_prev) + common_factor

    log_p_next = T.set_subtensor(
        zeros[:active_next],
        log_p_curr[:active_next] + updated_log_p_prev
    )
    return active_next, log_p_next 
Example #8
Source File: test_dnn.py    From attention-lvcsr with MIT License
def test_pool_grad(self):
        if not dnn.dnn_available():
            raise SkipTest(dnn.dnn_available.msg)
        img = T.ftensor4('img')
        img_grad = T.ftensor4('img_grad')
        out = T.ftensor4('out')
        img_val = numpy.asarray(
            numpy.random.rand(2, 3, 4, 5),
            dtype='float32'
        )
        img_grad_val = numpy.asarray(
            numpy.random.rand(2, 3, 4, 5),
            dtype='float32'
        )
        out_val = numpy.asarray(
            numpy.random.rand(2, 3, 4, 5),
            dtype='float32'
        )

        for params in product(
            [(1, 1), (2, 2), (3, 3)],
            [(1, 1), (2, 2), (3, 3)],
            ['max', 'average_inc_pad']
        ):
            pool_grad = dnn.GpuDnnPoolGrad()(
                img,
                out,
                img_grad,
                params[0],
                params[1],
                (0, 0)
            )
            self._compile_and_check(
                [img, img_grad, out],
                [pool_grad],
                [img_val, img_grad_val, out_val],
                dnn.GpuDnnPoolGrad
            )


# this has been a problem in the past 
Example #9
Source File: nanguardmode.py    From attention-lvcsr with MIT License
def compile_gpu_func(nan_is_error, inf_is_error, big_is_error):
    """ compile utility function used by contains_nan and contains_inf
    """
    global f_gpumin, f_gpumax, f_gpuabsmax
    if not cuda.cuda_available:
        return
    guard_input = cuda.fvector('nan_guard')
    cuda_compile_failed = False
    if (nan_is_error or inf_is_error) and f_gpumin is None:
        try:
            f_gpumin = theano.function(
                [guard_input], T.min(guard_input),
                mode='FAST_RUN'
            )
        except RuntimeError:
            # This can happen if cuda is available, but the
            # device is in exclusive mode and used by another
            # process.
            cuda_compile_failed = True
    if inf_is_error and not cuda_compile_failed and f_gpumax is None:
        try:
            f_gpumax = theano.function(
                [guard_input], T.max(guard_input),
                mode='FAST_RUN'
            )
        except RuntimeError:
            # This can happen if cuda is available, but the
            # device is in exclusive mode and used by another
            # process.
            cuda_compile_failed = True
    if big_is_error and not cuda_compile_failed and f_gpuabsmax is None:
        try:
            f_gpuabsmax = theano.function(
                [guard_input], T.max(T.abs_(guard_input)),
                mode='FAST_RUN'
                )
        except RuntimeError:
            # This can happen if cuda is available, but the
            # device is in exclusive mode and used by another
            # process.
            cuda_compile_failed = True 
Example #10
Source File: test_opt_uncanonicalize.py    From attention-lvcsr with MIT License
def test_optimization_min(self):
        data = numpy.asarray(numpy.random.rand(2, 3), dtype=config.floatX)
        n = tensor.matrix()

        for axis in [0, 1, -1]:
            f = function([n], tensor.min(n, axis), mode=self.mode)
            topo = f.maker.fgraph.toposort()
            assert len(topo) == 1
            assert isinstance(topo[0].op, CAReduce)
            f(data)

            # test variant with neg to make sure we optimize correctly
            f = function([n], tensor.min(-n, axis), mode=self.mode)
            topo = f.maker.fgraph.toposort()
            assert len(topo) == 2
            assert isinstance(topo[0].op, CAReduce)  # max
            assert isinstance(topo[1].op, Elemwise)
            assert isinstance(topo[1].op.scalar_op, scalar.Neg)
            f(data)

            f = function([n], -tensor.min(n, axis), mode=self.mode)
            topo = f.maker.fgraph.toposort()
            assert len(topo) == 2
            assert isinstance(topo[0].op, Elemwise)
            assert isinstance(topo[0].op.scalar_op, scalar.Neg)
            assert isinstance(topo[1].op, CAReduce)  # max
            f(data)

            f = function([n], -tensor.min(-n, axis), mode=self.mode)
            topo = f.maker.fgraph.toposort()
            assert len(topo) == 1
            assert isinstance(topo[0].op, CAReduce)  # max
            f(data) 
Example #11
Source File: test_opt_uncanonicalize.py    From attention-lvcsr with MIT License
def test_optimization_max(self):
        data = numpy.asarray(numpy.random.rand(2, 3), dtype=config.floatX)
        n = tensor.matrix()

        for axis in [0, 1, -1]:
            f = function([n], tensor.max(n, axis), mode=self.mode)
            topo = f.maker.fgraph.toposort()
            assert len(topo) == 1
            assert isinstance(topo[0].op, CAReduce)
            f(data)

            f = function([n], tensor.max(-n, axis), mode=self.mode)
            topo = f.maker.fgraph.toposort()
            assert len(topo) == 2
            assert isinstance(topo[0].op, Elemwise)
            assert isinstance(topo[0].op.scalar_op, scalar.Neg)
            assert isinstance(topo[1].op, CAReduce)
            f(data)

            f = function([n], -tensor.max(n, axis), mode=self.mode)
            topo = f.maker.fgraph.toposort()
            assert len(topo) == 2
            assert isinstance(topo[0].op, CAReduce)
            assert isinstance(topo[1].op, Elemwise)
            assert isinstance(topo[1].op.scalar_op, scalar.Neg)
            f(data)

            f = function([n], -tensor.max(-n, axis), mode=self.mode)
            topo = f.maker.fgraph.toposort()
            assert len(topo) == 1
            assert isinstance(topo[0].op, CAReduce)  # min
            f(data) 
Example #12
Source File: encdec.py    From LV_groundhog with BSD 3-Clause "New" or "Revised" License
def fprop(self, x):
        if_longer = x[:self.required]
        padding = ReplicateLayer(TT.max([1, self.required - x.shape[0]]))(x[-1]).out
        if_shorter = TT.concatenate([x, padding])
        diff = x.shape[0] - self.required
        self.out = ifelse(diff < 0, if_shorter, if_longer)
        return self.out 
Example #13
Source File: test_dnn.py    From attention-lvcsr with MIT License
def pool_2d_i2n(input, ds=(2, 2), strides=None,
                pad=(0, 0),
                pool_function=T.max, mode='ignore_borders'):
    if strides is None:
        strides = ds

    if strides[0] > ds[0] or strides[1] > ds[1]:
        raise RuntimeError(
            "strides should be smaller than or equal to ds,"
            " strides=(%d, %d) and ds=(%d, %d)" %
            (strides + ds))
    shape = input.shape
    if pad != (0, 0):
        assert pool_function is T.max
        pad_x = pad[0]
        pad_y = pad[1]
        a = T.alloc(-numpy.inf, shape[0], shape[1], shape[2] + pad_x * 2,
                    shape[3] + pad_y * 2)
        input = T.set_subtensor(a[:, :,
                                  pad_x:pad_x + shape[2],
                                  pad_y:pad_y + shape[3]],
                                input)
        shape = input.shape

    neibs = images2neibs(input, ds, strides, mode=mode)
    pooled_neibs = pool_function(neibs, axis=1)

    output_width = (shape[2] - ds[0]) // strides[0] + 1
    output_height = (shape[3] - ds[1]) // strides[1] + 1

    pooled_output = pooled_neibs.reshape((shape[0], shape[1],
                                          output_width, output_height))
    return pooled_output 
Example #14
Source File: encdec.py    From LV_groundhog with BSD 3-Clause "New" or "Revised" License
def __call__(self, x):
        shape = x.shape
        if x.ndim == 1:
            shape1 = TT.cast(shape[0] / self.maxout_part, 'int64')
            shape2 = TT.cast(self.maxout_part, 'int64')
            x = x.reshape([shape1, shape2])
            x = x.max(1)
        else:
            shape1 = TT.cast(shape[1] / self.maxout_part, 'int64')
            shape2 = TT.cast(self.maxout_part, 'int64')
            x = x.reshape([shape[0], shape1, shape2])
            x = x.max(2)
        return x 
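A hedged numeric sketch of this maxout reshaping (maxout_part is taken to be 2 here; the names are illustrative and not from LV_groundhog):

import numpy as np
import theano.tensor as T

h = T.dmatrix('h')                 # (batch, features), features divisible by maxout_part
part = 2
pooled = h.reshape([h.shape[0], T.cast(h.shape[1] // part, 'int64'), part]).max(2)
print(pooled.eval({h: np.array([[1., 4., 2., 3.]])}))   # [[4., 3.]]: max over each group of 2 units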
Example #15
Source File: test_dnn.py    From attention-lvcsr with MIT License
def pool_2d_i2n(input, ds=(2, 2), strides=None, pad=(0, 0),
                pool_function=T.max, mode='ignore_borders'):
    if strides is None:
        strides = ds

    if strides[0] > ds[0] or strides[1] > ds[1]:
        raise RuntimeError(
            "strides should be smaller than or equal to ds,"
            " strides=(%d, %d) and ds=(%d, %d)" %
            (strides + ds))
    shape = input.shape
    if pad != (0, 0):
        assert pool_function is T.max
        pad_x = pad[0]
        pad_y = pad[1]
        a = T.alloc(-numpy.inf, shape[0], shape[1], shape[2] + pad_x * 2,
                    shape[3] + pad_y * 2)
        input = T.set_subtensor(a[:, :,
                                  pad_x:pad_x + shape[2],
                                  pad_y:pad_y + shape[3]],
                                input)
        shape = input.shape

    neibs = images2neibs(input, ds, strides, mode=mode)
    pooled_neibs = pool_function(neibs, axis=1)

    output_width = (shape[2] - ds[0]) // strides[0] + 1
    output_height = (shape[3] - ds[1]) // strides[1] + 1

    pooled_output = pooled_neibs.reshape((shape[0], shape[1],
                                          output_width, output_height))
    return pooled_output 
Example #16
Source File: theano_backend.py    From Att-ChemdNER with Apache License 2.0
def max(x, axis=None, keepdims=False):
    return T.max(x, axis=axis, keepdims=keepdims) 
Example #17
Source File: pyipm.py    From pyipm with MIT License
def reghess(self, Hc):
        '''Regularize the Hessian to avoid ill-conditioning and to escape saddle
           points.
        '''
        # compute eigenvalues and condition number
        w = self.eigh(Hc)
        rcond = np.min(np.abs(w)) / np.max(np.abs(w))

        if rcond <= self.eps or (self.neq + self.nineq) != np.sum(w < -self.eps):
            # if the Hessian is ill-conditioned or the matrix inertia is undesirable, regularize the Hessian
            if rcond <= self.eps and self.neq:
                # if the Hessian is ill-conditioned, regularize by replacing some zeros with a small magnitude diagonal
                # matrix
                ind1 = self.nvar + self.nineq
                ind2 = ind1 + self.neq
                Hc[ind1:ind2, ind1:ind2] -= self.reg_coef * self.eta * (self.mu_host ** self.beta) * np.eye(self.neq)
            if self.delta == 0.0:
                # if the diagonal shift coefficient is zero, set to initial value
                self.delta = self.delta0
            else:
                # prevent the diagonal shift coefficient from becoming too small
                self.delta = np.max([self.delta / 2, self.delta0])
            # regularize Hessian with diagonal shift matrix (delta*I) until matrix inertia condition is satisfied
            Hc[:self.nvar, :self.nvar] += self.delta * np.eye(self.nvar)
            w = self.eigh(Hc)
            while (self.neq + self.nineq) != np.sum(w < -self.eps):
                Hc[:self.nvar, :self.nvar] -= self.delta * np.eye(self.nvar)
                self.delta *= 10.0
                Hc[:self.nvar, :self.nvar] += self.delta * np.eye(self.nvar)
                w = self.eigh(Hc)

        # return regularized Hessian
        return Hc 
Example #18
Source File: utils.py    From hred-qs with BSD 3-Clause "New" or "Revised" License
def SoftMax(x):
    x = T.exp(x - T.max(x, axis=x.ndim-1, keepdims=True))
    return x / T.sum(x, axis=x.ndim-1, keepdims=True) 
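A quick usage sketch (illustrative input): subtracting the row maximum keeps the exponentials in range, so rows with very large logits still produce valid probabilities.

import numpy as np
import theano.tensor as T

logits = T.dmatrix('logits')
probs = SoftMax(logits)
out = probs.eval({logits: np.array([[1., 2., 3.], [1000., 1000., 1000.]])})
print(out.sum(axis=1))   # both rows sum to 1; a naive exp would overflow on the second row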
Example #19
Source File: utils.py    From hred-qs with BSD 3-Clause "New" or "Revised" License
def __call__(self, x):
        shape = x.shape
        if x.ndim == 2:
            shape1 = T.cast(shape[1] / self.maxout_part, 'int64')
            shape2 = T.cast(self.maxout_part, 'int64')
            x = x.reshape([shape[0], shape1, shape2])
            x = x.max(2)
        else:
            shape1 = T.cast(shape[2] / self.maxout_part, 'int64')
            shape2 = T.cast(self.maxout_part, 'int64')
            x = x.reshape([shape[0], shape[1], shape1, shape2])
            x = x.max(3)
        return x 
Example #20
Source File: activations.py    From dcgan_code with MIT License
def __call__(self, x):
        if x.ndim == 2:
            x = T.max([x[:, n::self.n_pool] for n in range(self.n_pool)], axis=0)
        elif x.ndim == 4:
            x = T.max([x[:, n::self.n_pool, :, :] for n in range(self.n_pool)], axis=0)
        else:
            raise NotImplementedError
        return x 
Example #21
Source File: activations.py    From dcgan_code with MIT License
def __call__(self, x):
        e_x = T.exp(x - x.max(axis=1, keepdims=True))
        return e_x / e_x.sum(axis=1, keepdims=True) 
Example #22
Source File: activations.py    From dcgan_code with MIT License
def __call__(self, x):
        e_x = T.exp(x - x.max(axis=1).dimshuffle(0, 'x'))
        return e_x / e_x.sum(axis=1).dimshuffle(0, 'x') 
Example #23
Source File: regularization.py    From OpenDeep with Apache License 2.0
def elastic(parameters, l1_coefficient, l2_coefficient=None):
    """
    Elastic net regularization is a weighted combination of L1 and L2 regularization.

    Parameters
    ----------
    parameters : list of theano variables
        Parameters to apply the regularization.
    l1_coefficient : float
        Weighting for L1 regularization contribution.
    l2_coefficient : float
        Weighting for L2 regularization contribution.

    Returns
    -------
    theano expression
        The appropriate regularized parameters as a weighted combination of L1 and L2.
    """
    # if the second coefficient isn't provided, just make it (1-l1_coef). Bound it at 0 though.
    if l2_coefficient is None:
        l2_coefficient = T.maximum(1 - l1_coefficient, 0)

    if parameters is not None:
        return l1_coefficient*L1(parameters) + l2_coefficient*L2(parameters)
    else:
        log.warning("None parameters passed to elastic regularizer!") 
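A hedged usage sketch (the L1 and L2 helpers are OpenDeep internals not shown here; the weight values are made up):

import numpy as np
import theano
import theano.tensor as T

W = theano.shared(np.array([[0.5, -1.0], [2.0, 0.0]]), name='W')
penalty = elastic([W], l1_coefficient=0.3)    # l2_coefficient defaults to maximum(1 - 0.3, 0)
# the penalty expression would typically be added to a task loss before calling T.grad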
Example #24
Source File: statistics.py    From OpenDeep with Apache License 2.0
def get_stats(input, stat=None):
    """
    Returns a dictionary mapping the name of the statistic to the result on the input.
    Currently gets mean, var, std, min, max, l1, l2.

    Parameters
    ----------
    input : tensor
        Theano tensor to grab stats for.
    stat : str or list(str), optional
        Name(s) of the statistics to return. If `None`, all statistics are returned.

    Returns
    -------
    dict
        Dictionary of all the statistics expressions {string_name: theano expression}
    """
    stats = {
        'mean': T.mean(input),
        'var': T.var(input),
        'std': T.std(input),
        'min': T.min(input),
        'max': T.max(input),
        'l1': input.norm(L=1),
        'l2': input.norm(L=2),
        #'num_nonzero': T.sum(T.nonzero(input)),
    }
    stat_list = raise_to_list(stat)
    compiled_stats = {}
    if stat_list is None:
        return stats

    for stat in stat_list:
        if isinstance(stat, string_types) and stat in stats:
            compiled_stats.update({stat: stats[stat]})
    return compiled_stats 
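A brief usage sketch (illustrative names; assumes OpenDeep's raise_to_list helper passes a list through unchanged): request a subset of statistics and compile them into a monitoring function.

import numpy as np
import theano
import theano.tensor as T

activations = T.dmatrix('activations')
stats = get_stats(activations, stat=['mean', 'max'])
monitor = theano.function([activations], [stats['mean'], stats['max']])
print(monitor(np.array([[0., 1.], [2., 3.]])))   # mean 1.5 and max 3.0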
Example #25
Source File: nn.py    From weightnorm with MIT License
def softmax_loss(p_true, output_before_softmax):
    output_before_softmax -= T.max(output_before_softmax, axis=1, keepdims=True)
    if p_true.ndim==2:
        return T.mean(T.log(T.sum(T.exp(output_before_softmax),axis=1)) - T.sum(p_true*output_before_softmax, axis=1))
    else:
        return T.mean(T.log(T.sum(T.exp(output_before_softmax),axis=1)) - output_before_softmax[T.arange(p_true.shape[0]),p_true]) 
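A small usage sketch (made-up inputs): with integer class labels and all-zero logits, the loss reduces to the uniform cross-entropy log(3).

import numpy as np
import theano.tensor as T

logits = T.dmatrix('logits')
labels = T.ivector('labels')
loss = softmax_loss(labels, logits)
print(loss.eval({logits: np.zeros((2, 3)), labels: np.array([0, 2], dtype='int32')}))   # ~1.0986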
Example #26
Source File: nn.py    From weightnorm with MIT License
def log_sum_exp(x, axis=1):
    m = T.max(x, axis=axis)
    return m+T.log(T.sum(T.exp(x-m.dimshuffle(0,'x')), axis=axis)) 
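A short usage sketch (illustrative input; the dimshuffle(0, 'x') broadcast assumes a 2-D input reduced over axis=1):

import numpy as np
import theano.tensor as T

x = T.dmatrix('x')
print(log_sum_exp(x).eval({x: np.log(np.array([[1., 3.], [2., 2.]]))}))   # both rows give log(4) ~ 1.386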
Example #27
Source File: utils.py    From hred-latent-piecewise with GNU General Public License v3.0
def SoftMax(x):
    x = T.exp(x - T.max(x, axis=x.ndim-1, keepdims=True))
    return x / T.sum(x, axis=x.ndim-1, keepdims=True) 
Example #28
Source File: utils.py    From hred-latent-piecewise with GNU General Public License v3.0
def __call__(self, x):
        shape = x.shape
        if x.ndim == 2:
            shape1 = T.cast(shape[1] / self.maxout_part, 'int64')
            shape2 = T.cast(self.maxout_part, 'int64')
            x = x.reshape([shape[0], shape1, shape2])
            x = x.max(2)
        else:
            shape1 = T.cast(shape[2] / self.maxout_part, 'int64')
            shape2 = T.cast(self.maxout_part, 'int64')
            x = x.reshape([shape[0], shape[1], shape1, shape2])
            x = x.max(3)
        return x 
Example #29
Source File: nn.py    From deligan with MIT License
def log_sum_exp(x, axis=1):
    m = T.max(x, axis=axis)
    return m+T.log(T.sum(T.exp(x-m.dimshuffle(0,'x')), axis=axis)) 
Example #30
Source File: nn.py    From deligan with MIT License
def log_sum_exp(x, axis=1):
    m = T.max(x, axis=axis)
    return m+T.log(T.sum(T.exp(x-m.dimshuffle(0,'x')), axis=axis))