Python numpy.apply_over_axes() Examples

The following are 20 code examples for showing how to use numpy.apply_over_axes(). These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example.

You may check out the related API usage on the sidebar.

You may also want to check out all available functions/classes of the module numpy , or try the search function .

Example 1
Project: pingouin   Author: raphaelvallat   File: test_nonparametric.py    License: GNU General Public License v3.0 6 votes vote down vote up
def test_harrelldavis(self):
        """Test Harrel-Davis estimation of :math:`q^{th}` quantile.
        """
        a = [77, 87, 88, 114, 151, 210, 219, 246, 253, 262, 296, 299,
             306, 376, 428, 515, 666, 1310, 2611]
        assert harrelldavis(a, quantile=0.5) == 271.72120054908913
        harrelldavis(x=x, quantile=np.arange(0.1, 1, 0.1))
        assert harrelldavis(a, [0.25, 0.5, 0.75])[1] == 271.72120054908913
        # Test multiple axis
        p = np.random.normal(0, 1, (10, 100))

        def func(a, axes):
            return harrelldavis(a, [0.25, 0.75], axes)

        np.testing.assert_array_almost_equal(harrelldavis(p, [0.25, 0.75], 0),
                                             np.apply_over_axes(func, p, 0))

        np.testing.assert_array_almost_equal(harrelldavis(p, [0.25, 0.75], -1),
                                             np.apply_over_axes(func, p, 1)) 
Example 2
Project: hypothetical   Author: aschleg   File: contingency.py    License: MIT License 5 votes vote down vote up
def table_margins(table):
    r"""
    Computes the marginal sums of a given array.

    Parameters
    ----------
    table : array-like
        A one or two-dimensional array-like object.

    Raises
    ------
    ValueError
        The given array must be either a one or two-dimensional array.

    Returns
    -------
    r, c : tuple
        A tuple containing the total sums of the table rows and the total sums of the table columns.

    Examples
    --------
    >>> t = table_margins([[10, 10, 20], [20, 20, 10]])

    """
    if not isinstance(table, np.ndarray):
        table = np.array(table).copy()

    if table.ndim > 2:
        raise ValueError('table must be a one or two-dimensional array.')

    table_dim = table.ndim

    c = np.apply_over_axes(np.sum, table, 0)

    if table_dim == 2:
        r = np.apply_over_axes(np.sum, table, 1)
    else:
        r = table

    return r, c 
Example 3
Project: vnpy_crypto   Author: birforce   File: scale.py    License: MIT License 5 votes vote down vote up
def mad(a, c=Gaussian.ppf(3/4.), axis=0, center=np.median):
    # c \approx .6745
    """
    The Median Absolute Deviation along given axis of an array

    Parameters
    ----------
    a : array-like
        Input array.
    c : float, optional
        The normalization constant.  Defined as scipy.stats.norm.ppf(3/4.),
        which is approximately .6745.
    axis : int, optional
        The defaul is 0. Can also be None.
    center : callable or float
        If a callable is provided, such as the default `np.median` then it
        is expected to be called center(a). The axis argument will be applied
        via np.apply_over_axes. Otherwise, provide a float.

    Returns
    -------
    mad : float
        `mad` = median(abs(`a` - center))/`c`
    """
    a = np.asarray(a)
    if callable(center):
        center = np.apply_over_axes(center, a, axis)
    return np.median((np.fabs(a-center))/c, axis=axis) 
Example 4
Project: TrajLib   Author: metemaad   File: TrajectoryFeatures.py    License: Apache License 2.0 5 votes vote down vote up
def mad(a):
    c = 0.67448975019608171
    axis = 0
    center = np.median
    center = np.apply_over_axes(center, a, axis)
    return np.median((np.fabs(a - center)) / c, axis=axis) 
Example 5
Project: TextDetector   Author: zchengquan   File: __init__.py    License: GNU General Public License v3.0 5 votes vote down vote up
def _format_as_impl(self, is_numeric, batch, space):
        assert isinstance(space, SequenceSpace)
        if is_numeric:
            rval = np.apply_over_axes(
                lambda batch, axis: self.space._format_as_impl(
                    is_numeric=is_numeric,
                    batch=batch,
                    space=space.space),
                batch, 0)
        else:
            NotImplementedError("Can't convert SequenceSpace Theano variables")
        return rval 
Example 6
Project: Splunking-Crime   Author: nccgroup   File: scale.py    License: GNU Affero General Public License v3.0 5 votes vote down vote up
def mad(a, c=Gaussian.ppf(3/4.), axis=0, center=np.median):
    # c \approx .6745
    """
    The Median Absolute Deviation along given axis of an array

    Parameters
    ----------
    a : array-like
        Input array.
    c : float, optional
        The normalization constant.  Defined as scipy.stats.norm.ppf(3/4.),
        which is approximately .6745.
    axis : int, optional
        The defaul is 0. Can also be None.
    center : callable or float
        If a callable is provided, such as the default `np.median` then it
        is expected to be called center(a). The axis argument will be applied
        via np.apply_over_axes. Otherwise, provide a float.

    Returns
    -------
    mad : float
        `mad` = median(abs(`a` - center))/`c`
    """
    a = np.asarray(a)
    if callable(center):
        center = np.apply_over_axes(center, a, axis)
    return np.median((np.fabs(a-center))/c, axis=axis) 
Example 7
Project: NiMARE   Author: neurostuff   File: stats.py    License: MIT License 5 votes vote down vote up
def two_way(cells):
    """Two-way chi-square test of independence.

    Takes a 3D array as input: N(voxels) x 2 x 2, where the last two
    dimensions are the contingency table for each of N voxels.

    Parameters
    ----------
    cells : (N, 2, 2) array_like
        Concatenated set of contingency tables. There are N contingency tables,
        with the last two dimensions being the tables for each input.

    Returns
    -------
    chi_sq : :class:`numpy.ndarray`
        Chi-square values.

    Notes
    -----
    Taken from Neurosynth.
    """
    # Mute divide-by-zero warning for bad voxels since we account for that
    # later
    warnings.simplefilter("ignore", RuntimeWarning)

    cells = cells.astype('float64')  # Make sure we don't overflow
    total = np.apply_over_axes(np.sum, cells, [1, 2]).ravel()
    chi_sq = np.zeros(cells.shape, dtype='float64')
    for i in range(2):
        for j in range(2):
            exp = np.sum(cells[:, i, :], 1).ravel() * \
                np.sum(cells[:, :, j], 1).ravel() / total
            bad_vox = np.where(exp == 0)[0]
            chi_sq[:, i, j] = (cells[:, i, j] - exp) ** 2 / exp
            chi_sq[bad_vox, i, j] = 1.0  # Set p-value for invalid voxels to 1
    chi_sq = np.apply_over_axes(np.sum, chi_sq, [1, 2]).ravel()
    return chi_sq 
Example 8
Project: Carnets   Author: holzschu   File: test_quantity_non_ufuncs.py    License: BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def test_apply_over_axes(self, axes):
        def function(x, axis):
            return np.sum(np.square(x), axis)

        out = np.apply_over_axes(function, self.q, axes)
        expected = np.apply_over_axes(function, self.q.value, axes)
        expected = expected * self.q.unit ** (2 * len(axes))
        assert_array_equal(out, expected) 
Example 9
Project: lambda-packs   Author: ryfeus   File: contingency.py    License: MIT License 4 votes vote down vote up
def margins(a):
    """Return a list of the marginal sums of the array `a`.

    Parameters
    ----------
    a : ndarray
        The array for which to compute the marginal sums.

    Returns
    -------
    margsums : list of ndarrays
        A list of length `a.ndim`.  `margsums[k]` is the result
        of summing `a` over all axes except `k`; it has the same
        number of dimensions as `a`, but the length of each axis
        except axis `k` will be 1.

    Examples
    --------
    >>> a = np.arange(12).reshape(2, 6)
    >>> a
    array([[ 0,  1,  2,  3,  4,  5],
           [ 6,  7,  8,  9, 10, 11]])
    >>> m0, m1 = margins(a)
    >>> m0
    array([[15],
           [51]])
    >>> m1
    array([[ 6,  8, 10, 12, 14, 16]])

    >>> b = np.arange(24).reshape(2,3,4)
    >>> m0, m1, m2 = margins(b)
    >>> m0
    array([[[ 66]],
           [[210]]])
    >>> m1
    array([[[ 60],
            [ 92],
            [124]]])
    >>> m2
    array([[[60, 66, 72, 78]]])
    """
    margsums = []
    ranged = list(range(a.ndim))
    for k in ranged:
        marg = np.apply_over_axes(np.sum, a, [j for j in ranged if j != k])
        margsums.append(marg)
    return margsums 
Example 10
Project: lambda-packs   Author: ryfeus   File: contingency.py    License: MIT License 4 votes vote down vote up
def expected_freq(observed):
    """
    Compute the expected frequencies from a contingency table.

    Given an n-dimensional contingency table of observed frequencies,
    compute the expected frequencies for the table based on the marginal
    sums under the assumption that the groups associated with each
    dimension are independent.

    Parameters
    ----------
    observed : array_like
        The table of observed frequencies.  (While this function can handle
        a 1-D array, that case is trivial.  Generally `observed` is at
        least 2-D.)

    Returns
    -------
    expected : ndarray of float64
        The expected frequencies, based on the marginal sums of the table.
        Same shape as `observed`.

    Examples
    --------
    >>> observed = np.array([[10, 10, 20],[20, 20, 20]])
    >>> from scipy.stats import expected_freq
    >>> expected_freq(observed)
    array([[ 12.,  12.,  16.],
           [ 18.,  18.,  24.]])

    """
    # Typically `observed` is an integer array. If `observed` has a large
    # number of dimensions or holds large values, some of the following
    # computations may overflow, so we first switch to floating point.
    observed = np.asarray(observed, dtype=np.float64)

    # Create a list of the marginal sums.
    margsums = margins(observed)

    # Create the array of expected frequencies.  The shapes of the
    # marginal sums returned by apply_over_axes() are just what we
    # need for broadcasting in the following product.
    d = observed.ndim
    expected = reduce(np.multiply, margsums) / observed.sum() ** (d - 1)
    return expected 
Example 11
Project: Computable   Author: ktraunmueller   File: contingency.py    License: MIT License 4 votes vote down vote up
def margins(a):
    """Return a list of the marginal sums of the array `a`.

    Parameters
    ----------
    a : ndarray
        The array for which to compute the marginal sums.

    Returns
    -------
    margsums : list of ndarrays
        A list of length `a.ndim`.  `margsums[k]` is the result
        of summing `a` over all axes except `k`; it has the same
        number of dimensions as `a`, but the length of each axis
        except axis `k` will be 1.

    Examples
    --------
    >>> a = np.arange(12).reshape(2, 6)
    >>> a
    array([[ 0,  1,  2,  3,  4,  5],
           [ 6,  7,  8,  9, 10, 11]])
    >>> m0, m1 = margins(a)
    >>> m0
    array([[15],
           [51]])
    >>> m1
    array([[ 6,  8, 10, 12, 14, 16]])

    >>> b = np.arange(24).reshape(2,3,4)
    >>> m0, m1, m2 = margins(b)
    >>> m0
    array([[[ 66]],
           [[210]]])
    >>> m1
    array([[[ 60],
            [ 92],
            [124]]])
    >>> m2
    array([[[60, 66, 72, 78]]])
    """
    margsums = []
    ranged = list(range(a.ndim))
    for k in ranged:
        marg = np.apply_over_axes(np.sum, a, [j for j in ranged if j != k])
        margsums.append(marg)
    return margsums 
Example 12
Project: Computable   Author: ktraunmueller   File: contingency.py    License: MIT License 4 votes vote down vote up
def expected_freq(observed):
    """
    Compute the expected frequencies from a contingency table.

    Given an n-dimensional contingency table of observed frequencies,
    compute the expected frequencies for the table based on the marginal
    sums under the assumption that the groups associated with each
    dimension are independent.

    Parameters
    ----------
    observed : array_like
        The table of observed frequencies.  (While this function can handle
        a 1-D array, that case is trivial.  Generally `observed` is at
        least 2-D.)

    Returns
    -------
    expected : ndarray of float64
        The expected frequencies, based on the marginal sums of the table.
        Same shape as `observed`.

    Examples
    --------
    >>> observed = np.array([[10, 10, 20],[20, 20, 20]])
    >>> expected_freq(observed)
    array([[ 12.,  12.,  16.],
           [ 18.,  18.,  24.]])

    """
    # Typically `observed` is an integer array. If `observed` has a large
    # number of dimensions or holds large values, some of the following
    # computations may overflow, so we first switch to floating point.
    observed = np.asarray(observed, dtype=np.float64)

    # Create a list of the marginal sums.
    margsums = margins(observed)

    # Create the array of expected frequencies.  The shapes of the
    # marginal sums returned by apply_over_axes() are just what we
    # need for broadcasting in the following product.
    d = observed.ndim
    expected = reduce(np.multiply, margsums) / observed.sum() ** (d - 1)
    return expected 
Example 13
Project: GraphicDesignPatternByPython   Author: Relph1119   File: contingency.py    License: MIT License 4 votes vote down vote up
def margins(a):
    """Return a list of the marginal sums of the array `a`.

    Parameters
    ----------
    a : ndarray
        The array for which to compute the marginal sums.

    Returns
    -------
    margsums : list of ndarrays
        A list of length `a.ndim`.  `margsums[k]` is the result
        of summing `a` over all axes except `k`; it has the same
        number of dimensions as `a`, but the length of each axis
        except axis `k` will be 1.

    Examples
    --------
    >>> a = np.arange(12).reshape(2, 6)
    >>> a
    array([[ 0,  1,  2,  3,  4,  5],
           [ 6,  7,  8,  9, 10, 11]])
    >>> m0, m1 = margins(a)
    >>> m0
    array([[15],
           [51]])
    >>> m1
    array([[ 6,  8, 10, 12, 14, 16]])

    >>> b = np.arange(24).reshape(2,3,4)
    >>> m0, m1, m2 = margins(b)
    >>> m0
    array([[[ 66]],
           [[210]]])
    >>> m1
    array([[[ 60],
            [ 92],
            [124]]])
    >>> m2
    array([[[60, 66, 72, 78]]])
    """
    margsums = []
    ranged = list(range(a.ndim))
    for k in ranged:
        marg = np.apply_over_axes(np.sum, a, [j for j in ranged if j != k])
        margsums.append(marg)
    return margsums 
Example 14
Project: GraphicDesignPatternByPython   Author: Relph1119   File: contingency.py    License: MIT License 4 votes vote down vote up
def expected_freq(observed):
    """
    Compute the expected frequencies from a contingency table.

    Given an n-dimensional contingency table of observed frequencies,
    compute the expected frequencies for the table based on the marginal
    sums under the assumption that the groups associated with each
    dimension are independent.

    Parameters
    ----------
    observed : array_like
        The table of observed frequencies.  (While this function can handle
        a 1-D array, that case is trivial.  Generally `observed` is at
        least 2-D.)

    Returns
    -------
    expected : ndarray of float64
        The expected frequencies, based on the marginal sums of the table.
        Same shape as `observed`.

    Examples
    --------
    >>> observed = np.array([[10, 10, 20],[20, 20, 20]])
    >>> from scipy.stats import expected_freq
    >>> expected_freq(observed)
    array([[ 12.,  12.,  16.],
           [ 18.,  18.,  24.]])

    """
    # Typically `observed` is an integer array. If `observed` has a large
    # number of dimensions or holds large values, some of the following
    # computations may overflow, so we first switch to floating point.
    observed = np.asarray(observed, dtype=np.float64)

    # Create a list of the marginal sums.
    margsums = margins(observed)

    # Create the array of expected frequencies.  The shapes of the
    # marginal sums returned by apply_over_axes() are just what we
    # need for broadcasting in the following product.
    d = observed.ndim
    expected = reduce(np.multiply, margsums) / observed.sum() ** (d - 1)
    return expected 
Example 15
Project: bayesloop   Author: christophmark   File: core.py    License: MIT License 4 votes vote down vote up
def getHyperParameterDistribution(self, name, plot=False, **kwargs):
        """
        Computes marginal hyper-parameter distribution of a single hyper-parameter in a HyperStudy fit.

        Args:
            name(str): Name of the hyper-parameter to display
                (first model hyper-parameter)
            plot(bool): If True, a bar chart of the distribution is created
            **kwargs: All further keyword-arguments are passed to the bar-plot (see matplotlib documentation)

        Returns:
            ndarray, ndarray: The first array contains the hyper-parameter values, the second one the
                corresponding probability values
        """
        # check if only a standard fit has been carried out
        if len(self.hyperGridValues) < 2:
            raise PostProcessingError('At least two combinations of hyper-parameter values need to be fitted to '
                                      'evaluate a hyper-parameter distribution. Check transition model.')

        paramIndex = self._getHyperParameterIndex(self.transitionModel, name)

        axesToMarginalize = list(range(len(self.flatHyperParameterNames)))
        axesToMarginalize.remove(paramIndex)

        # reshape hyper-parameter distribution for easy marginalizing
        hyperGridSteps = []
        for x in self.flatHyperParameters:
            if isinstance(x, Iterable):
                hyperGridSteps.append(len(x))
            else:
                hyperGridSteps.append(1)

        distribution = self.hyperParameterDistribution.reshape(hyperGridSteps, order='C')
        marginalDistribution = np.squeeze(np.apply_over_axes(np.sum, distribution, axesToMarginalize))
        marginalDistribution *= np.prod(self.hyperGridConstant)  # convert to probability (from density)

        x = self.flatHyperParameters[paramIndex]
        if plot:
            # check if categorical
            if np.any(np.abs(np.diff(np.diff(x))) > 10 ** -10):
                plt.bar(np.arange(len(x)), marginalDistribution, align='center', width=1., **kwargs)
                plt.xticks(np.arange(len(x)), x)
                plt.ylabel('probability')
            # regular spacing
            else:
                plt.bar(x, marginalDistribution, align='center', width=self.hyperGridConstant[paramIndex], **kwargs)
                plt.ylabel('probability')

            plt.xlabel(self.flatHyperParameterNames[paramIndex])

        return x, marginalDistribution 
Example 16
Project: bayesloop   Author: christophmark   File: core.py    License: MIT License 4 votes vote down vote up
def getCurrentParameterDistribution(self, name, plot=False, density=True, **kwargs):
        """
        Compute the current marginal parameter distribution.

        Args:
            name(str): Name of the parameter to display
            plot(bool): If True, a plot of the distribution is created
            density(bool): If true, probability density is plotted; if false, probability values.
            **kwargs: All further keyword-arguments are passed to the plot (see matplotlib documentation)

        Returns:
            ndarray, ndarray: The first array contains the parameter values, the second one the corresponding
                probability values
        """
        # get parameter index
        paramIndex = -1
        for i, n in enumerate(self.observationModel.parameterNames):
            if n == name:
                paramIndex = i

        # check if match was found
        if paramIndex == -1:
            raise PostProcessingError('Wrong parameter name. Available options: {0}'
                                      .format(self.observationModel.parameterNames))

        axesToMarginalize = list(range(len(self.observationModel.parameterNames)))
        try:
            axesToMarginalize.remove(paramIndex)
        except ValueError:
            raise PostProcessingError('Wrong parameter index. Available indices: {}'.format(axesToMarginalize))

        x = self.marginalGrid[paramIndex]
        dx = self.latticeConstant[paramIndex]
        marginalDistribution = np.squeeze(
            np.apply_over_axes(np.sum, self.marginalizedPosterior, axesToMarginalize)).copy()

        if density:
            marginalDistribution /= dx

        if plot:
            plt.fill_between(x, 0, marginalDistribution, **kwargs)
            plt.xlabel(self.observationModel.parameterNames[paramIndex])
            if density:
                plt.ylabel('probability density')
            else:
                plt.ylabel('probability')

        return x, marginalDistribution 
Example 17
Project: bayesloop   Author: christophmark   File: core.py    License: MIT License 4 votes vote down vote up
def getCurrentHyperParameterDistribution(self, name, plot=False, **kwargs):
        """
        Computes marginal hyper-parameter distribution of a single hyper-parameter at the current time step in an
        OnlineStudy fit.

        Args:
            name(str): hyper-parameter name
            plot(bool): If True, a bar chart of the distribution is created
            **kwargs: All further keyword-arguments are passed to the bar-plot (see matplotlib documentation)

        Returns:
            ndarray, ndarray: The first array contains the hyper-parameter values, the second one the
                corresponding probability values
        """
        # determine indices of transition model and hyper-parameter
        hpIndex = -1
        for i, tm in enumerate(self.transitionModels):
            try:
                hpIndex = self._getHyperParameterIndex(tm, name)
                tmIndex = i
            except PostProcessingError:
                pass
        if hpIndex == -1:
            raise PostProcessingError('No hyper-parameter "{}" found. Check hyper-parameter names.'.format(name))

        hyperParameterDistribution = self.hyperParameterDistribution[tmIndex]
        axesToMarginalize = list(range(len(self.hyperParameterNames[tmIndex])))
        axesToMarginalize.remove(hpIndex)

        # reshape hyper-parameter grid for easy marginalization
        hyperGridSteps = [len(x) for x in self.allFlatHyperParameterValues[tmIndex]]
        distribution = hyperParameterDistribution.reshape(hyperGridSteps, order='C')
        marginalDistribution = np.squeeze(np.apply_over_axes(np.sum, distribution, axesToMarginalize))
        marginalDistribution *= np.prod(self.hyperGridConstants[tmIndex])

        x = self.allFlatHyperParameterValues[tmIndex][hpIndex]
        if plot:
            # check if categorical
            if np.any(np.abs(np.diff(np.diff(x))) > 10 ** -10):
                plt.bar(np.arange(len(x)), marginalDistribution, align='center', width=1., **kwargs)
                plt.xticks(np.arange(len(x)), x)
                plt.ylabel('probability')
            # regular spacing
            else:
                plt.bar(x, marginalDistribution, align='center',
                        width=self.hyperGridConstants[tmIndex][hpIndex],
                        **kwargs)
                plt.ylabel('probability')

            plt.xlabel(self.hyperParameterNames[tmIndex][hpIndex])

        return x, marginalDistribution 
Example 18
Project: TextDetector   Author: zchengquan   File: retina.py    License: GNU General Public License v3.0 4 votes vote down vote up
def downsample_rect(img,
                    start_row,
                    start_col,
                    end_row,
                    end_col,
                    width,
                    output,
                    start_idx):
    """
    .. todo::

        WRITEME

    Parameters
    ----------
    img : WRITEME
        numpy matrix in topological order
        (batch size, rows, cols, channels)
    start_row : WRITEME
        row index of top-left corner of rectangle to average pool
    start_col : WRITEME
        col index of top-left corner of rectangle to average pool
    end_row : WRITEME
        row index of bottom-right corner of rectangle to average pool
    end_col : WRITEME
        col index of bottom-right corner of rectangle to average pool
    width : WRITEME
        take the mean over rectangular block of this width
    output : WRITEME
        dense design matrix, of shape (batch size, rows*cols*channels)
    start_idx : WRITEME
        column index where to start writing the output
    """
    idx = start_idx

    for i in xrange(start_row, end_row - width + 1, width):
        for j in xrange(start_col, end_col - width + 1, width):
            block = img[:, i:i + width, j:j + width]
            output[:, idx] = numpy.apply_over_axes(
                numpy.mean, block, axes=[1, 2])[:, 0, 0]
            idx += 1

    return idx 
Example 19
Project: Splunking-Crime   Author: nccgroup   File: contingency.py    License: GNU Affero General Public License v3.0 4 votes vote down vote up
def margins(a):
    """Return a list of the marginal sums of the array `a`.

    Parameters
    ----------
    a : ndarray
        The array for which to compute the marginal sums.

    Returns
    -------
    margsums : list of ndarrays
        A list of length `a.ndim`.  `margsums[k]` is the result
        of summing `a` over all axes except `k`; it has the same
        number of dimensions as `a`, but the length of each axis
        except axis `k` will be 1.

    Examples
    --------
    >>> a = np.arange(12).reshape(2, 6)
    >>> a
    array([[ 0,  1,  2,  3,  4,  5],
           [ 6,  7,  8,  9, 10, 11]])
    >>> m0, m1 = margins(a)
    >>> m0
    array([[15],
           [51]])
    >>> m1
    array([[ 6,  8, 10, 12, 14, 16]])

    >>> b = np.arange(24).reshape(2,3,4)
    >>> m0, m1, m2 = margins(b)
    >>> m0
    array([[[ 66]],
           [[210]]])
    >>> m1
    array([[[ 60],
            [ 92],
            [124]]])
    >>> m2
    array([[[60, 66, 72, 78]]])
    """
    margsums = []
    ranged = list(range(a.ndim))
    for k in ranged:
        marg = np.apply_over_axes(np.sum, a, [j for j in ranged if j != k])
        margsums.append(marg)
    return margsums 
Example 20
Project: Splunking-Crime   Author: nccgroup   File: contingency.py    License: GNU Affero General Public License v3.0 4 votes vote down vote up
def expected_freq(observed):
    """
    Compute the expected frequencies from a contingency table.

    Given an n-dimensional contingency table of observed frequencies,
    compute the expected frequencies for the table based on the marginal
    sums under the assumption that the groups associated with each
    dimension are independent.

    Parameters
    ----------
    observed : array_like
        The table of observed frequencies.  (While this function can handle
        a 1-D array, that case is trivial.  Generally `observed` is at
        least 2-D.)

    Returns
    -------
    expected : ndarray of float64
        The expected frequencies, based on the marginal sums of the table.
        Same shape as `observed`.

    Examples
    --------
    >>> observed = np.array([[10, 10, 20],[20, 20, 20]])
    >>> from scipy.stats import expected_freq
    >>> expected_freq(observed)
    array([[ 12.,  12.,  16.],
           [ 18.,  18.,  24.]])

    """
    # Typically `observed` is an integer array. If `observed` has a large
    # number of dimensions or holds large values, some of the following
    # computations may overflow, so we first switch to floating point.
    observed = np.asarray(observed, dtype=np.float64)

    # Create a list of the marginal sums.
    margsums = margins(observed)

    # Create the array of expected frequencies.  The shapes of the
    # marginal sums returned by apply_over_axes() are just what we
    # need for broadcasting in the following product.
    d = observed.ndim
    expected = reduce(np.multiply, margsums) / observed.sum() ** (d - 1)
    return expected