Python scipy.stats.chi2.cdf() Examples

The following are code examples showing how to use scipy.stats.chi2.cdf(), collected from open source Python projects.
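
A minimal sketch of the function itself before the project examples: chi2.cdf(x, df) returns P(X <= x) for a chi-square random variable with df degrees of freedom, and chi2.sf(x, df) returns the complementary upper-tail probability used for p-values. The numbers below are illustrative.

from scipy.stats import chi2

# 3.84 is roughly the 95th percentile of the chi-square distribution with 1 dof
print(chi2.cdf(3.84, df=1))  # ~0.95
print(chi2.sf(3.84, df=1))   # ~0.05, same as 1 - chi2.cdf(3.84, df=1)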

Example 1
Project: statistics_introduction   Author: dhwang99   File: LRT.py    GNU General Public License v3.0
def lrt_for_pea():
    pmf = np.array([9./16, 3./16, 3./16, 1./16])
    test_data = np.array([315, 101, 108, 32])
    test_sum = test_data.sum() * 1.

    r = len(pmf) - 1
    q = 0

    seta_hat = test_data/test_sum 

    # lamb = 2 * sum(Xi * log(pi_hat / pi))
    lambs = 2 * np.log(seta_hat/pmf) * test_data
    lamb = lambs.sum()
    
    p_val = 1 - chi2.cdf(lamb, r-q)

    print "p value for pea: %.4f" % p_val 
Example 2
Project: statistics_introduction   Author: dhwang99   File: chi_square_test.py    GNU General Public License v3.0
def chi_for_pea():
    pmf = np.array([9./16, 3./16, 3./16, 1./16])
    test_data = np.array([315, 101, 108, 32])
    test_sum = test_data.sum() * 1.

    df = len(pmf) - 1
    npi = test_sum * pmf
    X2 = ((test_data - npi) ** 2) * 1./npi
    chisq = X2.sum()
    p = 1 - chi2.cdf(chisq, df)

    print "Detail  : chisq for pea: %.4f; p value for pea: %.4f" % (chisq, p)

    # computing it directly also works:
    f_obs = test_data 
    f_exp = npi
    chisq, p = chisquare(f_obs, f_exp)
    print "Directed: chisq for pea: %.4f; p value for pea: %.4f" % (chisq, p) 
Example 3
Project: sebaPhD   Author: sebalander   File: bayesLib.py    BSD 3-Clause "New" or "Revised" License
def varMahal(c1, n, c2, rank=False):
    '''
    calculate mahalanobis distance between two matrices, taking the first one
    as reference (order is important)
    if rank enabled, also returns the accumulated probability up to that
    mahalanobis distance taking into account 3 degrees of freedom
    '''
    # drop the redundant row and column, since they add no information
    c1Var = varVar2(c1, n)[[0,1,3]].T[[0,1,3]].T
    c1Pres = inv(c1Var) # precision matrix

    c1flat = c1[[0,0,1],[0,1,1]]
    c2flat = c2[[0,0,1],[0,1,1]]

    cFlatDif = c1flat - c2flat

    mahDist = cFlatDif.dot(c1Pres).dot(cFlatDif)

    if rank:
        ranking = chi2.cdf(mahDist, 3)
        return mahDist, ranking
    else:
        return mahDist 
Example 4
Project: quartetsampling   Author: FePhyFoFum   File: rep_data.py    GNU General Public License v3.0
def chi2_test(val0, val1):
    """Calculate Pearson Chi-Squared for the special case of
       two values that are expected to be equal
       Arguments:
           val0: first value
           val1: second value
    """
    from scipy.stats import chi2
    try:
        chisq = float((val0 - val1)**2) / float(val0 + val1)
        if not chisq:
            return (0, 1)
        pval = 1.0 - chi2.cdf(chisq, 1)
        return (chisq, pval)
    except ZeroDivisionError as _:
        return (0, 1) 
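
A quick worked call, assuming the chi2_test function above is in scope: with val0=60 and val1=40 the statistic is (60 - 40)**2 / 100 = 4.0 and the p-value is 1 - chi2.cdf(4.0, 1), about 0.0455.

chisq, pval = chi2_test(60, 40)
print(chisq, pval)  # 4.0 0.0455...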
Example 5
Project: osqf2015   Author: mvaz   File: model.py    MIT License
def confidence(self, likelihood_stat):
        p_value = chi2.cdf(likelihood_stat, 1)
        return p_value 
Example 6
Project: jr-tools   Author: kingjr   File: base.py    BSD 2-Clause "Simplified" License
def dPrime(hits, misses, fas, crs):
    from scipy.stats import norm
    from math import exp, sqrt
    Z = norm.ppf
    hits, misses, fas, crs = float(hits), float(misses), float(fas), float(crs)
    # From Jonas Kristoffer Lindelov : lindeloev.net/?p=29
    # Floors and ceilings are replaced by half hits and half FAs
    halfHit = 0.5 / (hits + misses)
    halfFa = 0.5 / (fas + crs)

    # Calculate hitrate and avoid d' infinity
    hitRate = hits / (hits + misses)
    if hitRate == 1:
        hitRate = 1 - halfHit
    if hitRate == 0:
        hitRate = halfHit

    # Calculate false alarm rate and avoid d' infinity
    faRate = fas/(fas+crs)
    if faRate == 1:
        faRate = 1 - halfFa
    if faRate == 0:
        faRate = halfFa

    # Return d', beta, c and Ad'
    out = {}
    out['d'] = Z(hitRate) - Z(faRate)
    out['beta'] = exp((Z(faRate)**2 - Z(hitRate)**2) / 2)
    out['c'] = -(Z(hitRate) + Z(faRate))/2
    out['Ad'] = norm.cdf(out['d']/sqrt(2))
    return out 
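
A small usage sketch with made-up counts (20 hits, 5 misses, 8 false alarms, 17 correct rejections), assuming the dPrime function above is in scope:

out = dPrime(20, 5, 8, 17)
print(out['d'], out['c'])  # d' ~1.31, criterion c ~-0.19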
Example 7
Project: descqa   Author: LSSTDESC   File: CalcStats.py    BSD 3-Clause "New" or "Revised" License
def chisq(difference, covariance, dof):
    d = np.asarray(difference)
    cov = np.asarray(covariance)
    if cov.ndim == 1:
        cov = np.diag(cov)
    chisq_value = np.dot(d, np.dot(np.linalg.inv(cov), d))
    return chisq_value, chi2.cdf(chisq_value, dof) 
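
Note that the second return value is the lower-tail probability chi2.cdf(chisq_value, dof), not the usual upper-tail p-value. A usage sketch with independent errors, where the covariance can be passed as a 1-D vector of variances (illustrative numbers, assuming the chisq function above is in scope):

import numpy as np
from scipy.stats import chi2

diff = np.array([1.0, -2.0, 0.5])
variances = np.array([1.0, 4.0, 0.25])   # treated as a diagonal covariance
stat, cdf_val = chisq(diff, variances, dof=3)
print(stat)     # 1/1 + 4/4 + 0.25/0.25 = 3.0
print(cdf_val)  # chi2.cdf(3.0, 3) ~0.608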
Example 8
Project: descqa   Author: LSSTDESC   File: stats.py    BSD 3-Clause "New" or "Revised" License
def chisq(difference, covariance, dof):
    d = np.asarray(difference)
    cov = np.asarray(covariance)
    if cov.ndim == 1:
        cov = np.diag(cov)
    chisq_value = np.dot(d, np.dot(np.linalg.inv(cov), d))
    return chisq_value, chi2.cdf(chisq_value, dof) 
Example 9
Project: jMetalPy   Author: jMetal   File: functions.py    MIT License
def sign_test(data):
    """ Given the results drawn from two algorithms/methods X and Y, the sign test analyses if
    there is a difference between X and Y.

    .. note:: Null Hypothesis: Pr(X<Y)= 0.5

    :param data: An (n x 2) array or DataFrame containing the results. In data, each column represents an algorithm and each row a problem.
    :return p_value: The associated p-value from the binomial distribution.
    :return bstat: Number of successes.
    """

    if type(data) == pd.DataFrame:
        data = data.values

    if data.shape[1] == 2:
        X, Y = data[:, 0], data[:, 1]
        n_perf = data.shape[0]
    else:
        raise ValueError(
            'Initialization ERROR. Incorrect number of dimensions for axis 1')

    # Compute the differences
    Z = X - Y
    # Compute the number of pairs Z<0
    Wminus = sum(Z < 0)
    # If H_0 is true ---> W follows Binomial(n,0.5)
    p_value_minus = 1 - binom.cdf(k=Wminus, p=0.5, n=n_perf)

    # Compute the number of pairs Z>0
    Wplus = sum(Z > 0)
    # If H_0 is true ---> W follows Binomial(n,0.5)
    p_value_plus = 1 - binom.cdf(k=Wplus, p=0.5, n=n_perf)

    p_value = 2 * min([p_value_minus, p_value_plus])

    return pd.DataFrame(data=np.array([Wminus, Wplus, p_value]), index=['Num X<Y', 'Num X>Y', 'p-value'],
                        columns=['Results']) 
Example 10
Project: jMetalPy   Author: jMetal   File: functions.py    MIT License
def friedman_test(data):
    """ Friedman ranking test.

    .. note:: Null Hypothesis: In a set of k (>=2) treatments (or tested algorithms), all the treatments are equivalent, so their average ranks should be equal.

    :param data: An (n x k) array or DataFrame containing the results. In data, each column represents an algorithm and each row a problem.
    :return p_value: The associated p-value.
    :return friedman_stat: Friedman's chi-square.
    """

    # Initial Checking
    if type(data) == pd.DataFrame:
        data = data.values

    if data.ndim == 2:
        n_samples, k = data.shape
    else:
        raise ValueError(
            'Initialization ERROR. Incorrect number of array dimensions')
    if k < 2:
        raise ValueError(
            'Initialization Error. Incorrect number of dimensions for axis 1.')

    # Compute ranks.
    datarank = ranks(data)

    # Compute for each algorithm the ranking average.
    avranks = np.mean(datarank, axis=0)

    # Get Friedman statistics
    friedman_stat = (12.0 * n_samples) / (k * (k + 1.0)) * \
                    (np.sum(avranks ** 2) - (k * (k + 1) ** 2) / 4.0)

    # Compute p-value
    p_value = (1.0 - chi2.cdf(friedman_stat, df=(k - 1)))

    return pd.DataFrame(data=np.array([friedman_stat, p_value]), index=['Friedman-statistic', 'p-value'],
                        columns=['Results']) 
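
scipy.stats ships an equivalent test, friedmanchisquare, which takes one sample per treatment; absent ties the two should agree. A cross-check on illustrative numbers (four problems, three algorithms):

import numpy as np
from scipy.stats import friedmanchisquare

data = np.array([[0.1, 0.2, 0.3],
                 [0.4, 0.6, 0.5],
                 [0.7, 0.8, 0.9],
                 [0.2, 0.1, 0.3]])
print(friedmanchisquare(data[:, 0], data[:, 1], data[:, 2]))
# statistic 4.5, pvalue ~0.105 -- should match friedman_test(data) above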
Example 11
Project: statistics_introduction   Author: dhwang99   File: chi_square_test.py    GNU General Public License v3.0
def chi_for_war():
    alpha = 0.05
    years = np.linspace(0, 4, 5)
    wars = np.array([223, 142, 48, 15, 4])
    wars_sum = wars.sum()

    # Poisson parameter estimate: lambda_hat = 1/n * sum(Xi)
    lambda_hat = 1./wars_sum * np.dot(wars, years) 
    
    # observed frequencies
    fis = np.array(wars)
    fis[-2] += fis[-1]
    
    p_of_years = poisson.pmf(years, lambda_hat)
    npi = p_of_years * wars_sum
    npi[-2] += npi[-1]
    
    stats = np.power(fis - npi,  2) / npi
    chisq = stats[:-1].sum()
    
    # chi-square computation
    df = 5 - 1 - 1 - 1  # one df lost because lambda_hat was estimated from the data; one more for merging the last cell, where n*pi < 5
    alpha_ppf = chi2.ppf(1-alpha, df=df)
    p = 1 - chi2.cdf(chisq, df=df)
    print "Detail  : chisq: %.4f; p: %.4f; alpha: %.4f; alpha_ppf: %.4f" % (chisq, p, alpha, alpha_ppf)

    # computing it directly also works:
    f_obs = fis[:-1]
    f_exp = npi[:-1]
    # lambda_hat was estimated from the data, so pass ddof=1
    chisq, p = chisquare(f_obs, f_exp, ddof=1)
    print "Directed: chisq: %.4f; p: %.4f; alpha: %.4f; alpha_ppf: %.4f" % (chisq, p, alpha, alpha_ppf) 
Example 12
Project: dave   Author: barentsen   File: covar.py    MIT License
def inverseChiSquare(p, dof=2):
    """Compute inverse chi square function.

    For a given value of ``p``, computes the chi-square value
    corresponding to that probability.

    For example, the probability of getting a chi-square value greater
    than 5.991 with two degrees of freedom is <0.05. So
    ``inverseChiSquare(.05, 2)`` is approximately 5.991.

    Inputs:
    ------------
    p
        (float) Desired probability

    Optional Inputs
    ----------------
    dof
        (int) Degrees of freedom.


    Returns:
    -------------
    The chi-squared value corresponding to the input probability

    Notes:
    ---------
    Scipy does support this directly via ``chi2.ppf`` and ``chi2.isf``
    (see the note after this example); this implementation instead
    interpolates over the available chi-square distribution function.
    """

    y = np.logspace(2, -3, 100)
    chi = chi2(df=dof)
    x = 1 - chi.cdf(y)

    return np.interp(p, x, y) 
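
For reference, the direct scipy equivalents, which can replace or cross-check the interpolation above:

from scipy.stats import chi2

print(chi2.isf(0.05, df=2))      # 5.991..., the x with P(X > x) = 0.05
print(chi2.ppf(1 - 0.05, df=2))  # the same value via the inverse CDF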
Example 13
Project: pathpy   Author: pathpy   File: multi_order_model.py    GNU Affero General Public License v3.0
def likelihood_ratio_test(self, observations=None, null=0, order=1,
                              threshold=0.01):

        Log.debug('start likelihood ratio test')
        a = datetime.datetime.now()

        # calculate likelihoods
        likelihood_0 = self.likelihood(observations, order=null, log=True)
        likelihood_1 = self.likelihood(observations, order=order, log=True)

        # calculate test statistics x = -2 * (log L0 - log L1)
        x = -2*(likelihood_0 - likelihood_1)

        # calculate degrees of freedom
        dof_0 = self.degrees_of_freedom(order=null)
        dof_1 = self.degrees_of_freedom(order=order)

        # calculate the additional degrees of freedom in the alternative model
        delta_dof = dof_1 - dof_0

        # calculate p-value
        p_value = 1 - chi2.cdf(x, delta_dof)

        # reject the null hypothesis if p-value is below the threshold
        accept = p_value < threshold

        # some information
        Log.debug('Likelihood ratio test for order = {}'.format(order))
        Log.debug('test statistics x = {}'.format(x))
        Log.debug('additional degrees of freedom = {}'.format(delta_dof))
        Log.debug('p-value = {}'.format(p_value))
        Log.debug('reject the null hypothesis = {}'.format(accept))

        b = datetime.datetime.now()
        Log.debug('end likelihood ratio test:' +
                  ' {} seconds'.format((b-a).total_seconds()))

        return accept, p_value 
Example 14
Project: pyFTS   Author: PYFTS   File: ResidualAnalysis.py    GNU General Public License v3.0
def ljung_box_test(residuals, lags=[1,2,3], alpha=0.5):
    from statsmodels.stats.diagnostic import acorr_ljungbox
    from scipy.stats import chi2
    
    stat, pval = acorr_ljungbox(residuals, lags=lags)
    
    rows = []

    for ct, Q in enumerate(stat):
      lag = ct+1
      p_value = 1 - chi2.cdf(Q, df=lag)
      critical_value = chi2.ppf(1 - alpha, df=lag)
      rows.append([lag, Q, p_value, critical_value, 'H0 rejected' if Q > critical_value else 'H0 accepted'])
        
    return pd.DataFrame(rows, columns=['Lag','Statistic','p-Value','Critical Value', 'Result']) 
Example 15
Project: brainpipe   Author: EtienneCmb   File: circstat.py    GNU General Public License v3.0
def circ_corrcc(alpha, x):
    """Correlation coefficient between one circular and one linear random
    variable.
    
    Args:
        alpha: vector
            Sample of angles in radians

        x: vector
            Sample of linear random variable

    Returns:
        rho: float
            Correlation coefficient

        pval: float
            p-value

    Code taken from the Circular Statistics Toolbox for Matlab
    By Philipp Berens, 2009
    Python adaptation by Etienne Combrisson
    """
    if len(alpha) != len(x):
        raise ValueError('The length of alpha and x must be the same')
    n = len(alpha)

    # Compute correlation coefficient for sin and cos independently
    rxs = pearsonr(x,np.sin(alpha))[0]
    rxc = pearsonr(x,np.cos(alpha))[0]
    rcs = pearsonr(np.sin(alpha),np.cos(alpha))[0]

    # Compute angular-linear correlation (equ. 27.47)
    rho = np.sqrt((rxc**2 + rxs**2 - 2*rxc*rxs*rcs)/(1-rcs**2))

    # Compute pvalue
    pval = 1 - chi2.cdf(n*rho**2, 2)
    
    return rho, pval 
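
A usage sketch with synthetic data, assuming circ_corrcc is in scope along with the module-level imports it relies on (numpy as np, plus pearsonr and chi2 from scipy.stats):

import numpy as np

np.random.seed(0)
alpha = np.random.uniform(0, 2 * np.pi, 200)    # angles in radians
x = np.sin(alpha) + 0.5 * np.random.randn(200)  # linear variable coupled to the angle
rho, pval = circ_corrcc(alpha, x)
print(rho, pval)  # rho around 0.8 and pval near 0 for this strongly coupled pair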
Example 16
Project: dfoil   Author: jbpease   File: dfoil.py    GNU General Public License v3.0
def chi2_test(val0, val1):
    """Calculate Pearson Chi-Squared for the special case of
       two values that are expected to be equal
       Arguments:
           val0: first value
           val1: second value
    """
    try:
        chisq = float((val0 - val1)**2) / float(val0 + val1)
        if not chisq:
            return (0, 1)
        pval = 1.0 - chi2.cdf(chisq, 1)
        return (chisq, pval)
    except ZeroDivisionError as exc:
        return (0, 1) 
Example 17
Project: tQuakes   Author: seap-udea   File: tquakes.py    GNU General Public License v2.0
def pValue(chisq,df):
    p=1-chiSquare.cdf(chisq,df)
    return p 
Example 18
Project: mvftools   Author: jbpease   File: mvfchromoplot.py    GNU General Public License v3.0
def chi2_test(val0, val1):
    """Calculate Pearson Chi-Squared for two values that should be equal
    """
    try:
        chisq = float((val0 - val1)**2) / float(val0 + val1)
        if chisq == 0:
            return (0, 1)
        pval = 1.0 - chi2.cdf(chisq, 1)
        return (chisq, pval)
    except ZeroDivisionError:
        return (0, 1) 
Example 19
Project: scRNA-Seq   Author: broadinstitute   File: nearest_neighbors.py    BSD 3-Clause "New" or "Revised" License
def calc_kBET_for_one_chunk(knn_indices, attr_values, ideal_dist, K):
	dof = ideal_dist.size - 1

	ns = knn_indices.shape[0]
	results = np.zeros((ns, 2))
	for i in range(ns):
		observed_counts = pd.Series(attr_values[knn_indices[i,:]]).value_counts(sort = False).values
		expected_counts = ideal_dist * K
		stat = np.sum(np.divide(np.square(np.subtract(observed_counts, expected_counts)), expected_counts))
		p_value = 1 - chi2.cdf(stat, dof)
		results[i, 0] = stat
		results[i, 1] = p_value

	return results 
Example 20
Project: PyData-London2015   Author: mvaz   File: model.py    MIT License
def confidence(self, likelihood_stat):
        p_value = chi2.cdf(likelihood_stat, 1)
        return p_value 
Example 21
Project: jr-tools   Author: kingjr   File: base.py    BSD 2-Clause "Simplified" License
def corr_linear_circular(X, alpha):
    # Authors:  Jean-Remi King <[email protected]>
    #           Niccolo Pescetelli <[email protected]>
    #
    # Licence : BSD-simplified
    """

    Parameters
    ----------
        X : numpy.array, shape (n_angles, n_dims)
            The linear data
        alpha : numpy.array, shape (n_angles,)
            The angular data (if n_dims == 1, repeated across all x dimensions)
    Returns
    -------
        R : numpy.array, shape (n_dims)
            R values
        R2 : numpy.array, shape (n_dims)
            R square values
        p_val : numpy.array, shape (n_dims)
            P values

    Adapted from:
        Circular Statistics Toolbox for Matlab
        By Philipp Berens, 2009
        [email protected] - www.kyb.mpg.de/~berens/circStat.html
        Equation 27.47
    """

    from scipy.stats import chi2
    import numpy as np

    # computes correlation for sin and cos separately
    rxs = repeated_corr(X, np.sin(alpha))
    rxc = repeated_corr(X, np.cos(alpha))
    rcs = repeated_corr(np.sin(alpha), np.cos(alpha))

    # tile alpha across multiple dimension without requiring memory
    if X.ndim > 1 and alpha.ndim == 1:
        rcs = rcs[:, np.newaxis]

    # Adapted from equation 27.47
    R = (rxc ** 2 + rxs ** 2 - 2 * rxc * rxs * rcs) / (1 - rcs ** 2)

    # JR adhoc way of having a sign....
    R = np.sign(rxs) * np.sign(rxc) * R
    R2 = np.sqrt(R ** 2)

    # Get degrees of freedom
    n = len(alpha)
    pval = 1 - chi2.cdf(n * R2, 2)

    return R, R2, pval 
Example 22
Project: jr-tools   Author: kingjr   File: base.py    BSD 2-Clause "Simplified" License
def corr_circular_linear(alpha, X):
    # Authors:  Jean-Remi King <[email protected]>
    #
    # Licence : BSD-simplified
    """

    Parameters
    ----------
        alpha : numpy.array, shape (n_angles,)
            The angular data (if n_dims == 1, repeated across all x dimensions)
        X : numpy.array, shape (n_angles, n_dims)
            The linear data
    Returns
    -------
        R : numpy.array, shape (n_dims)
            R values
        R2 : numpy.array, shape (n_dims)
            R square values
        p_val : numpy.array, shape (n_dims)
            P values

    Adapted from:
        Circular Statistics Toolbox for Matlab
        By Philipp Berens, 2009
        [email protected] - www.kyb.mpg.de/~berens/circStat.html
        Equation 27.47
    """

    from scipy.stats import chi2
    from jr.utils import pairwise
    import numpy as np

    # computes correlation for sin and cos separately
    # WIP Applies repeated correlation if X is vector
    # TODO: deals with non repeated correlations (X * ALPHA)
    if alpha.ndim > 1:
        rxs = repeated_corr(np.sin(alpha), X)
        rxc = repeated_corr(np.cos(alpha), X)
        rcs = np.zeros_like(alpha[0, :])
        rcs = pairwise(np.sin(alpha), np.cos(alpha), func=_loop_corr,
                       n_jobs=-1)
    else:
        # WIP Applies repeated correlation if alpha is vector
        rxs = repeated_corr(X, np.sin(alpha))
        rxc = repeated_corr(X, np.cos(alpha))
        rcs = repeated_corr(np.sin(alpha), np.cos(alpha))

    # Adapted from equation 27.47
    R = (rxc ** 2 + rxs ** 2 - 2 * rxc * rxs * rcs) / (1 - rcs ** 2)

    # JR adhoc way of having a sign....
    R = np.sign(rxs) * np.sign(rxc) * R
    R2 = np.sqrt(R ** 2)

    # Get degrees of freedom
    n = len(X)
    pval = 1 - chi2.cdf(n * R2, 2)

    return R, R2, pval 
Example 23
Project: jMetalPy   Author: jMetal   File: functions.py    MIT License
def friedman_aligned_rank_test(data):
    """ Method of aligned ranks for the Friedman test.

    .. note:: Null Hypothesis: In a set of k (>=2) treatments (or tested algorithms), all the treatments are equivalent, so their average ranks should be equal.

    :param data: An (n x k) array or DataFrame containing the results. In data, each column represents an algorithm and each row a problem.
    :return p_value: The associated p-value.
    :return aligned_rank_stat: Friedman's aligned rank chi-square statistic.
    """

    # Initial Checking
    if type(data) == pd.DataFrame:
        data = data.values

    if data.ndim == 2:
        n_samples, k = data.shape
    else:
        raise ValueError(
            'Initialization ERROR. Incorrect number of array dimensions')
    if k < 2:
        raise ValueError(
            'Initialization Error. Incorrect number of dimensions for axis 1.')

    # Compute the average value achieved by all algorithms in each problem
    control = np.mean(data, axis=1)
    # Compute the difference between control and data
    diff = [data[:, j] - control for j in range(data.shape[1])]
    # rank diff
    alignedRanks = ranks(np.ravel(diff))
    alignedRanks = np.reshape(alignedRanks, newshape=(n_samples, k), order='F')

    # Compute statistic
    Rhat_i = np.sum(alignedRanks, axis=1)
    Rhat_j = np.sum(alignedRanks, axis=0)
    si, sj = np.sum(Rhat_i ** 2), np.sum(Rhat_j ** 2)

    A = sj - (k * n_samples ** 2 / 4.0) * (k * n_samples + 1) ** 2
    B1 = (k * n_samples * (k * n_samples + 1) * (2 * k * n_samples + 1) / 6.0)
    B2 = si / float(k)

    alignedRanks_stat = ((k - 1) * A) / (B1 - B2)

    p_value = 1 - chi2.cdf(alignedRanks_stat, df=k - 1)

    return pd.DataFrame(data=np.array([alignedRanks_stat, p_value]), index=['Aligned Rank stat', 'p-value'],
                        columns=['Results']) 
Example 24
Project: jMetalPy   Author: jMetal   File: functions.py    MIT License
def quade_test(data):
    """ Quade test.

    .. note:: Null Hypothesis: In a set of k (>=2) treatments (or tested algorithms), all the treatments are equivalent, so their average ranks should be equal.

    :param data: An (n x k) array or DataFrame containing the results. In data, each column represents an algorithm and each row a problem.
    :return p_value: The associated p-value from the F-distribution.
    :return fq: Computed F-value.
    """

    # Initial Checking
    if type(data) == pd.DataFrame:
        data = data.values

    if data.ndim == 2:
        n_samples, k = data.shape
    else:
        raise ValueError(
            'Initialization ERROR. Incorrect number of array dimensions')
    if k < 2:
        raise ValueError(
            'Initialization Error. Incorrect number of dimensions for axis 1.')

    # Compute ranks.
    datarank = ranks(data)
    # Compute the range of each problem
    problemRange = np.max(data, axis=1) - np.min(data, axis=1)
    # Compute problem rank
    problemRank = ranks(problemRange)

    # Compute S_stat: weight of each observation within the problem, adjusted to reflect
    # the significance of the problem when it appears.
    S_stat = np.zeros((n_samples, k))
    for i in range(n_samples):
        S_stat[i, :] = problemRank[i] * (datarank[i, :] - 0.5 * (k + 1))
    Salg = np.sum(S_stat, axis=0)

    # Compute Fq (Quade Test statistic) and associated p_value
    A = np.sum(S_stat ** 2)
    B = np.sum(Salg ** 2) / float(n_samples)

    if A == B:
        Fq = np.Inf
        p_value = (1 / (np.math.factorial(k))) ** (n_samples - 1)
    else:
        Fq = (n_samples - 1.0) * B / (A - B)
        p_value = 1 - f.cdf(Fq, k - 1, (k - 1) * (n_samples - 1))

    return pd.DataFrame(data=np.array([Fq, p_value]), index=['Quade Test statistic', 'p-value'], columns=['Results']) 
Example 25
Project: vnpy_crypto   Author: birforce   File: elanova.py    MIT License
def compute_ANOVA(self, mu=None, mu_start=0, return_weights=0):

        """
        Returns -2 log likelihood, the p-value and the maximum likelihood
        estimate for a common mean.

        Parameters
        ----------

        mu : float
            If a mu is specified, ANOVA is conducted with mu as the
            common mean.  Otherwise, the common mean is the maximum
            empirical likelihood estimate of the common mean.
            Default is None.

        mu_start : float
            Starting value for the common mean if a specific mu is not specified.
            Default = 0

        return_weights : bool
            If True, returns the weights on observations that maximize the
            likelihood.  Default is False.

        Returns
        -------

        res: tuple
            The log-likelihood, p-value and estimate for the common mean.
        """
        if mu is not None:
            llr = self._opt_common_mu(mu)
            pval = 1 - chi2.cdf(llr, self.num_groups - 1)
            if return_weights:
                return llr, pval, mu, self.new_weights
            else:
                return llr, pval, mu
        else:
            res = optimize.fmin_powell(self._opt_common_mu, mu_start,
                                       full_output=1, disp=False)
            llr = res[1]
            mu_common = float(res[0])
            pval = 1 - chi2.cdf(llr, self.num_groups - 1)
            if return_weights:
                return llr, pval, mu_common, self.new_weights
            else:
                return llr, pval, mu_common 
Example 26
Project: spikemetrics   Author: SpikeInterface   File: metrics.py    MIT License
def mahalanobis_metrics(all_pcs, all_labels, this_unit_id):
    """ Calculates isolation distance and L-ratio (metrics computed from Mahalanobis distance)

    Based on metrics described in Schmitzer-Torbert et al. (2005) Neurosci 131: 1-11

    Inputs:
    -------
    all_pcs : numpy.ndarray (num_spikes x PCs)
        2D array of PCs for all spikes
    all_labels : numpy.ndarray (num_spikes x 0)
        1D array of cluster labels for all spikes
    this_unit_id : Int
        number corresponding to unit for which these metrics will be calculated

    Outputs:
    --------
    isolation_distance : float
        Isolation distance of this unit
    l_ratio : float
        L-ratio for this unit

    """

    pcs_for_this_unit = all_pcs[all_labels == this_unit_id, :]
    pcs_for_other_units = all_pcs[all_labels != this_unit_id, :]

    mean_value = np.expand_dims(np.mean(pcs_for_this_unit, 0), 0)

    try:
        VI = np.linalg.inv(np.cov(pcs_for_this_unit.T))
    except np.linalg.LinAlgError:  # case of singular matrix
        return np.nan, np.nan

    mahalanobis_other = np.sort(cdist(mean_value,
                                      pcs_for_other_units,
                                      'mahalanobis', VI=VI)[0])

    mahalanobis_self = np.sort(cdist(mean_value,
                                     pcs_for_this_unit,
                                     'mahalanobis', VI=VI)[0])

    n = np.min([pcs_for_this_unit.shape[0], pcs_for_other_units.shape[0]])  # number of spikes

    if n >= 2:
        dof = pcs_for_this_unit.shape[1]  # number of features
        l_ratio = np.sum(1 - chi2.cdf(pow(mahalanobis_other, 2), dof)) / mahalanobis_other.shape[0]
        isolation_distance = pow(mahalanobis_other[n - 1], 2)
        # if math.isnan(l_ratio):
        #     print("NaN detected", mahalanobis_other, VI)
    else:
        l_ratio = np.nan
        isolation_distance = np.nan

    return isolation_distance, l_ratio 
Example 27
Project: dave   Author: barentsen   File: covar.py    MIT License
def computeProbabilityOfObservedOffset(x, y, p=None):
    """Compute probability that p is consistent with mean of distribution.

    For a 2 dimensional distribution of points, given by ``x`` and ``y``,
    compute the probability that the mean of the distribution is consistent
    with the input point ``p``

    Inputs:
    -------------
    x, y
        (float) Input values. Require that ``len(x) == len(y)``

    Optional Inputs:
    -----------------
    p
        (array of length 2) Point to test. Default is [0,0]

    Returns:
    -----------
    probOffset
        (float) Probability that input point is consistent with mean
        of distribution.
    chiSquare
        (float) The chi squared of the point. For highly inconsistent points
        the computed probability of offset flatlines at zero. The chisquare
        value can then be used to estimate the relative consistencies of
        different points.


    Notes:
    ---------
    See ``plotErrorEllipse`` for a description of the algorithm
    """

    if p is None:
        p = [0, 0]
    p = np.array(p)
    assert(len(p) == 2)

    assert(len(x) == len(y))
    if len(x) < 2:
        raise ValueError("Need at least two points to compute probability of offset")

    mu = np.array([np.mean(x), np.mean(y)])
    cov = np.cov(x, y) / np.sqrt(len(x))

    eigenVals, eigenVecs = np.linalg.eigh(cov)
    v1 = eigenVecs[:, 0]
    v2 = eigenVecs[:, 1]

    pDash = (p-mu)
    offset_pix = np.array([np.dot(pDash, v1), np.dot(pDash, v2)])
    sigma = np.sqrt(eigenVals)
    offset_sigma = offset_pix / sigma

    s = np.sum(offset_sigma**2)
    probOffset = chi2.cdf(s, 2)

    return probOffset, s 
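
A usage sketch on synthetic scatter, assuming numpy and chi2 are imported as in the module: for points drawn around the origin, the test point [0, 0] should give a small chi-square and a low offset probability.

import numpy as np

np.random.seed(1)
x = np.random.randn(100)
y = np.random.randn(100)
prob, s = computeProbabilityOfObservedOffset(x, y, p=[0, 0])
print(prob, s)  # prob = chi2.cdf(s, 2); values near 0 mean [0, 0] is consistent with the mean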
Example 28
Project: Splunking-Crime   Author: nccgroup   File: elanova.py    GNU Affero General Public License v3.0
def compute_ANOVA(self, mu=None, mu_start=0, return_weights=0):

        """
        Returns -2 log likelihood, the p-value and the maximum likelihood
        estimate for a common mean.

        Parameters
        ----------

        mu : float
            If a mu is specified, ANOVA is conducted with mu as the
            common mean.  Otherwise, the common mean is the maximum
            empirical likelihood estimate of the common mean.
            Default is None.

        mu_start : float
            Starting value for the common mean if a specific mu is not specified.
            Default = 0

        return_weights : bool
            If True, returns the weights on observations that maximize the
            likelihood.  Default is False.

        Returns
        -------

        res: tuple
            The log-likelihood, p-value and estimate for the common mean.
        """
        if mu is not None:
            llr = self._opt_common_mu(mu)
            pval = 1 - chi2.cdf(llr, self.num_groups - 1)
            if return_weights:
                return llr, pval, mu, self.new_weights
            else:
                return llr, pval, mu
        else:
            res = optimize.fmin_powell(self._opt_common_mu, mu_start,
                                       full_output=1, disp=False)
            llr = res[1]
            mu_common = float(res[0])
            pval = 1 - chi2.cdf(llr, self.num_groups - 1)
            if return_weights:
                return llr, pval, mu_common, self.new_weights
            else:
                return llr, pval, mu_common