Python scipy.stats.anderson() Examples

The following are 14 code examples of scipy.stats.anderson(), collected from open-source projects. The source file and originating project are listed above each example. You may also want to check out all available functions/classes of the module scipy.stats.
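As a quick orientation before the examples, here is a minimal, self-contained sketch (not taken from any of the projects below) of how scipy.stats.anderson() is called and how its return values are read:

import numpy as np
from scipy import stats

# A sample that is normal by construction (illustrative only).
rng = np.random.RandomState(0)
sample = rng.standard_normal(200)

# anderson() returns the A-D statistic, the critical values, and the
# significance levels (in percent) those critical values correspond to.
result = stats.anderson(sample, dist='norm')
print(result.statistic)
print(result.critical_values)     # five values for dist='norm'
print(result.significance_level)  # [15. , 10. ,  5. ,  2.5,  1. ]

# The null hypothesis (the sample is drawn from the fitted distribution) is
# rejected at a given level when the statistic exceeds that level's
# critical value.
for crit, sig in zip(result.critical_values, result.significance_level):
    if result.statistic > crit:
        print('normality rejected at the %.1f%% level' % sig)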
Example #1
Source File: test_gaussianize.py    From gaussianize with MIT License
def test_normality_increase_lambert(self):
        # Generate random data and check that it is closer to normal after the
        # Lambert W transform is inferred and applied.  Context from the
        # original test module (not shown here): np/os/plt/stats imports,
        # `anderson` and `shapiro` from scipy.stats, a sample size `ns`, an
        # `experimental_data` array, and `g`, the gaussianize module.
        for i, y in enumerate([np.random.standard_cauchy(size=ns), experimental_data]):
            print('Distribution %d' % i)
            print('Before')
            print(('anderson: %0.3f\tshapiro: %0.3f' % (anderson(y)[0], shapiro(y)[0])).expandtabs(30))
            stats.probplot(y, dist="norm", plot=plt)
            plt.savefig(os.path.join(self.test_dir, '%d_before.png' % i))
            plt.clf()
    
            tau = g.igmm(y)
            x = g.w_t(y, tau)
            print('After')
            print(('anderson: %0.3f\tshapiro: %0.3f' % (anderson(x)[0], shapiro(x)[0])).expandtabs(30))
            stats.probplot(x, dist="norm", plot=plt)
            plt.savefig(os.path.join(self.test_dir, '%d_after.png' % i))
            plt.clf() 
Example #2
Source File: microlstats.py    From pyLIMA with GNU General Public License v3.0
def normal_Anderson_Darling(sample):
    """Compute an Anderson-Darling test of the sample against a normal
    distribution (scipy estimates the location and scale from the sample).

            :param array_like sample: the sample whose "Gaussianity" you want to check
            :returns: the Anderson-Darling statistic, the critical value associated
            with the 1 % significance level, and the Anderson-Darling judgement
            :rtype: float, float, int
    """

    AD_stat, AD_critical_values, AD_significance_levels = ss.anderson(sample)

    # The statistic measures the (tail-weighted) distance between the empirical
    # distribution of the sample and the fitted normal CDF: the closer to 0,
    # the more Gaussian-like the sample.  For dist='norm' the critical values
    # correspond to the significance levels [15, 10, 5, 2.5, 1] %, so the last
    # entry is the 1 % critical value.

    # Judgement: 0 = normality strongly rejected (statistic at least twice the
    # 1 % critical value), 1 = borderline, 2 = normality not rejected at 1 %.
    AD_judgement = 0

    if AD_stat < 2 * AD_critical_values[-1]:
        AD_judgement = 1

    if AD_stat < AD_critical_values[-1]:
        AD_judgement = 2

    return AD_stat, AD_critical_values[-1], AD_judgement
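A hypothetical call might look like the following (the sample is made up for illustration; ss is scipy.stats, as in the example above):

import numpy as np
import scipy.stats as ss

sample = np.random.RandomState(7).standard_normal(500)
stat, crit_1pc, judgement = normal_Anderson_Darling(sample)
# judgement == 2: statistic below the 1% critical value (normality not rejected)
# judgement == 1: statistic below twice the 1% critical value (borderline)
# judgement == 0: statistic at least twice the critical value (rejected)
print(stat, crit_1pc, judgement)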
Example #3
Source File: test_morestats.py    From GraphicDesignPatternByPython with MIT License
def test_normal(self):
        rs = RandomState(1234567890)
        x1 = rs.standard_exponential(size=50)
        x2 = rs.standard_normal(size=50)
        A, crit, sig = stats.anderson(x1)
        assert_array_less(crit[:-1], A)
        A, crit, sig = stats.anderson(x2)
        assert_array_less(A, crit[-2:])

        v = np.ones(10)
        v[0] = 0
        A, crit, sig = stats.anderson(v)
        # The expected statistic 3.208057 was computed independently of scipy.
        # For example, in R:
        #   > library(nortest)
        #   > v <- rep(1, 10)
        #   > v[1] <- 0
        #   > result <- ad.test(v)
        #   > result$statistic
        #          A
        #   3.208057
        assert_allclose(A, 3.208057) 
Example #4
Source File: test_morestats.py    From GraphicDesignPatternByPython with MIT License
def test_bad_arg(self):
        assert_raises(ValueError, stats.anderson, [1], dist='plate_of_shrimp') 
Example #5
Source File: FeatureFunctionLib.py    From FATS with MIT License
def fit(self, data):

        magnitude = data[0]
        ander = stats.anderson(magnitude)[0]
        return 1 / (1.0 + np.exp(-10 * (ander - 0.3))) 
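The sigmoid 1 / (1 + exp(-10 * (ander - 0.3))) squashes the unbounded Anderson-Darling statistic into (0, 1) so it can serve as a bounded feature, with a statistic of 0.3 mapping to 0.5 (the same transform appears in Example #9 below). A standalone sketch of the mapping; the helper name squash_anderson and the input values are ours, for illustration:

import numpy as np

def squash_anderson(ander):
    # Logistic transform used as the AndersonDarling feature: maps the
    # unbounded A-D statistic to (0, 1); ander = 0.3 maps to 0.5.
    return 1 / (1.0 + np.exp(-10 * (ander - 0.3)))

for a in (0.0, 0.3, 1.0, 3.0):
    print(a, squash_anderson(a))
# A statistic near 0 (very normal-looking data) gives ~0.05; a large
# statistic gives a value near 1.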
Example #6
Source File: test_synthetic_data.py    From lightkurve with MIT License
def test_detrending_residuals():
    """Test the detrending residual distributions"""
    # Retrieve the custom, known signal properties
    tpf = KeplerTargetPixelFile(filename_synthetic_flat)

    # Run the SFF algorithm
    lc = tpf.to_lightcurve()
    corrector = SFFCorrector(lc)
    cor_lc = corrector.correct(tpf.pos_corr2, tpf.pos_corr1,
                               niters=10, windows=5, bins=7, restore_trend=True)

    # Verify that we get a significant reduction in RMS
    cdpp_improvement = lc.estimate_cdpp() / cor_lc.estimate_cdpp()
    assert cdpp_improvement > 10.0

    # The residuals should be Gaussian-"ish"
    # Table 4.1 of Ivezic, Connolly, VanderPlas, Gray 2014
    anderson_threshold = 1.57

    resid_n_sigmas = (cor_lc.flux - np.mean(cor_lc.flux))/cor_lc.flux_err
    A_value, _, _ = stats.anderson(resid_n_sigmas)
    assert A_value**2 < anderson_threshold

    n_sigma = np.std(resid_n_sigmas)
    assert n_sigma < 2.0

    corrector = PLDCorrector(tpf)
    cor_lc = corrector.correct(use_gp=False)

    cdpp_improvement = lc.estimate_cdpp()/cor_lc.estimate_cdpp()
    assert cdpp_improvement > 10.0

    resid_n_sigmas = (cor_lc.flux - np.mean(cor_lc.flux))/cor_lc.flux_err
    A_value, crit, sig = stats.anderson(resid_n_sigmas)
    assert A_value**2 < anderson_threshold

    n_sigma = np.std(resid_n_sigmas)
    assert n_sigma < 2.0 
Example #7
Source File: test_morestats.py    From GraphicDesignPatternByPython with MIT License
def test_gumbel_r(self):
        # gh-2592, gh-6337
        # Adds support for 'gumbel_r' and 'gumbel_l' as valid inputs for dist.
        rs = RandomState(1234567890)
        x1 = rs.gumbel(size=100)
        x2 = np.ones(100)
        A1, crit1, sig1 = stats.anderson(x1, 'gumbel_r')
        A2, crit2, sig2 = stats.anderson(x2, 'gumbel_r')

        assert_array_less(A1, crit1[-2:])
        assert_(A2 > crit2[-1]) 
Example #8
Source File: test_morestats.py    From GraphicDesignPatternByPython with MIT License
def test_result_attributes(self):
        rs = RandomState(1234567890)
        x = rs.standard_exponential(size=50)
        res = stats.anderson(x)
        attributes = ('statistic', 'critical_values', 'significance_level')
        check_named_results(res, attributes) 
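Because the result is a named tuple, the attributes checked above can also be read directly; a minimal sketch:

import numpy as np
from scipy import stats

res = stats.anderson(np.random.standard_exponential(50))
print(res.statistic, res.critical_values, res.significance_level)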
Example #9
Source File: ext_anderson_darling.py    From feets with MIT License
def fit(self, magnitude):
        ander = stats.anderson(magnitude)[0]
        return {"AndersonDarling": 1 / (1.0 + np.exp(-10 * (ander - 0.3)))} 
Example #10
Source File: test_morestats.py    From GraphicDesignPatternByPython with MIT License
def test_gumbel(self):
        # Regression test for gh-6306.  Before that issue was fixed,
        # this case would return a2=inf.
        v = np.ones(100)
        v[0] = 0.0
        a2, crit, sig = stats.anderson(v, 'gumbel')
        # A brief reimplementation of the calculation of the statistic.
        n = len(v)
        xbar, s = stats.gumbel_l.fit(v)
        logcdf = stats.gumbel_l.logcdf(v, xbar, s)
        logsf = stats.gumbel_l.logsf(v, xbar, s)
        i = np.arange(1, n+1)
        expected_a2 = -n - np.mean((2*i - 1) * (logcdf + logsf[::-1]))

        assert_allclose(a2, expected_a2) 
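For reference, the quantity computed as expected_a2 above is the usual Anderson-Darling statistic. With F the fitted null CDF and x_(1) <= ... <= x_(n) the sorted sample, in LaTeX:

A^2 = -n - \frac{1}{n}\sum_{i=1}^{n} (2i - 1)\left[\ln F\bigl(x_{(i)}\bigr) + \ln\bigl(1 - F\bigl(x_{(n+1-i)}\bigr)\bigr)\right]

The reversed logsf[::-1] in the code supplies the ln(1 - F(x_{(n+1-i)})) term, and np.mean provides the 1/n factor.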
Example #11
Source File: test_morestats.py    From GraphicDesignPatternByPython with MIT License
def test_expon(self):
        rs = RandomState(1234567890)
        x1 = rs.standard_exponential(size=50)
        x2 = rs.standard_normal(size=50)
        A, crit, sig = stats.anderson(x1, 'expon')
        assert_array_less(A, crit[-2:])
        olderr = np.seterr(all='ignore')
        try:
            A, crit, sig = stats.anderson(x2, 'expon')
        finally:
            np.seterr(**olderr)
        assert_(A > crit[-1]) 
Example #12
Source File: chelmbigstock.py    From chelmbigstock with GNU General Public License v3.0
def execute(training_data, cv_data, test_data): 
    """
    execute is the function where each run is done. main sets parameters
    then calls execute
    """
    
    clf, regularization_parameter = learn(training_data, cv_data)
    
    # Anderson-Darling test for normality of the test targets.  Note that
    # scipy.stats.anderson returns (statistic, critical_values,
    # significance_level), in that order.
    A2, crit, sig = anderson(test_data.y, dist='norm')
    
    test_mn = np.mean(test_data.y)
    test_sd = np.std(test_data.y)
    
    
    predict_data = clf.predict(test_data.X)
    difference = predict_data - test_data.y
    diff_mn = np.mean(difference)
    diff_sd = np.std(difference)

    print("the value for A2 is ", A2)
    print("The mean and standard deviation of the test data are ",
            test_mn, test_sd)
    print("The mean and standard deviation of the difference are ",
            diff_mn, diff_sd)

    # make plot
    # correlation coefficient between prediction and actual data
    coef, dummy = pearsonr(predict_data, test_data.y)

    # compare per stock
    plt.plot(predict_data, test_data.y, 'ro')
    plt.title('Comparison per stock')
    plt.xlabel('Prediction')
    plt.ylabel('Actual')

    xmin, xmax, ymin, ymax = plt.axis()
    plt.text(xmin + (xmax - xmin) / 20.0, ymax - (ymax - ymin) / 20.0,
            'cor coef: {}'.format(coef),
            verticalalignment='top')

    # draw a line of perfect prediction
    if ymin > xmin:
        xmin = ymin
    if ymax < xmax:
        xmax = ymax
    xs = np.linspace(xmin, xmax)
    plt.plot(xs, xs, 'g--')

    # draw the plot
    plt.tight_layout()
    plt.show() 
Example #13
Source File: _adnorm.py    From Splunking-Crime with GNU Affero General Public License v3.0
def anderson_statistic(x, dist='norm', fit=True, params=(), axis=0):
    '''Calculate the Anderson-Darling A^2 statistic.

    Parameters
    ----------
    x : array_like
        data
    dist : 'norm' or callable
        null distribution for the test statistic
    fit : bool
        If True, then the distribution parameters are estimated.
        Currently only for 1d data x, except in the case dist='norm'.
    params : tuple
        optional distribution parameters if fit is False
    axis : integer
        If dist is 'norm' or fit is False, then the data can be n-dimensional
        and axis specifies the axis of a variable.

    Returns
    -------
    ad2 : float or ndarray
        Anderson-Darling statistic

    '''
    x = np.asarray(x)
    y = np.sort(x, axis=axis)
    N = y.shape[axis]
    if fit:
        if dist == 'norm':
            xbar = np.expand_dims(np.mean(x, axis=axis), axis)
            s = np.expand_dims(np.std(x, ddof=1, axis=axis), axis)
            w = (y - xbar)/s
            z = stats.norm.cdf(w)
        elif hasattr(dist, '__call__'):
            params = dist.fit(x)
            z = dist.cdf(y, *params)
    else:
        if hasattr(dist, '__call__'):
            z = dist.cdf(y, *params)
        else:
            raise ValueError('if fit is false, then dist needs to be callable')

    i = np.arange(1, N+1)
    sl1 = [None]*x.ndim
    sl1[axis] = slice(None)
    sl2 = [slice(None)]*x.ndim
    sl2[axis] = slice(None, None, -1)
    # NumPy requires tuple indices here; indexing with a list of slices is an
    # error in modern NumPy versions.
    S = np.sum((2*i[tuple(sl1)] - 1.0)/N * (np.log(z) + np.log(1 - z[tuple(sl2)])),
               axis=axis)
    A2 = -N - S
    return A2
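For 1-d input with dist='norm', this should match scipy's own statistic, since scipy.stats.anderson also standardizes by the sample mean and the ddof=1 standard deviation. A quick sanity-check sketch (the data below are illustrative):

import numpy as np
from scipy import stats

x = np.random.RandomState(42).standard_normal(100)
a2 = anderson_statistic(x, dist='norm', fit=True)
a2_scipy = stats.anderson(x, dist='norm').statistic
print(a2, a2_scipy)  # the two values should agree to floating-point precision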
Example #14
Source File: _adnorm.py    From Splunking-Crime with GNU Affero General Public License v3.0
def normal_ad(x, axis=0):
    '''Anderson-Darling test for a normal distribution with unknown mean and variance.

    Parameters
    ----------
    x : array_like
        data array
    axis : int
        axis along which the test is computed

    Returns
    -------
    ad2 : float
        Anderson-Darling test statistic
    pval : float
        p-value for the hypothesis that the data come from a normal
        distribution with unknown mean and variance

    '''
    # For 1-d input this is equivalent to stats.anderson(x)[0].
    ad2 = anderson_statistic(x, dist='norm', fit=True, axis=axis)
    n = x.shape[axis]

    # Small-sample correction for the normal case with estimated parameters.
    ad2a = ad2 * (1 + 0.75/n + 2.25/n**2)

    if np.size(ad2a) == 1:
        if (ad2a >= 0.00 and ad2a < 0.200):
            pval = 1 - np.exp(-13.436 + 101.14 * ad2a - 223.73 * ad2a**2)
        elif ad2a < 0.340:
            pval = 1 - np.exp(-8.318 + 42.796 * ad2a - 59.938 * ad2a**2)
        elif ad2a < 0.600:
            pval = np.exp(0.9177 - 4.279 * ad2a - 1.38 * ad2a**2)
        elif ad2a <= 13:
            pval = np.exp(1.2937 - 5.709 * ad2a + 0.0186 * ad2a**2)
        else:
            pval = 0.0  # is < 4.9542108058458799e-31

    else:
        bounds = np.array([0.0, 0.200, 0.340, 0.600])

        pval0 = lambda ad2a: np.nan*np.ones_like(ad2a)
        pval1 = lambda ad2a: 1 - np.exp(-13.436 + 101.14 * ad2a - 223.73 * ad2a**2)
        pval2 = lambda ad2a: 1 - np.exp(-8.318 + 42.796 * ad2a - 59.938 * ad2a**2)
        pval3 = lambda ad2a: np.exp(0.9177 - 4.279 * ad2a - 1.38 * ad2a**2)
        pval4 = lambda ad2a: np.exp(1.2937 - 5.709 * ad2a + 0.0186 * ad2a**2)

        pvalli = [pval0, pval1, pval2, pval3, pval4]

        idx = np.searchsorted(bounds, ad2a, side='right')
        pval = np.nan*np.ones_like(ad2a)
        for i in range(5):
            mask = (idx == i)
            pval[mask] = pvalli[i](ad2a[mask])

    return ad2, pval 
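The correction ad2 * (1 + 0.75/n + 2.25/n**2) and the piecewise exponential fits above appear to follow the D'Agostino & Stephens (1986) approximation for the normal case with estimated parameters. A minimal usage sketch (the sample is illustrative):

import numpy as np

rng = np.random.RandomState(0)
ad2, pval = normal_ad(rng.standard_normal(200))
print(ad2, pval)  # for normal data the p-value is usually well above 0.05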