Python scipy.stats.anderson() Examples
The following are 19 code examples of scipy.stats.anderson().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module scipy.stats, or try the search function.
Example #1
Source File: test_gaussianize.py From gaussianize with MIT License | 6 votes |
def test_normality_increase_lambert(self):
    """Print normality diagnostics before and after the ``g`` transform.

    Two samples are processed: a freshly drawn standard Cauchy sample of
    size ``ns`` and ``experimental_data``.  For each one the Anderson-Darling
    and Shapiro statistics are printed and a normal probability plot is
    written to ``self.test_dir``, first for the raw sample and then for the
    sample transformed with ``g.igmm`` / ``g.w_t``.

    Nothing is asserted; the output is meant for visual inspection.
    """
    report_format = 'anderson: %0.3f\tshapiro: %0.3f'

    def report(title, values, png_name):
        # Print both statistics, then save and clear the probability plot.
        print(title)
        print((report_format % (anderson(values)[0], shapiro(values)[0])).expandtabs(30))
        stats.probplot(values, dist="norm", plot=plt)
        plt.savefig(os.path.join(self.test_dir, png_name))
        plt.clf()

    samples = [np.random.standard_cauchy(size=ns), experimental_data]
    for i, y in enumerate(samples):
        print('Distribution %d' % i)
        report('Before', y, '%d_before.png' % i)

        tau = g.igmm(y)
        x = g.w_t(y, tau)
        report('After', x, '%d_after.png' % i)
Example #2
Source File: microlstats.py From pyLIMA with GNU General Public License v3.0 | 6 votes |
def normal_Anderson_Darling(sample):
    """Run an Anderson-Darling normality test on ``sample``.

    The test is delegated to ``ss.anderson`` with its default settings
    (``ss`` is presumably ``scipy.stats``, whose default reference
    distribution is the normal with mean and standard deviation estimated
    from the sample -- confirm against the file's imports).

    :param array_like sample: the sample whose "Gaussianity" is checked

    :returns: the Anderson-Darling statistic, the last critical value
              returned by the test (for the normal case in scipy this is the
              1 % significance level, not the 15 % one) and a judgement flag:
              2 if the statistic is below that critical value, 1 if it is
              below twice that value, 0 otherwise
    :rtype: float, float, int
    """
    # The third element (significance levels) is not needed here.
    AD_stat, AD_critical_values, _ = ss.anderson(sample)
    strictest_critical_value = AD_critical_values[-1]

    # The smaller the statistic, the closer the sample is to the reference
    # distribution; normality is not rejected at the strictest level when
    # the statistic stays below the last critical value.
    if AD_stat < strictest_critical_value:
        AD_judgement = 2
    elif AD_stat < 2 * strictest_critical_value:
        AD_judgement = 1
    else:
        AD_judgement = 0

    return AD_stat, strictest_critical_value, AD_judgement
Example #3
Source File: test_morestats.py From GraphicDesignPatternByPython with MIT License | 6 votes |
def test_normal(self): rs = RandomState(1234567890) x1 = rs.standard_exponential(size=50) x2 = rs.standard_normal(size=50) A, crit, sig = stats.anderson(x1) assert_array_less(crit[:-1], A) A, crit, sig = stats.anderson(x2) assert_array_less(A, crit[-2:]) v = np.ones(10) v[0] = 0 A, crit, sig = stats.anderson(v) # The expected statistic 3.208057 was computed independently of scipy. # For example, in R: # > library(nortest) # > v <- rep(1, 10) # > v[1] <- 0 # > result <- ad.test(v) # > result$statistic # A # 3.208057 assert_allclose(A, 3.208057)
Example #4
Source File: test_morestats.py From GraphicDesignPatternByPython with MIT License | 5 votes |
def test_bad_arg(self): assert_raises(ValueError, stats.anderson, [1], dist='plate_of_shrimp')
Example #5
Source File: FeatureFunctionLib.py From FATS with MIT License | 5 votes |
def fit(self, data):
    """Return a logistic transform of the Anderson-Darling statistic.

    The statistic is computed by ``stats.anderson`` on ``data[0]`` and
    mapped to ``1 / (1 + exp(-10 * (statistic - 0.3)))``.
    """
    statistic = stats.anderson(data[0])[0]
    exponent = -10 * (statistic - 0.3)
    return 1 / (1.0 + np.exp(exponent))
Example #6
Source File: test_synthetic_data.py From lightkurve with MIT License | 5 votes |
def test_detrending_residuals():
    """Test the detrending residual distributions

    The synthetic target pixel file is corrected first with ``SFFCorrector``
    and then with ``PLDCorrector``.  After each correction the same checks
    run: the CDPP must improve by more than a factor of ten, the squared
    Anderson statistic of the normalised residuals must stay below the
    threshold, and the residual scatter must stay below two.
    """
    # The residuals should be Gaussian-"ish"
    # Table 4.1 of Ivezic, Connolly, Vanerplas, Gray 2014
    anderson_threshold = 1.57

    def check_correction(raw_lc, corrected_lc):
        # Verify that we get a significant reduction in RMS
        cdpp_improvement = raw_lc.estimate_cdpp() / corrected_lc.estimate_cdpp()
        assert cdpp_improvement > 10.0

        centred_flux = corrected_lc.flux - np.mean(corrected_lc.flux)
        resid_n_sigmas = centred_flux / corrected_lc.flux_err
        A_value = stats.anderson(resid_n_sigmas)[0]
        # NOTE(review): stats.anderson presumably already returns the A^2
        # statistic; confirm that squaring it again here is intended.
        assert A_value**2 < anderson_threshold
        n_sigma = np.std(resid_n_sigmas)
        assert n_sigma < 2.0

    # Retrieve the custom, known signal properties
    tpf = KeplerTargetPixelFile(filename_synthetic_flat)

    # Run the SFF algorithm
    lc = tpf.to_lightcurve()
    sff_lc = SFFCorrector(lc).correct(tpf.pos_corr2, tpf.pos_corr1,
                                      niters=10, windows=5, bins=7,
                                      restore_trend=True)
    check_correction(lc, sff_lc)

    # Repeat the same checks for the PLD correction
    pld_lc = PLDCorrector(tpf).correct(use_gp=False)
    check_correction(lc, pld_lc)
Example #7
Source File: test_morestats.py From GraphicDesignPatternByPython with MIT License | 5 votes |
def test_gumbel_r(self): # gh-2592, gh-6337 # Adds support to 'gumbel_r' and 'gumbel_l' as valid inputs for dist. rs = RandomState(1234567890) x1 = rs.gumbel(size=100) x2 = np.ones(100) A1, crit1, sig1 = stats.anderson(x1, 'gumbel_r') A2, crit2, sig2 = stats.anderson(x2, 'gumbel_r') assert_array_less(A1, crit1[-2:]) assert_(A2 > crit2[-1])
Example #8
Source File: test_morestats.py From GraphicDesignPatternByPython with MIT License | 5 votes |
def test_result_attributes(self):
    """The ``stats.anderson`` result must expose its three named fields."""
    sample = RandomState(1234567890).standard_exponential(size=50)
    expected_fields = ('statistic', 'critical_values', 'significance_level')
    check_named_results(stats.anderson(sample), expected_fields)
Example #9
Source File: ext_anderson_darling.py From feets with MIT License | 5 votes |
def fit(self, magnitude):
    """Return the logistic-transformed Anderson-Darling statistic.

    The statistic from ``stats.anderson(magnitude)`` is mapped to
    ``1 / (1 + exp(-10 * (statistic - 0.3)))`` and returned in a dict
    under the key ``"AndersonDarling"``.
    """
    statistic = stats.anderson(magnitude)[0]
    exponent = -10 * (statistic - 0.3)
    feature_value = 1 / (1.0 + np.exp(exponent))
    return {"AndersonDarling": feature_value}
Example #10
Source File: test_morestats.py From GraphicDesignPatternByPython with MIT License | 5 votes |
def test_gumbel(self): # Regression test for gh-6306. Before that issue was fixed, # this case would return a2=inf. v = np.ones(100) v[0] = 0.0 a2, crit, sig = stats.anderson(v, 'gumbel') # A brief reimplementation of the calculation of the statistic. n = len(v) xbar, s = stats.gumbel_l.fit(v) logcdf = stats.gumbel_l.logcdf(v, xbar, s) logsf = stats.gumbel_l.logsf(v, xbar, s) i = np.arange(1, n+1) expected_a2 = -n - np.mean((2*i - 1) * (logcdf + logsf[::-1])) assert_allclose(a2, expected_a2)
Example #11
Source File: test_morestats.py From GraphicDesignPatternByPython with MIT License | 5 votes |
def test_expon(self): rs = RandomState(1234567890) x1 = rs.standard_exponential(size=50) x2 = rs.standard_normal(size=50) A, crit, sig = stats.anderson(x1, 'expon') assert_array_less(A, crit[-2:]) olderr = np.seterr(all='ignore') try: A, crit, sig = stats.anderson(x2, 'expon') finally: np.seterr(**olderr) assert_(A > crit[-1])
Example #12
Source File: test_morestats.py From Computable with MIT License | 5 votes |
def test_bad_arg(self): assert_raises(ValueError, stats.anderson, [1], dist='plate_of_shrimp')
Example #13
Source File: test_morestats.py From Computable with MIT License | 5 votes |
def test_expon(self): rs = RandomState(1234567890) x1 = rs.standard_exponential(size=50) x2 = rs.standard_normal(size=50) A,crit,sig = stats.anderson(x1,'expon') assert_array_less(A, crit[-2:]) olderr = np.seterr(all='ignore') try: A,crit,sig = stats.anderson(x2,'expon') finally: np.seterr(**olderr) assert_(A > crit[-1])
Example #14
Source File: test_morestats.py From Computable with MIT License | 5 votes |
def test_normal(self): rs = RandomState(1234567890) x1 = rs.standard_exponential(size=50) x2 = rs.standard_normal(size=50) A,crit,sig = stats.anderson(x1) assert_array_less(crit[:-1], A) A,crit,sig = stats.anderson(x2) assert_array_less(A, crit[-2:])
Example #15
Source File: chelmbigstock.py From chelmbigstock with GNU General Public License v3.0 | 4 votes |
def execute(training_data, cv_data, test_data):
    """
    execute is the function where each run is done. main sets parameters then calls execute

    A classifier is obtained from ``learn(training_data, cv_data)``, used to
    predict ``test_data.X`` and compared with ``test_data.y``: summary
    statistics are printed and a prediction-versus-actual scatter plot is
    shown.
    """
    # The second value returned by learn is not used here.
    clf, _ = learn(training_data, cv_data)

    # do an Anderson Darling test on the data to determine if it is a normal fit
    # anderson returns (statistic, critical values, significance levels) in
    # that order; only the statistic is reported below.
    A2, _critical_values, _significance_levels = anderson(test_data.y, dist='norm')
    test_mn = np.mean(test_data.y)
    test_sd = np.std(test_data.y)

    predict_data = clf.predict(test_data.X)
    difference = predict_data - test_data.y
    diff_mn = np.mean(difference)
    diff_sd = np.std(difference)

    print("the value for A2 is ", A2)
    print("The mean and standard deviation of the test data are ", test_mn, test_sd)
    print("The mean and standard deviation of the difference are ", diff_mn, diff_sd)

    # make plot
    # correlation coefficent between prediction and actual data
    coef, _ = pearsonr(predict_data, test_data.y)

    # compare per stock
    plt.plot(predict_data, test_data.y, 'ro')
    plt.title('Comparison per stock')
    plt.xlabel('Prediction')
    plt.ylabel('Actual')
    xmin, xmax, ymin, ymax = plt.axis()
    plt.text(xmin + (xmax - xmin) / 20.0, ymax - (ymax - ymin) / 20.0,
             'cor coef: {}'.format(coef), verticalalignment='top')

    # draw a line of perfect prediction, clipped to the range shared by both axes
    if ymin > xmin:
        xmin = ymin
    if ymax < xmax:
        xmax = ymax
    xs = np.linspace(xmin, xmax)
    plt.plot(xs, xs, 'g--')

    # draw the plot
    plt.tight_layout()
    plt.show()
Example #16
Source File: _adnorm.py From Splunking-Crime with GNU Affero General Public License v3.0 | 4 votes |
def anderson_statistic(x, dist='norm', fit=True, params=(), axis=0):
    '''calculate anderson-darling A2 statistic

    Parameters
    ----------
    x : array_like
        data
    dist : 'norm' or callable
        null distribution for the test statistic. A callable must provide
        a ``cdf`` method and, when ``fit`` is True, a ``fit`` method.
    fit : bool
        If True, then the distribution parameters are estimated.
        Currently only for 1d data x, except in case dist='norm'
    params : tuple
        optional distribution parameters if fit is False
    axis : integer
        If dist is 'norm' or fit is False, then data can be an n-dimensional
        and axis specifies the axis of a variable

    Returns
    -------
    ad2 : float or ndarray
        Anderson-Darling statistic

    Raises
    ------
    ValueError
        If ``dist`` is not callable and either ``fit`` is False or ``dist``
        is not ``'norm'``.
    '''
    x = np.asarray(x)
    y = np.sort(x, axis=axis)
    N = y.shape[axis]

    if fit:
        if dist == 'norm':
            # Standardise with the sample mean and the ddof=1 standard
            # deviation taken along ``axis``.
            xbar = np.expand_dims(np.mean(x, axis=axis), axis)
            s = np.expand_dims(np.std(x, ddof=1, axis=axis), axis)
            w = (y - xbar) / s
            z = stats.norm.cdf(w)
        elif callable(dist):
            params = dist.fit(x)
            z = dist.cdf(y, *params)
        else:
            # Previously this case fell through and failed later because
            # ``z`` was never assigned.
            raise ValueError("if fit is true, then dist needs to be 'norm' "
                             "or callable")
    else:
        if callable(dist):
            z = dist.cdf(y, *params)
        else:
            raise ValueError('if fit is false, then dist needs to be callable')

    i = np.arange(1, N + 1)
    # ``sl1`` places the rank index along ``axis`` (new length-one axes
    # elsewhere); ``sl2`` reverses ``z`` along ``axis``.  NumPy requires
    # tuples, not lists, for multidimensional indexing.
    sl1 = [None] * x.ndim
    sl1[axis] = slice(None)
    sl2 = [slice(None)] * x.ndim
    sl2[axis] = slice(None, None, -1)
    weights = (2 * i[tuple(sl1)] - 1.0) / N
    # log1p(-z) equals log(1 - z) but is more accurate for small z.
    S = np.sum(weights * (np.log(z) + np.log1p(-z[tuple(sl2)])), axis=axis)
    A2 = -N - S
    return A2
Example #17
Source File: _adnorm.py From Splunking-Crime with GNU Affero General Public License v3.0 | 4 votes |
def normal_ad(x, axis=0):
    '''Anderson-Darling test for normal distribution unknown mean and variance

    Parameters
    ----------
    x : array_like
        data array; the statistic is computed along ``axis``
    axis : int
        axis of ``x`` along which the observations of a variable lie

    Returns
    -------
    ad2 : float or ndarray
        Anderson Darling test statistic, as returned by
        ``anderson_statistic(x, dist='norm', fit=True, axis=axis)``
    pval : float or ndarray
        pvalue for hypothesis that the data comes from a normal
        distribution with unknown mean and variance. It is computed from
        the sample-size adjusted statistic with a piecewise exponential
        approximation; adjusted statistics above 13 give 0.0 and negative
        ones in the array case give nan.
    '''
    # Accept any array_like: ``shape`` is needed below.
    x = np.asarray(x)
    ad2 = anderson_statistic(x, dist='norm', fit=True, axis=axis)
    n = x.shape[axis]

    # Sample-size adjustment of the statistic.
    ad2a = ad2 * (1 + 0.75 / n + 2.25 / n ** 2)

    if np.size(ad2a) == 1:
        if (ad2a >= 0.00 and ad2a < 0.200):
            pval = 1 - np.exp(-13.436 + 101.14 * ad2a - 223.73 * ad2a ** 2)
        elif ad2a < 0.340:
            pval = 1 - np.exp(-8.318 + 42.796 * ad2a - 59.938 * ad2a ** 2)
        elif ad2a < 0.600:
            pval = np.exp(0.9177 - 4.279 * ad2a - 1.38 * ad2a ** 2)
        elif ad2a <= 13:
            pval = np.exp(1.2937 - 5.709 * ad2a + 0.0186 * ad2a ** 2)
        else:
            pval = 0.0  # is < 4.9542108058458799e-31
    else:
        # Interval k of ``bounds`` (found with searchsorted) selects
        # ``branches[k]``; index 0 means a negative statistic.
        bounds = np.array([0.0, 0.200, 0.340, 0.600])
        branches = [
            lambda a: np.nan * np.ones_like(a),
            lambda a: 1 - np.exp(-13.436 + 101.14 * a - 223.73 * a ** 2),
            lambda a: 1 - np.exp(-8.318 + 42.796 * a - 59.938 * a ** 2),
            lambda a: np.exp(0.9177 - 4.279 * a - 1.38 * a ** 2),
            lambda a: np.exp(1.2937 - 5.709 * a + 0.0186 * a ** 2),
        ]
        idx = np.searchsorted(bounds, ad2a, side='right')
        pval = np.full_like(ad2a, np.nan)
        for i, branch in enumerate(branches):
            mask = (idx == i)
            pval[mask] = branch(ad2a[mask])
        # Same cutoff as the scalar branch: beyond 13 the last formula is
        # no longer used and the p-value is reported as zero.
        pval[ad2a > 13] = 0.0

    return ad2, pval
Example #18
Source File: _adnorm.py From vnpy_crypto with MIT License | 4 votes |
def normal_ad(x, axis=0):
    '''Anderson-Darling test for normal distribution unknown mean and variance

    Parameters
    ----------
    x : array_like
        data array; the statistic is computed along ``axis``
    axis : int
        axis of ``x`` along which the observations of a variable lie

    Returns
    -------
    ad2 : float or ndarray
        Anderson Darling test statistic, as returned by
        ``anderson_statistic(x, dist='norm', fit=True, axis=axis)``
    pval : float or ndarray
        pvalue for hypothesis that the data comes from a normal
        distribution with unknown mean and variance. It is computed from
        the sample-size adjusted statistic with a piecewise exponential
        approximation; adjusted statistics above 13 give 0.0 and negative
        ones in the array case give nan.
    '''
    # Accept any array_like: ``shape`` is needed below.
    x = np.asarray(x)
    ad2 = anderson_statistic(x, dist='norm', fit=True, axis=axis)
    n = x.shape[axis]

    # Sample-size adjustment of the statistic.
    ad2a = ad2 * (1 + 0.75 / n + 2.25 / n ** 2)

    if np.size(ad2a) == 1:
        if (ad2a >= 0.00 and ad2a < 0.200):
            pval = 1 - np.exp(-13.436 + 101.14 * ad2a - 223.73 * ad2a ** 2)
        elif ad2a < 0.340:
            pval = 1 - np.exp(-8.318 + 42.796 * ad2a - 59.938 * ad2a ** 2)
        elif ad2a < 0.600:
            pval = np.exp(0.9177 - 4.279 * ad2a - 1.38 * ad2a ** 2)
        elif ad2a <= 13:
            pval = np.exp(1.2937 - 5.709 * ad2a + 0.0186 * ad2a ** 2)
        else:
            pval = 0.0  # is < 4.9542108058458799e-31
    else:
        # Interval k of ``bounds`` (found with searchsorted) selects
        # ``branches[k]``; index 0 means a negative statistic.
        bounds = np.array([0.0, 0.200, 0.340, 0.600])
        branches = [
            lambda a: np.nan * np.ones_like(a),
            lambda a: 1 - np.exp(-13.436 + 101.14 * a - 223.73 * a ** 2),
            lambda a: 1 - np.exp(-8.318 + 42.796 * a - 59.938 * a ** 2),
            lambda a: np.exp(0.9177 - 4.279 * a - 1.38 * a ** 2),
            lambda a: np.exp(1.2937 - 5.709 * a + 0.0186 * a ** 2),
        ]
        idx = np.searchsorted(bounds, ad2a, side='right')
        pval = np.full_like(ad2a, np.nan)
        for i, branch in enumerate(branches):
            mask = (idx == i)
            pval[mask] = branch(ad2a[mask])
        # Same cutoff as the scalar branch: beyond 13 the last formula is
        # no longer used and the p-value is reported as zero.
        pval[ad2a > 13] = 0.0

    return ad2, pval
Example #19
Source File: _adnorm.py From vnpy_crypto with MIT License | 4 votes |
def anderson_statistic(x, dist='norm', fit=True, params=(), axis=0):
    '''calculate anderson-darling A2 statistic

    Parameters
    ----------
    x : array_like
        data
    dist : 'norm' or callable
        null distribution for the test statistic. A callable must provide
        a ``cdf`` method and, when ``fit`` is True, a ``fit`` method.
    fit : bool
        If True, then the distribution parameters are estimated.
        Currently only for 1d data x, except in case dist='norm'
    params : tuple
        optional distribution parameters if fit is False
    axis : integer
        If dist is 'norm' or fit is False, then data can be an n-dimensional
        and axis specifies the axis of a variable

    Returns
    -------
    ad2 : float or ndarray
        Anderson-Darling statistic

    Raises
    ------
    ValueError
        If ``dist`` is not callable and either ``fit`` is False or ``dist``
        is not ``'norm'``.
    '''
    x = np.asarray(x)
    y = np.sort(x, axis=axis)
    N = y.shape[axis]

    if fit:
        if dist == 'norm':
            # Standardise with the sample mean and the ddof=1 standard
            # deviation taken along ``axis``.
            xbar = np.expand_dims(np.mean(x, axis=axis), axis)
            s = np.expand_dims(np.std(x, ddof=1, axis=axis), axis)
            w = (y - xbar) / s
            z = stats.norm.cdf(w)
        elif callable(dist):
            params = dist.fit(x)
            z = dist.cdf(y, *params)
        else:
            # Previously this case fell through and failed later because
            # ``z`` was never assigned.
            raise ValueError("if fit is true, then dist needs to be 'norm' "
                             "or callable")
    else:
        if callable(dist):
            z = dist.cdf(y, *params)
        else:
            raise ValueError('if fit is false, then dist needs to be callable')

    i = np.arange(1, N + 1)
    # ``sl1`` places the rank index along ``axis`` (new length-one axes
    # elsewhere); ``sl2`` reverses ``z`` along ``axis``.  NumPy requires
    # tuples, not lists, for multidimensional indexing.
    sl1 = [None] * x.ndim
    sl1[axis] = slice(None)
    sl2 = [slice(None)] * x.ndim
    sl2[axis] = slice(None, None, -1)
    weights = (2 * i[tuple(sl1)] - 1.0) / N
    # log1p(-z) equals log(1 - z) but is more accurate for small z.
    S = np.sum(weights * (np.log(z) + np.log1p(-z[tuple(sl2)])), axis=axis)
    A2 = -N - S
    return A2