Python scipy.stats.describe() Examples
The following are 18 code examples of scipy.stats.describe().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module scipy.stats, or try the search function.
Example #1
Source File: pstatcounter.py From sparklingpandas with Apache License 2.0 | 6 votes |
def merge(self, frame):
    """
    Fold another DataFrame into the accumulated per-column statistics.

    Parameters
    ----------
    frame: pandas DataFrame whose columns are merged into the stats
        counters already tracked in ``self._column_stats``.

    Returns
    -------
    ``self``, so calls can be chained.
    """
    # Only the tracked column names are needed.  Each entry is re-assigned
    # under its existing key, so the dict never changes size mid-iteration.
    for name in self._column_stats:
        values = frame[[name]].values
        nobs, extremes, average = scistats.describe(values)[:3]

        partial = StatCounter()
        partial.n = nobs
        partial.mu = average
        # Sum of squared deviations from this frame's own column mean.
        partial.m2 = np.sum((values - average) ** 2)
        partial.minValue, partial.maxValue = extremes

        self._column_stats[name] = self._column_stats[name].mergeStats(partial)
    return self
Example #2
Source File: test_stats.py From Computable with MIT License | 6 votes |
def test_describe():
    """Check ``stats.describe`` on a 5x4 array, reduced along either axis.

    The input has three rows of ones followed by two rows of twos, so every
    column holds the sample ``[1, 1, 1, 2, 2]``.  The same expectations are
    verified for ``describe(x)`` and for ``describe(x.T, axis=1)``.

    The count and min/max are exact and compared with ``assert_equal``.
    Mean and variance are computed floating-point quantities, so they are
    compared with the same 13-decimal tolerance already used for skewness
    and kurtosis instead of exact equality.
    """
    x = np.vstack((np.ones((3, 4)), 2 * np.ones((2, 4))))

    nc, mmc = (5, ([1., 1., 1., 1.], [2., 2., 2., 2.]))
    mc = np.array([1.4, 1.4, 1.4, 1.4])
    vc = np.array([0.3, 0.3, 0.3, 0.3])
    skc = [0.40824829046386357] * 4
    kurtc = [-1.833333333333333] * 4

    def check(result):
        n, mm, m, v, sk, kurt = result
        assert_equal(n, nc)
        assert_equal(mm, mmc)
        # Rounded floating-point results: compare with a tolerance.
        assert_array_almost_equal(m, mc, decimal=13)
        assert_array_almost_equal(v, vc, decimal=13)
        assert_array_almost_equal(sk, skc, decimal=13)
        assert_array_almost_equal(kurt, kurtc, decimal=13)

    check(stats.describe(x))
    check(stats.describe(x.T, axis=1))
Example #3
Source File: pretrain_embedding.py From CCKS2019-IPRE with Apache License 2.0 | 6 votes |
def stat(seq_length, type):
    """Print summary statistics of sequence lengths and save two plots.

    ``seq_length`` is converted with ``np.asarray`` and summarised with
    ``stats.describe``.  A scatter plot (index vs. length) and a 10-bin
    histogram are drawn side by side and written to
    ``<type>_len_stats.jpg``.

    NOTE(review): the figure created here is never closed in this
    function -- confirm whether callers rely on it staying open.
    """
    print('Seq len info :')
    lengths = np.asarray(seq_length)
    positions = np.arange(0, len(lengths), dtype=np.int32)
    print(stats.describe(lengths))

    plt.figure(figsize=(16, 9))

    # Left panel: one red dot per sequence.
    plt.subplot(121)
    plt.plot(positions, lengths, 'ro')
    plt.grid(True)
    plt.xlabel('index')
    plt.ylabel('seq_len')
    plt.title('Scatter Plot')

    # Right panel: distribution of the lengths.
    plt.subplot(122)
    plt.hist(lengths, bins=10, label=['seq_len'])
    plt.grid(True)
    plt.xlabel('seq_len')
    plt.ylabel('freq')
    plt.title('Histogram')

    output_path = type + '_len_stats.jpg'
    plt.savefig(output_path, format='jpg')
Example #4
Source File: test_stats.py From GraphicDesignPatternByPython with MIT License | 6 votes |
def test_describe_axis_none(self): x = np.vstack((np.ones((3, 4)), 2 * np.ones((2, 4)))) # expected values e_nobs, e_minmax = (20, (1.0, 2.0)) e_mean = 1.3999999999999999 e_var = 0.25263157894736848 e_skew = 0.4082482904638634 e_kurt = -1.8333333333333333 # actual values a = stats.describe(x, axis=None) assert_equal(a.nobs, e_nobs) assert_almost_equal(a.minmax, e_minmax) assert_almost_equal(a.mean, e_mean) assert_almost_equal(a.variance, e_var) assert_array_almost_equal(a.skewness, e_skew, decimal=13) assert_array_almost_equal(a.kurtosis, e_kurt, decimal=13)
Example #5
Source File: test_stats.py From GraphicDesignPatternByPython with MIT License | 5 votes |
def test_describe_numbers(self): x = np.vstack((np.ones((3,4)), 2 * np.ones((2,4)))) nc, mmc = (5, ([1., 1., 1., 1.], [2., 2., 2., 2.])) mc = np.array([1.4, 1.4, 1.4, 1.4]) vc = np.array([0.3, 0.3, 0.3, 0.3]) skc = [0.40824829046386357] * 4 kurtc = [-1.833333333333333] * 4 n, mm, m, v, sk, kurt = stats.describe(x) assert_equal(n, nc) assert_equal(mm, mmc) assert_equal(m, mc) assert_equal(v, vc) assert_array_almost_equal(sk, skc, decimal=13) assert_array_almost_equal(kurt, kurtc, decimal=13) n, mm, m, v, sk, kurt = stats.describe(x.T, axis=1) assert_equal(n, nc) assert_equal(mm, mmc) assert_equal(m, mc) assert_equal(v, vc) assert_array_almost_equal(sk, skc, decimal=13) assert_array_almost_equal(kurt, kurtc, decimal=13) x = np.arange(10.) x[9] = np.nan nc, mmc = (9, (0.0, 8.0)) mc = 4.0 vc = 7.5 skc = 0.0 kurtc = -1.2300000000000002 n, mm, m, v, sk, kurt = stats.describe(x, nan_policy='omit') assert_equal(n, nc) assert_equal(mm, mmc) assert_equal(m, mc) assert_equal(v, vc) assert_array_almost_equal(sk, skc) assert_array_almost_equal(kurt, kurtc, decimal=13) assert_raises(ValueError, stats.describe, x, nan_policy='raise') assert_raises(ValueError, stats.describe, x, nan_policy='foobar')
Example #6
Source File: ABuStatsUtil.py From abu with GNU General Public License v3.0 | 5 votes |
def print_stats(arr):
    """
    Output summary statistics for ``arr``.

    eg:
        input:

                      tsla    bidu   noah    sfun    goog    vips   aapl
        2014-07-25  223.57  226.50  15.32  12.110  589.02  21.349  97.67
        2014-07-28  224.82  225.80  16.13  12.450  590.60  21.548  99.02
        2014-07-29  225.01  220.00  16.75  12.220  585.61  21.190  98.38
        ...            ...     ...    ...     ...     ...     ...    ...
        2016-07-22  222.27  160.88  25.50   4.850  742.74  13.510  98.66
        2016-07-25  230.01  160.25  25.57   4.790  739.77  13.390  97.34
        2016-07-26  225.93  163.09  24.75   4.945  740.92  13.655  97.76

        output:

        array size = 504
        array min  = [ 143.67  132.37   12.95    4.44  492.55   10.35   90.34]
        array max  = [ 286.04  250.34   37.32   12.52  776.6    30.    133.  ]
        array mean = [ 228.4885  193.4488   23.7362    7.2458  624.3016   19.0181  110.881 ]
        array var  = [  653.509    752.7421    30.1604     3.3109  7310.2084    27.0994   135.156 ]
        array std  = [ 25.5638  27.4361   5.4919   1.8196  85.4998   5.2057  11.6257]
        array skew = [-0.2826 -0.2544  0.1456  1.0322  0.2095  0.095   0.1719]
        array kurt = [ 0.0093 -0.8414 -0.4205  0.4802 -1.547  -0.9203 -1.2104]

    :param arr: pd.DataFrame or pd.Series or Iterable
    """
    nobs, (lowest, highest), mean, variance, skew, kurt = scs.describe(arr)

    # Log when running under ipython, otherwise print.
    emit = logging.info if ABuEnv.g_is_ipython else print

    emit('array size = {}'.format(nobs))
    emit('array min  = {}'.format(lowest))
    emit('array max  = {}'.format(highest))
    emit('array mean = {}'.format(mean))
    emit('array var  = {}'.format(variance))
    # Standard deviation is derived from the variance reported by describe().
    emit('array std  = {}'.format(np.sqrt(variance)))
    emit('array skew = {}'.format(skew))
    emit('array kurt = {}'.format(kurt))
Example #7
Source File: extras.py From Splunking-Crime with GNU Affero General Public License v3.0 | 5 votes |
def __init__(self, args, **kwds):
    """Set up the distribution from a sample or from its moments.

    Parameters
    ----------
    args : sequence
        Interpretation depends on ``mode``:

        * ``'sample'`` (default): raw observations; the last four fields
          returned by ``stats.describe`` are used as ``mvsk``.
        * ``'mvsk'``: the four values handed to ``mvsk2mc``.
        * ``'centmom'``: values stored as ``cnt`` and converted with
          ``mc2mvsk``.
    **kwds
        Only ``mode`` is read.

    Raises
    ------
    ValueError
        If ``mode`` is not one of the three supported values.
    """
    # TODO: replace with super call
    distributions.rv_continuous.__init__(
        self,
        name='Normal Expansion distribution',
        shapes=' ',
        extradoc='''
        The distribution is defined as the Gram-Charlier expansion of
        the normal distribution using the first four moments. The pdf
        is given by

        pdf(x) = (1+ skew/6.0 * H(xc,3) + kurt/24.0 * H(xc,4))*normpdf(xc)

        where xc = (x-mu)/sig is the standardized value of the random variable
        and H(xc,3) and H(xc,4) are Hermite polynomials

        Note: This distribution has to be parameterized during
        initialization and instantiation, and does not have a shape
        parameter after instantiation (similar to frozen distribution
        except for location and scale.) Location and scale can be used
        as with other distributions, however note, that they are relative
        to the initialized distribution.
        ''')

    mode = kwds.get('mode', 'sample')

    if mode == 'sample':
        # describe() returns six fields; the slice drops the first two.
        mu, sig, sk, kur = stats.describe(args)[2:]
        self.mvsk = (mu, sig, sk, kur)
        cnt = mvsk2mc((mu, sig, sk, kur))
    elif mode == 'mvsk':
        cnt = mvsk2mc(args)
        self.mvsk = args
    elif mode == 'centmom':
        cnt = args
        self.mvsk = mc2mvsk(cnt)
    else:
        # List every accepted mode, including the default 'sample'.
        raise ValueError("mode must be 'sample', 'mvsk' or 'centmom'")

    self.cnt = cnt
    self._pdf = pdf_mvsk(self.mvsk)
Example #8
Source File: ex_extras.py From Splunking-Crime with GNU Affero General Public License v3.0 | 5 votes |
def examples_normexpand():
    """Print a walkthrough comparing sample moments with a NormExpan fit.

    Draws 100 values from ``SkewNorm_gen``, builds a ``NormExpan_gen`` from
    that sample, then prints the sample moments, the distribution's
    moments, their difference, the moment conversions and pdf values
    evaluated one unit either side of the first central-moment entry.
    """
    skew_dist = SkewNorm_gen()
    sample = skew_dist.rvs(5, size=100)
    expansion = NormExpan_gen(sample, mode='sample')

    sample_mvsk = stats.describe(sample)[2:]
    print('sample: mu,sig,sk,kur')
    print(sample_mvsk)

    dist_mvsk = expansion.stats(moments='mvsk')
    print('normexpan: mu,sig,sk,kur')
    print(dist_mvsk)

    print('mvsk diff distribution - sample')
    print(np.array(dist_mvsk) - np.array(sample_mvsk))

    print('normexpan attributes mvsk')
    print(mc2mvsk(expansion.cnt))
    print(expansion.mvsk)

    noncentral = mvsk2mnc(dist_mvsk)
    central = mnc2mc(noncentral)
    print('central moments')
    print(central)
    print('non-central moments')
    print(noncentral)

    approx_pdf = pdf_moments(central)
    print('\npdf approximation from moments')
    lower, upper = central[0] - 1, central[0] + 1
    print('pdf at', lower, upper)
    print(approx_pdf([lower, upper]))
    print(expansion.pdf([lower, upper]))
Example #9
Source File: __init__.py From script-languages with MIT License | 5 votes |
def __init__(self, metric_name, measurements):
    """Store a metric's measurements and precompute summary statistics.

    The measurements are echoed to stdout, converted to a NumPy array and
    passed to both ``stats.describe`` and ``stats.bayes_mvs``; the results
    are kept on ``self._stats`` and ``self._bayes_mvs``.
    """
    self._metric_name = metric_name
    self._measurements = measurements
    print("measurements:", self._measurements)

    samples = np.array(self._measurements)
    self._stats = stats.describe(samples)
    self._bayes_mvs = stats.bayes_mvs(samples)
Example #10
Source File: test_stats.py From GraphicDesignPatternByPython with MIT License | 5 votes |
def test_describe_empty(self): assert_raises(ValueError, stats.describe, [])
Example #11
Source File: test_stats.py From GraphicDesignPatternByPython with MIT License | 5 votes |
def test_describe_result_attributes(self):
    """Hand the ``stats.describe`` result and its six expected field names
    to ``check_named_results``."""
    expected_fields = ('nobs', 'minmax', 'mean', 'variance', 'skewness',
                       'kurtosis')
    outcome = stats.describe(np.arange(5))
    check_named_results(outcome, expected_fields)
Example #12
Source File: ex_extras.py From vnpy_crypto with MIT License | 5 votes |
def examples_normexpand():
    """Print a walkthrough comparing sample moments with a NormExpan fit.

    Draws 100 values from ``SkewNorm_gen``, builds a ``NormExpan_gen`` from
    that sample, then prints the sample moments, the distribution's
    moments, their difference, the moment conversions and pdf values
    evaluated one unit either side of the first central-moment entry.
    """
    skew_dist = SkewNorm_gen()
    sample = skew_dist.rvs(5, size=100)
    expansion = NormExpan_gen(sample, mode='sample')

    sample_mvsk = stats.describe(sample)[2:]
    print('sample: mu,sig,sk,kur')
    print(sample_mvsk)

    dist_mvsk = expansion.stats(moments='mvsk')
    print('normexpan: mu,sig,sk,kur')
    print(dist_mvsk)

    print('mvsk diff distribution - sample')
    print(np.array(dist_mvsk) - np.array(sample_mvsk))

    print('normexpan attributes mvsk')
    print(mc2mvsk(expansion.cnt))
    print(expansion.mvsk)

    noncentral = mvsk2mnc(dist_mvsk)
    central = mnc2mc(noncentral)
    print('central moments')
    print(central)
    print('non-central moments')
    print(noncentral)

    approx_pdf = pdf_moments(central)
    print('\npdf approximation from moments')
    lower, upper = central[0] - 1, central[0] + 1
    print('pdf at', lower, upper)
    print(approx_pdf([lower, upper]))
    print(expansion.pdf([lower, upper]))
Example #13
Source File: test_stats.py From GraphicDesignPatternByPython with MIT License | 5 votes |
def test_describe_scalar(self): with suppress_warnings() as sup, np.errstate(invalid="ignore"): sup.filter(RuntimeWarning, "Degrees of freedom <= 0 for slice") n, mm, m, v, sk, kurt = stats.describe(4.) assert_equal(n, 1) assert_equal(mm, (4.0, 4.0)) assert_equal(m, 4.0) assert_(np.isnan(v)) assert_array_almost_equal(sk, 0.0, decimal=13) assert_array_almost_equal(kurt, -3.0, decimal=13)
Example #14
Source File: test_mstats_basic.py From GraphicDesignPatternByPython with MIT License | 5 votes |
def test_describe_result_attributes(self):
    """Hand the ``mstats.describe`` result and its six expected field names
    to ``check_named_results`` (called with ``ma=True``)."""
    expected_fields = ('nobs', 'minmax', 'mean', 'variance', 'skewness',
                       'kurtosis')
    outcome = mstats.describe(np.arange(5))
    check_named_results(outcome, expected_fields, ma=True)
Example #15
Source File: test_mstats_basic.py From GraphicDesignPatternByPython with MIT License | 5 votes |
def test_describe(self): for n in self.get_n(): x, y, xm, ym = self.generate_xy_sample(n) r = stats.describe(x, ddof=1) rm = stats.mstats.describe(xm, ddof=1) for ii in range(6): assert_almost_equal(np.asarray(r[ii]), np.asarray(rm[ii]), decimal=12)
Example #16
Source File: gw_optim.py From otalign with GNU General Public License v3.0 | 5 votes |
def compute_distances(self, X, Y):
    """Compute the intra-domain distance matrices and store them on ``self``.

    Pairwise distances within ``X`` and within ``Y`` are computed with
    ``self.metric`` and, when ``self.normalize_dists`` is ``'max'``,
    ``'mean'`` or ``'median'``, divided by that statistic of the matrix.
    A one-line summary of each matrix is printed, and the results are
    stored as ``self.C1`` and ``self.C2``.

    Raises
    ------
    NotImplementedError
        If ``self.gpu`` is set and ``self.normalize_dists == 'median'``.
        This combination is rejected before any distances are computed.
    """
    print('Computing intra-domain distance matrices...')

    # Fail fast: previously this used ``raise NotImplemented(...)``, which is
    # not an exception class and produced a confusing TypeError, and only
    # after both GPU distance matrices had been computed.
    if self.gpu and self.normalize_dists == 'median':
        raise NotImplementedError(
            "Median normalization not implemented in GPU yet")

    if not self.gpu:
        C1 = sp.spatial.distance.cdist(X, X, metric=self.metric)
        C2 = sp.spatial.distance.cdist(Y, Y, metric=self.metric)
        if self.normalize_dists == 'max':
            C1 /= C1.max()
            C2 /= C2.max()
        elif self.normalize_dists == 'mean':
            C1 /= C1.mean()
            C2 /= C2.mean()
        elif self.normalize_dists == 'median':
            C1 /= np.median(C1)
            C2 /= np.median(C2)
    else:
        C1 = cdist(X, X, metric=self.metric, returnAsGPU=True)
        C2 = cdist(Y, Y, metric=self.metric, returnAsGPU=True)
        if self.normalize_dists == 'max':
            C1.divide(float(np.max(C1.asarray())))
            C2.divide(float(np.max(C2.asarray())))
        elif self.normalize_dists == 'mean':
            C1.divide(float(np.mean(C1.asarray())))
            C2.divide(float(np.mean(C2.asarray())))

    stats_C1 = describe(C1.flatten())
    stats_C2 = describe(C2.flatten())

    # Adjacent literals keep the message on one line without embedding
    # source indentation into the output.
    summary = ('Stats Distance Matrix {}. mean: {:8.2f}, median: {:8.2f}, '
               'min: {:8.2f}, max:{:8.2f}')
    for (k, C, v) in [('C1', C1, stats_C1), ('C2', C2, stats_C2)]:
        print(summary.format(k, v.mean, np.median(C),
                             v.minmax[0], v.minmax[1]))

    self.C1, self.C2 = C1, C2
Example #17
Source File: extras.py From vnpy_crypto with MIT License | 5 votes |
def __init__(self, args, **kwds):
    """Set up the distribution from a sample or from its moments.

    Parameters
    ----------
    args : sequence
        Interpretation depends on ``mode``:

        * ``'sample'`` (default): raw observations; the last four fields
          returned by ``stats.describe`` are used as ``mvsk``.
        * ``'mvsk'``: the four values handed to ``mvsk2mc``.
        * ``'centmom'``: values stored as ``cnt`` and converted with
          ``mc2mvsk``.
    **kwds
        Only ``mode`` is read.

    Raises
    ------
    ValueError
        If ``mode`` is not one of the three supported values.
    """
    # TODO: replace with super call
    distributions.rv_continuous.__init__(
        self,
        name='Normal Expansion distribution',
        shapes=' ',
        extradoc='''
        The distribution is defined as the Gram-Charlier expansion of
        the normal distribution using the first four moments. The pdf
        is given by

        pdf(x) = (1+ skew/6.0 * H(xc,3) + kurt/24.0 * H(xc,4))*normpdf(xc)

        where xc = (x-mu)/sig is the standardized value of the random variable
        and H(xc,3) and H(xc,4) are Hermite polynomials

        Note: This distribution has to be parameterized during
        initialization and instantiation, and does not have a shape
        parameter after instantiation (similar to frozen distribution
        except for location and scale.) Location and scale can be used
        as with other distributions, however note, that they are relative
        to the initialized distribution.
        ''')

    mode = kwds.get('mode', 'sample')

    if mode == 'sample':
        # describe() returns six fields; the slice drops the first two.
        mu, sig, sk, kur = stats.describe(args)[2:]
        self.mvsk = (mu, sig, sk, kur)
        cnt = mvsk2mc((mu, sig, sk, kur))
    elif mode == 'mvsk':
        cnt = mvsk2mc(args)
        self.mvsk = args
    elif mode == 'centmom':
        cnt = args
        self.mvsk = mc2mvsk(cnt)
    else:
        # List every accepted mode, including the default 'sample'.
        raise ValueError("mode must be 'sample', 'mvsk' or 'centmom'")

    self.cnt = cnt
    self._pdf = pdf_mvsk(self.mvsk)
Example #18
Source File: test_norm_expan.py From vnpy_crypto with MIT License | 5 votes |
def test_mvsk(self): mvsk = stats.describe(self.rvs)[-4:] assert_allclose(self.dist2.mvsk, mvsk, rtol=1e-12)