Python scipy.stats.describe() Examples

The following are 18 code examples of scipy.stats.describe(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module scipy.stats, or try the search function.
Example #1
Source File: pstatcounter.py    From sparklingpandas with Apache License 2.0 6 votes vote down vote up
def merge(self, frame):
        """
        Fold the statistics of another DataFrame into the per-column counters.

        For every column name already present in ``self._column_stats`` a
        fresh ``StatCounter`` is populated from ``scistats.describe`` of that
        column and merged into the stored counter.

        Parameters
        ----------
        frame: pandas DataFrame we will update our stats counter with.

        Returns
        -------
        self
        """
        for name in self._column_stats:
            # Double brackets keep the selection two-dimensional, exactly as
            # the values are handed to describe().
            values = frame[[name]].values
            summary = scistats.describe(values)
            nobs, (lowest, highest), average = summary[:3]

            incoming = StatCounter()
            incoming.n = nobs
            incoming.mu = average
            # Sum of squared deviations from the mean.
            incoming.m2 = np.sum((values - average) ** 2)
            incoming.minValue = lowest
            incoming.maxValue = highest

            existing = self._column_stats[name]
            self._column_stats[name] = existing.mergeStats(incoming)
        return self 
Example #2
Source File: test_stats.py    From Computable with MIT License 6 votes vote down vote up
def test_describe():
    """
    Check ``stats.describe`` on a 5x4 array and on its transpose.

    The array stacks three rows of ones on two rows of twos, so every column
    holds the sample [1, 1, 1, 2, 2]. Describing ``x`` with the default axis
    and ``x.T`` with ``axis=1`` must produce the same six statistics.

    Computed floating-point results (mean, variance, skewness, kurtosis) are
    compared with a tight tolerance rather than exact equality, so the test
    does not depend on the summation order used inside SciPy/NumPy.
    """
    x = np.vstack((np.ones((3,4)),2*np.ones((2,4))))
    nc, mmc = (5, ([1., 1., 1., 1.], [2., 2., 2., 2.]))
    mc = np.array([1.4, 1.4, 1.4, 1.4])
    vc = np.array([0.3, 0.3, 0.3, 0.3])
    skc = [0.40824829046386357]*4
    kurtc = [-1.833333333333333]*4

    # Same expectations for both orientations of the data.
    for data, kwargs in ((x, {}), (x.T, {'axis': 1})):
        n, mm, m, v, sk, kurt = stats.describe(data, **kwargs)
        # Count and min/max are taken straight from the data: exact compare.
        assert_equal(n, nc)
        assert_equal(mm, mmc)
        # Derived statistics: compare within 13 decimals.
        assert_array_almost_equal(m, mc, decimal=13)
        assert_array_almost_equal(v, vc, decimal=13)
        assert_array_almost_equal(sk, skc, decimal=13)
        assert_array_almost_equal(kurt, kurtc, decimal=13)
Example #3
Source File: pretrain_embedding.py    From CCKS2019-IPRE with Apache License 2.0 6 votes vote down vote up
def stat(seq_length, type):
    """
    Print summary statistics of the sequence lengths and save a two-panel plot.

    The left panel is a scatter of length against position, the right panel a
    10-bin histogram. The figure is written to ``<type>_len_stats.jpg``.

    :param seq_length: sequence of lengths, converted with ``np.asarray``
    :param type: prefix of the output file name (concatenated with ``+``)
    """
    print('Seq len info :')
    lengths = np.asarray(seq_length)
    positions = np.arange(0, len(lengths), dtype=np.int32)
    print(stats.describe(lengths))

    plt.figure(figsize=(16, 9))

    # Left panel: one red dot per sequence.
    plt.subplot(121)
    plt.plot(positions, lengths, 'ro')
    plt.grid(True)
    plt.xlabel('index')
    plt.ylabel('seq_len')
    plt.title('Scatter Plot')

    # Right panel: distribution of the lengths.
    plt.subplot(122)
    plt.hist(lengths, bins=10, label=['seq_len'])
    plt.grid(True)
    plt.xlabel('seq_len')
    plt.ylabel('freq')
    plt.title('Histogram')

    output_name = type + '_len_stats.jpg'
    plt.savefig(output_name, format='jpg') 
Example #4
Source File: test_stats.py    From GraphicDesignPatternByPython with MIT License 6 votes vote down vote up
def test_describe_axis_none(self):
        x = np.vstack((np.ones((3, 4)), 2 * np.ones((2, 4))))

        # expected values
        e_nobs, e_minmax = (20, (1.0, 2.0))
        e_mean = 1.3999999999999999
        e_var = 0.25263157894736848
        e_skew = 0.4082482904638634
        e_kurt = -1.8333333333333333

        # actual values
        a = stats.describe(x, axis=None)

        assert_equal(a.nobs, e_nobs)
        assert_almost_equal(a.minmax, e_minmax)
        assert_almost_equal(a.mean, e_mean)
        assert_almost_equal(a.variance, e_var)
        assert_array_almost_equal(a.skewness, e_skew, decimal=13)
        assert_array_almost_equal(a.kurtosis, e_kurt, decimal=13) 
Example #5
Source File: test_stats.py    From GraphicDesignPatternByPython with MIT License 5 votes vote down vote up
def test_describe_numbers(self):
        x = np.vstack((np.ones((3,4)), 2 * np.ones((2,4))))
        nc, mmc = (5, ([1., 1., 1., 1.], [2., 2., 2., 2.]))
        mc = np.array([1.4, 1.4, 1.4, 1.4])
        vc = np.array([0.3, 0.3, 0.3, 0.3])
        skc = [0.40824829046386357] * 4
        kurtc = [-1.833333333333333] * 4
        n, mm, m, v, sk, kurt = stats.describe(x)
        assert_equal(n, nc)
        assert_equal(mm, mmc)
        assert_equal(m, mc)
        assert_equal(v, vc)
        assert_array_almost_equal(sk, skc, decimal=13)
        assert_array_almost_equal(kurt, kurtc, decimal=13)
        n, mm, m, v, sk, kurt = stats.describe(x.T, axis=1)
        assert_equal(n, nc)
        assert_equal(mm, mmc)
        assert_equal(m, mc)
        assert_equal(v, vc)
        assert_array_almost_equal(sk, skc, decimal=13)
        assert_array_almost_equal(kurt, kurtc, decimal=13)

        x = np.arange(10.)
        x[9] = np.nan

        nc, mmc = (9, (0.0, 8.0))
        mc = 4.0
        vc = 7.5
        skc = 0.0
        kurtc = -1.2300000000000002
        n, mm, m, v, sk, kurt = stats.describe(x, nan_policy='omit')
        assert_equal(n, nc)
        assert_equal(mm, mmc)
        assert_equal(m, mc)
        assert_equal(v, vc)
        assert_array_almost_equal(sk, skc)
        assert_array_almost_equal(kurt, kurtc, decimal=13)

        assert_raises(ValueError, stats.describe, x, nan_policy='raise')
        assert_raises(ValueError, stats.describe, x, nan_policy='foobar') 
Example #6
Source File: ABuStatsUtil.py    From abu with GNU General Public License v3.0 5 votes vote down vote up
def print_stats(arr):
    """
    Output summary statistics of arr.
        eg:
                input:

                            tsla    bidu    noah    sfun    goog    vips    aapl
                2014-07-25  223.57  226.50  15.32   12.110  589.02  21.349  97.67
                2014-07-28  224.82  225.80  16.13   12.450  590.60  21.548  99.02
                2014-07-29  225.01  220.00  16.75   12.220  585.61  21.190  98.38
                ...         ...     ...     ...     ...     ...     ...     ...
                2016-07-22  222.27  160.88  25.50   4.850   742.74  13.510  98.66
                2016-07-25  230.01  160.25  25.57   4.790   739.77  13.390  97.34
                2016-07-26  225.93  163.09  24.75   4.945   740.92  13.655  97.76

                output:

                array size = 504
                array min  = [ 143.67  132.37   12.95    4.44  492.55   10.35   90.34]
                array max  = [ 286.04  250.34   37.32   12.52  776.6    30.    133.  ]
                array mean = [ 228.4885  193.4488   23.7362    7.2458  624.3016   19.0181  110.881 ]
                array var  = [  653.509    752.7421    30.1604     3.3109  7310.2084    27.0994
                   135.156 ]
                array std  = [ 25.5638  27.4361   5.4919   1.8196  85.4998   5.2057  11.6257]
                array skew = [-0.2826 -0.2544  0.1456  1.0322  0.2095  0.095   0.1719]
                array kurt = [ 0.0093 -0.8414 -0.4205  0.4802 -1.547  -0.9203 -1.2104]
    :param arr: pd.DataFrame or pd.Series or Iterable
    """
    nobs, minmax, mean, variance, skewness, kurtosis = scs.describe(arr)

    # Emit through logging.info when ABuEnv.g_is_ipython is truthy,
    # otherwise through plain print.
    if ABuEnv.g_is_ipython:
        emit = logging.info
    else:
        emit = print

    emit('array size = {}'.format(nobs))
    emit('array min  = {}'.format(minmax[0]))
    emit('array max  = {}'.format(minmax[1]))
    emit('array mean = {}'.format(mean))
    emit('array var  = {}'.format(variance))
    # The standard deviation is derived from the reported variance.
    emit('array std  = {}'.format(np.sqrt(variance)))
    emit('array skew = {}'.format(skewness))
    emit('array kurt = {}'.format(kurtosis)) 
Example #7
Source File: extras.py    From Splunking-Crime with GNU Affero General Public License v3.0 5 votes vote down vote up
def __init__(self,args, **kwds):
        """
        Initialise the normal-expansion distribution from data or moments.

        Parameters
        ----------
        args : sequence
            Interpreted according to ``mode``:

            * ``'sample'``  - data handed to ``stats.describe``; the last four
              fields of its result are stored as ``self.mvsk``.
            * ``'mvsk'``    - stored as ``self.mvsk`` and converted with
              ``mvsk2mc``.
            * ``'centmom'`` - stored as ``self.cnt`` and converted with
              ``mc2mvsk``.
        **kwds
            Only ``mode`` is read here; it defaults to ``'sample'``.

        Raises
        ------
        ValueError
            If ``mode`` is not one of ``'sample'``, ``'mvsk'``, ``'centmom'``.
        """
        #todo: replace with super call
        distributions.rv_continuous.__init__(self,
            name = 'Normal Expansion distribution', shapes = ' ',
            extradoc = '''
        The distribution is defined as the Gram-Charlier expansion of
        the normal distribution using the first four moments. The pdf
        is given by

        pdf(x) = (1+ skew/6.0 * H(xc,3) + kurt/24.0 * H(xc,4))*normpdf(xc)

        where xc = (x-mu)/sig is the standardized value of the random variable
        and H(xc,3) and H(xc,4) are Hermite polynomials

        Note: This distribution has to be parameterized during
        initialization and instantiation, and does not have a shape
        parameter after instantiation (similar to frozen distribution
        except for location and scale.) Location and scale can be used
        as with other distributions, however note, that they are relative
        to the initialized distribution.
        '''  )
        mode = kwds.get('mode', 'sample')

        if mode == 'sample':
            # describe() returns (nobs, minmax, mean, variance, skewness,
            # kurtosis); [2:] keeps the last four, so ``sig`` receives the
            # variance field.
            mu,sig,sk,kur = stats.describe(args)[2:]
            self.mvsk = (mu,sig,sk,kur)
            cnt = mvsk2mc((mu,sig,sk,kur))
        elif mode == 'mvsk':
            cnt = mvsk2mc(args)
            self.mvsk = args
        elif mode == 'centmom':
            cnt = args
            self.mvsk = mc2mvsk(cnt)
        else:
            # List every accepted mode so the message matches the branches.
            raise ValueError("mode must be 'sample', 'mvsk' or 'centmom'")

        self.cnt = cnt
        self._pdf = pdf_mvsk(self.mvsk) 
Example #8
Source File: ex_extras.py    From Splunking-Crime with GNU Affero General Public License v3.0 5 votes vote down vote up
def examples_normexpand():
    """
    Print a comparison of sample moments and NormExpan_gen moments.

    Draws 100 variates from ``SkewNorm_gen`` (shape argument 5), builds a
    ``NormExpan_gen`` from them in ``'sample'`` mode and prints the sample
    statistics, the distribution statistics, their difference, the converted
    moments and two pdf evaluations one unit either side of ``mc[0]``.
    """
    sample = SkewNorm_gen().rvs(5,size=100)
    expansion = NormExpan_gen(sample, mode='sample')

    # Last four fields of describe(): mean, variance, skewness, kurtosis.
    sample_mvsk = stats.describe(sample)[2:]
    print('sample: mu,sig,sk,kur')
    print(sample_mvsk)

    dist_mvsk = expansion.stats(moments='mvsk')
    print('normexpan: mu,sig,sk,kur')
    print(dist_mvsk)
    print('mvsk diff distribution - sample')
    print(np.array(dist_mvsk) - np.array(sample_mvsk))
    print('normexpan attributes mvsk')
    print(mc2mvsk(expansion.cnt))
    print(expansion.mvsk)

    noncentral = mvsk2mnc(dist_mvsk)
    central = mnc2mc(noncentral)
    print('central moments')
    print(central)
    print('non-central moments')
    print(noncentral)


    approx_pdf = pdf_moments(central)
    # Evaluation points: one unit below and above the first central-moment entry.
    below, above = central[0]-1, central[0]+1
    print('\npdf approximation from moments')
    print('pdf at', below, above)
    print(approx_pdf([below, above]))
    print(expansion.pdf([below, above])) 
Example #9
Source File: __init__.py    From script-languages with MIT License 5 votes vote down vote up
def __init__(self,metric_name, measurements):
        """
        Keep the metric name and measurements and precompute their statistics.

        The measurements are printed, converted to a NumPy array, and passed
        to ``stats.describe`` and ``stats.bayes_mvs``; both results are
        stored on the instance.
        """
        self._metric_name = metric_name
        self._measurements = measurements
        print("measurements:",self._measurements)

        samples = np.array(self._measurements)
        described = stats.describe(samples)
        self._stats = described
        bayes_estimates = stats.bayes_mvs(samples)
        self._bayes_mvs = bayes_estimates 
Example #10
Source File: test_stats.py    From GraphicDesignPatternByPython with MIT License 5 votes vote down vote up
def test_describe_empty(self):
        assert_raises(ValueError, stats.describe, []) 
Example #11
Source File: test_stats.py    From GraphicDesignPatternByPython with MIT License 5 votes vote down vote up
def test_describe_result_attributes(self):
        """
        Pass the ``stats.describe`` result and its six expected attribute
        names to ``check_named_results``.
        """
        expected_names = (
            'nobs',
            'minmax',
            'mean',
            'variance',
            'skewness',
            'kurtosis',
        )
        result = stats.describe(np.arange(5))
        check_named_results(result, expected_names) 
Example #12
Source File: ex_extras.py    From vnpy_crypto with MIT License 5 votes vote down vote up
def examples_normexpand():
    """
    Demonstrate NormExpan_gen by printing sample and distribution moments.

    A sample of 100 ``SkewNorm_gen`` variates (shape argument 5) is used to
    build a ``NormExpan_gen`` in ``'sample'`` mode. The function prints the
    sample statistics, the distribution's ``'mvsk'`` statistics, their
    difference, the moment conversions, and the pdf at ``mc[0] - 1`` and
    ``mc[0] + 1`` from both ``pdf_moments`` and the distribution itself.
    """
    generator = SkewNorm_gen()
    draws = generator.rvs(5,size=100)
    fitted = NormExpan_gen(draws, mode='sample')

    # Drop nobs and minmax; keep mean, variance, skewness, kurtosis.
    from_sample = stats.describe(draws)[2:]
    print('sample: mu,sig,sk,kur')
    print(from_sample)

    from_dist = fitted.stats(moments='mvsk')
    print('normexpan: mu,sig,sk,kur')
    print(from_dist)
    print('mvsk diff distribution - sample')
    print(np.array(from_dist) - np.array(from_sample))
    print('normexpan attributes mvsk')
    print(mc2mvsk(fitted.cnt))
    print(fitted.mvsk)

    raw_moments = mvsk2mnc(from_dist)
    centered_moments = mnc2mc(raw_moments)
    print('central moments')
    print(centered_moments)
    print('non-central moments')
    print(raw_moments)


    moment_pdf = pdf_moments(centered_moments)
    left = centered_moments[0]-1
    right = centered_moments[0]+1
    print('\npdf approximation from moments')
    print('pdf at', left, right)
    print(moment_pdf([left, right]))
    print(fitted.pdf([left, right])) 
Example #13
Source File: test_stats.py    From GraphicDesignPatternByPython with MIT License 5 votes vote down vote up
def test_describe_scalar(self):
        with suppress_warnings() as sup, np.errstate(invalid="ignore"):
            sup.filter(RuntimeWarning, "Degrees of freedom <= 0 for slice")
            n, mm, m, v, sk, kurt = stats.describe(4.)
        assert_equal(n, 1)
        assert_equal(mm, (4.0, 4.0))
        assert_equal(m, 4.0)
        assert_(np.isnan(v))
        assert_array_almost_equal(sk, 0.0, decimal=13)
        assert_array_almost_equal(kurt, -3.0, decimal=13) 
Example #14
Source File: test_mstats_basic.py    From GraphicDesignPatternByPython with MIT License 5 votes vote down vote up
def test_describe_result_attributes(self):
        """
        Pass the ``mstats.describe`` result and its six expected attribute
        names to ``check_named_results`` with ``ma=True``.
        """
        expected_names = (
            'nobs',
            'minmax',
            'mean',
            'variance',
            'skewness',
            'kurtosis',
        )
        result = mstats.describe(np.arange(5))
        check_named_results(result, expected_names, ma=True) 
Example #15
Source File: test_mstats_basic.py    From GraphicDesignPatternByPython with MIT License 5 votes vote down vote up
def test_describe(self):
        for n in self.get_n():
            x, y, xm, ym = self.generate_xy_sample(n)
            r = stats.describe(x, ddof=1)
            rm = stats.mstats.describe(xm, ddof=1)
            for ii in range(6):
                assert_almost_equal(np.asarray(r[ii]),
                                    np.asarray(rm[ii]),
                                    decimal=12) 
Example #16
Source File: gw_optim.py    From otalign with GNU General Public License v3.0 5 votes vote down vote up
def compute_distances(self, X, Y):
        """
        Compute, optionally normalise, and store the intra-domain distances.

        Pairwise distances of ``X`` with itself and ``Y`` with itself are
        computed with ``self.metric``. When ``self.normalize_dists`` is
        ``'max'``, ``'mean'`` or ``'median'`` each matrix is divided by that
        statistic of itself; any other value leaves the matrices as computed.
        A one-line summary of each matrix is printed and the matrices are
        stored as ``self.C1`` and ``self.C2``.

        Parameters
        ----------
        X, Y : array-like
            Passed to ``cdist`` as both arguments of each call.

        Raises
        ------
        NotImplementedError
            If ``self.gpu`` is set and ``self.normalize_dists == 'median'``.
        """
        print('Computing intra-domain distance matrices...')

        if not self.gpu:
            C1 = sp.spatial.distance.cdist(X, X, metric=self.metric)
            C2 = sp.spatial.distance.cdist(Y, Y, metric=self.metric)
            if self.normalize_dists == 'max':
                C1 /= C1.max()
                C2 /= C2.max()
            elif self.normalize_dists == 'mean':
                C1 /= C1.mean()
                C2 /= C2.mean()
            elif self.normalize_dists == 'median':
                C1 /= np.median(C1)
                C2 /= np.median(C2)
        else:
            C1 = cdist(X, X, metric=self.metric, returnAsGPU=True)
            C2 = cdist(Y, Y, metric=self.metric, returnAsGPU=True)
            if self.normalize_dists == 'max':
                C1.divide(float(np.max(C1.asarray())))
                C2.divide(float(np.max(C2.asarray())))
            elif self.normalize_dists == 'mean':
                C1.divide(float(np.mean(C1.asarray())))
                C2.divide(float(np.mean(C2.asarray())))
            elif self.normalize_dists == 'median':
                # NotImplemented is a comparison sentinel, not an exception;
                # calling it would raise TypeError instead of this message.
                raise NotImplementedError(
                    "Median normalization not implemented in GPU yet")

        stats_C1 = describe(C1.flatten())
        stats_C2 = describe(C2.flatten())

        for (k, C, v) in [('C1', C1, stats_C1), ('C2', C2, stats_C2)]:
            # Adjacent literals are concatenated; a backslash continuation
            # inside the string would embed the next line's indentation.
            print('Stats Distance Matrix {}. mean: {:8.2f}, median: {:8.2f}, '
                  'min: {:8.2f}, max:{:8.2f}'.format(
                      k, v.mean, np.median(C), v.minmax[0], v.minmax[1]))

        self.C1, self.C2 = C1, C2 
Example #17
Source File: extras.py    From vnpy_crypto with MIT License 5 votes vote down vote up
def __init__(self,args, **kwds):
        """
        Initialise the normal-expansion distribution from data or moments.

        Parameters
        ----------
        args : sequence
            Interpreted according to ``mode``:

            * ``'sample'``  - data handed to ``stats.describe``; the last four
              fields of its result are stored as ``self.mvsk``.
            * ``'mvsk'``    - stored as ``self.mvsk`` and converted with
              ``mvsk2mc``.
            * ``'centmom'`` - stored as ``self.cnt`` and converted with
              ``mc2mvsk``.
        **kwds
            Only ``mode`` is read here; it defaults to ``'sample'``.

        Raises
        ------
        ValueError
            If ``mode`` is not one of ``'sample'``, ``'mvsk'``, ``'centmom'``.
        """
        #todo: replace with super call
        distributions.rv_continuous.__init__(self,
            name = 'Normal Expansion distribution', shapes = ' ',
            extradoc = '''
        The distribution is defined as the Gram-Charlier expansion of
        the normal distribution using the first four moments. The pdf
        is given by

        pdf(x) = (1+ skew/6.0 * H(xc,3) + kurt/24.0 * H(xc,4))*normpdf(xc)

        where xc = (x-mu)/sig is the standardized value of the random variable
        and H(xc,3) and H(xc,4) are Hermite polynomials

        Note: This distribution has to be parameterized during
        initialization and instantiation, and does not have a shape
        parameter after instantiation (similar to frozen distribution
        except for location and scale.) Location and scale can be used
        as with other distributions, however note, that they are relative
        to the initialized distribution.
        '''  )
        mode = kwds.get('mode', 'sample')

        if mode == 'sample':
            # describe() returns (nobs, minmax, mean, variance, skewness,
            # kurtosis); [2:] keeps the last four, so ``sig`` receives the
            # variance field.
            mu,sig,sk,kur = stats.describe(args)[2:]
            self.mvsk = (mu,sig,sk,kur)
            cnt = mvsk2mc((mu,sig,sk,kur))
        elif mode == 'mvsk':
            cnt = mvsk2mc(args)
            self.mvsk = args
        elif mode == 'centmom':
            cnt = args
            self.mvsk = mc2mvsk(cnt)
        else:
            # List every accepted mode so the message matches the branches.
            raise ValueError("mode must be 'sample', 'mvsk' or 'centmom'")

        self.cnt = cnt
        self._pdf = pdf_mvsk(self.mvsk) 
Example #18
Source File: test_norm_expan.py    From vnpy_crypto with MIT License 5 votes vote down vote up
def test_mvsk(self):
        mvsk = stats.describe(self.rvs)[-4:]
        assert_allclose(self.dist2.mvsk, mvsk, rtol=1e-12)