Python scipy.stats.describe() Examples

The following are code examples for showing how to use scipy.stats.describe(). They are from open source Python projects. You can vote up the examples you like or vote down the ones you don't like.

Example 1
Project: LaserTOF   Author: kyleuckert   File: test_mstats_basic.py    MIT License 6 votes vote down vote up
def test_describe(self):
        for n in self.get_n():
            x, y, xm, ym = self.generate_xy_sample(n)
            r = stats.describe(x, ddof=1)
            rm = stats.mstats.describe(xm, ddof=1)
            for ii in range(6):
                assert_almost_equal(np.asarray(r[ii]),
                                    np.asarray(rm[ii]),
                                    decimal=12) 
Example 2
Project: LaserTOF   Author: kyleuckert   File: test_stats.py    MIT License 6 votes vote down vote up
def test_describe_axis_none(self):
        x = np.vstack((np.ones((3, 4)), 2 * np.ones((2, 4))))

        # expected values
        e_nobs, e_minmax = (20, (1.0, 2.0))
        e_mean = 1.3999999999999999
        e_var = 0.25263157894736848
        e_skew = 0.4082482904638634
        e_kurt = -1.8333333333333333

        # actual values
        a = stats.describe(x, axis=None)

        assert_equal(a.nobs, e_nobs)
        assert_almost_equal(a.minmax, e_minmax)
        assert_almost_equal(a.mean, e_mean)
        assert_almost_equal(a.variance, e_var)
        assert_array_almost_equal(a.skewness, e_skew, decimal=13)
        assert_array_almost_equal(a.kurtosis, e_kurt, decimal=13) 
Example 3
Project: recsys-random-walk   Author: TimovNiedek   File: metrics.py    Apache License 2.0 6 votes vote down vote up
def aggregate_metrics(ground_truth, sub, k, candidates):
    """Average the per-candidate metrics and report submission coverage.

    Every candidate missing from ``sub`` is scored as ``Metrics(0, 0, 0)`` and
    counted as a miss. Returns a ``MetricsSummary`` holding the mean
    r_precision, mean ndcg, mean plex_clicks and the coverage
    ``1 - misses / candidates``.
    """
    r_precision_values, ndcg_values, click_values = [], [], []
    total = 0
    missing = 0
    for candidate in candidates:
        total += 1
        if candidate in sub:
            scores = get_all_metrics(ground_truth[candidate], sub[candidate], k)
        else:
            missing += 1
            scores = Metrics(0, 0, 0)  # TODO: make sure this is right
        r_precision_values.append(scores.r_precision)
        ndcg_values.append(scores.ndcg)
        click_values.append(scores.plex_clicks)

    coverage = 1 - missing / float(total)
    mean_r_precision = stats.describe(r_precision_values).mean
    mean_ndcg = stats.describe(ndcg_values).mean
    mean_clicks = stats.describe(click_values).mean
    return MetricsSummary(mean_r_precision, mean_ndcg, mean_clicks, coverage)
Example 4
Project: BiLatticeRNN-data-processing   Author: alecokas   File: error_distribution.py    MIT License 6 votes vote down vote up
def save_statistics(error_array, target_file_name):
    """Describe the non-zero errors of each error type and pickle the results.

    The entries of ``error_array`` are paired, in order, with the labels
    'Start Time', 'End Time' and 'Duration'. Zero-valued errors are dropped
    before ``stats.describe`` is applied. The resulting ``{label: describe
    result}`` dict is written to ``target_file_name + '.pickle'`` and the list
    of labels is printed.
    """
    # Remove file if it exists
    # NOTE(review): this removes `target_file_name`, not the '.pickle' file
    # written below -- confirm which path is meant to be cleared.
    try:
        os.remove(target_file_name)
    except OSError:
        pass

    error_types = ['Start Time', 'End Time', 'Duration']
    stats_dict = {}

    # A distinct loop name keeps the label list intact; the original rebound
    # `error_type` here, so the print below showed only the last label.
    for errors, label in zip(error_array, error_types):
        non_zero = [value for value in errors if value != 0]
        stats_dict[label] = stats.describe(non_zero)

    # Write
    print(error_types)
    with open(target_file_name + '.pickle', 'wb') as tgt_file:
        pickle.dump(stats_dict, tgt_file, protocol=pickle.HIGHEST_PROTOCOL)
Example 5
Project: att   Author: Centre-Alt-Rendiment-Esportiu   File: test_stats.py    GNU General Public License v3.0 6 votes vote down vote up
def test_describe_numbers(self):
        x = np.vstack((np.ones((3,4)), 2 * np.ones((2,4))))
        nc, mmc = (5, ([1., 1., 1., 1.], [2., 2., 2., 2.]))
        mc = np.array([1.4, 1.4, 1.4, 1.4])
        vc = np.array([0.3, 0.3, 0.3, 0.3])
        skc = [0.40824829046386357] * 4
        kurtc = [-1.833333333333333] * 4
        n, mm, m, v, sk, kurt = stats.describe(x)
        assert_equal(n, nc)
        assert_equal(mm, mmc)
        assert_equal(m, mc)
        assert_equal(v, vc)
        # not sure about precision with sk, skc
        assert_array_almost_equal(sk, skc, decimal=13)
        assert_array_almost_equal(kurt, kurtc, decimal=13)
        n, mm, m, v, sk, kurt = stats.describe(x.T, axis=1)
        assert_equal(n, nc)
        assert_equal(mm, mmc)
        assert_equal(m, mc)
        assert_equal(v, vc)
        # not sure about precision with sk, skc
        assert_array_almost_equal(sk, skc, decimal=13)
        assert_array_almost_equal(kurt, kurtc, decimal=13) 
Example 6
Project: homework   Author: Iydon   File: simulation.py    MIT License 6 votes vote down vote up
def print_statistics(a1, a2):
    """
    Prints selected statistics.

    Parameters
    ==========
    a1, a2 : ndarray objects
    results object from simulation
    """
    # Main body
    first = scs.describe(a1)
    second = scs.describe(a2)
    row_format = "%14s %14.3f %14.3f"

    # Each row pairs a label with a function that pulls its value out of a
    # describe result; values are read only when the row is printed.
    rows = (
        ("size", lambda res: res[0]),
        ("min", lambda res: res[1][0]),
        ("max", lambda res: res[1][1]),
        ("mean", lambda res: res[2]),
        ("std", lambda res: np.sqrt(res[3])),
        ("skew", lambda res: res[4]),
        ("kurtosis", lambda res: res[5]),
    )

    print("%14s %14s %14s"%("statistic", "data set 1", "data set 2"))
    print("-" * 45)
    for label, pick in rows:
        print(row_format % (label, pick(first), pick(second)))
Example 7
Project: ble5-nrf52-mac   Author: tomasero   File: test_stats.py    MIT License 6 votes vote down vote up
def test_describe_axis_none(self):
        x = np.vstack((np.ones((3, 4)), 2 * np.ones((2, 4))))

        # expected values
        e_nobs, e_minmax = (20, (1.0, 2.0))
        e_mean = 1.3999999999999999
        e_var = 0.25263157894736848
        e_skew = 0.4082482904638634
        e_kurt = -1.8333333333333333

        # actual values
        a = stats.describe(x, axis=None)

        assert_equal(a.nobs, e_nobs)
        assert_almost_equal(a.minmax, e_minmax)
        assert_almost_equal(a.mean, e_mean)
        assert_almost_equal(a.variance, e_var)
        assert_array_almost_equal(a.skewness, e_skew, decimal=13)
        assert_array_almost_equal(a.kurtosis, e_kurt, decimal=13) 
Example 8
Project: surfclass   Author: Kortforsyningen   File: prepare.py    MIT License 6 votes vote down vote up
def traindata(indataset, inlyr, attrib, rasterfiles, outputfile):
    """Extracts training data defined by polygons with a class from a set of raster features.

    Example:
    surfclass prepare traindata --in train_polys.gpkg --inlyr areas --attrib classno -f feature1.tif
        -f feature2.tif -f feature3.tif my_traning_data.npz

    """
    # Print feature order. This is important.
    click.echo("Extracting training data from features:")
    for number, raster_path in enumerate(rasterfiles, start=1):
        click.echo(f"f{number}: {raster_path}")

    classes, features = train.collect_training_data(
        indataset, inlyr, attrib, rasterfiles
    )

    # Echo a describe() summary for the class labels, then for the features.
    summaries = (
        ("Stats for extracted training data:", classes),
        ("Stats for extracted feature data:", features),
    )
    for heading, values in summaries:
        click.echo(heading)
        click.echo(stats.describe(values))

    train.save_training_data(outputfile, rasterfiles, classes, features)
Example 9
Project: surfclass   Author: Kortforsyningen   File: prepare.py    MIT License 6 votes vote down vote up
def traindatainfo(datafile):
    """Shows basic information about extracted training data.

    Example:
    surclass prepare traindatainfo my_traning_data

    """
    # TODO: Beautify output like
    # Number of observations: xxx
    # f1: min=x max=y mean=z
    # f2: ...
    file_paths, classes, features = train.load_training_data(datafile)

    click.echo("Trained from features:")
    for number, raster_path in enumerate(file_paths, start=1):
        click.echo(f"f{number}: {raster_path}")

    # Echo a describe() summary for the class labels, then for the features.
    for heading, values in (
        ("Stats for classes:", classes),
        ("Stats for features:", features),
    ):
        click.echo(heading)
        click.echo(stats.describe(values))
Example 10
Project: Computable   Author: ktraunmueller   File: test_stats.py    MIT License 6 votes vote down vote up
def test_describe():
    """Check ``stats.describe`` on a 5x4 array and on its transpose with ``axis=1``."""
    x = np.vstack((np.ones((3,4)),2*np.ones((2,4))))

    # Reference values, one entry per column of x.
    expected_n = 5
    expected_minmax = ([1., 1., 1., 1.], [2., 2., 2., 2.])
    expected_mean = np.array([1.4, 1.4, 1.4, 1.4])
    expected_var = np.array([0.3, 0.3, 0.3, 0.3])
    expected_skew = [0.40824829046386357]*4
    expected_kurt = [-1.833333333333333]*4

    # x without an axis argument and x.T along axis=1 must give the same numbers.
    for data, kwargs in ((x, {}), (x.T, {'axis': 1})):
        n, mm, m, v, sk, kurt = stats.describe(data, **kwargs)
        assert_equal(n, expected_n)
        assert_equal(mm, expected_minmax)
        assert_equal(m, expected_mean)
        assert_equal(v, expected_var)
        # not sure about precision
        assert_array_almost_equal(sk, expected_skew, decimal=13)
        assert_array_almost_equal(kurt, expected_kurt, decimal=13)
Example 11
Project: poker   Author: surgebiswas   File: test_stats.py    MIT License 6 votes vote down vote up
def test_describe_axis_none(self):
        x = np.vstack((np.ones((3, 4)), 2 * np.ones((2, 4))))

        # expected values
        e_nobs, e_minmax = (20, (1.0, 2.0))
        e_mean = 1.3999999999999999
        e_var = 0.25263157894736848
        e_skew = 0.4082482904638634
        e_kurt = -1.8333333333333333

        # actual values
        a = stats.describe(x, axis=None)

        assert_equal(a.nobs, e_nobs)
        assert_almost_equal(a.minmax, e_minmax)
        assert_almost_equal(a.mean, e_mean)
        assert_almost_equal(a.variance, e_var)
        assert_array_almost_equal(a.skewness, e_skew, decimal=13)
        assert_array_almost_equal(a.kurtosis, e_kurt, decimal=13) 
Example 12
Project: P3_image_processing   Author: latedude2   File: test_stats.py    MIT License 6 votes vote down vote up
def test_describe_axis_none(self):
        x = np.vstack((np.ones((3, 4)), 2 * np.ones((2, 4))))

        # expected values
        e_nobs, e_minmax = (20, (1.0, 2.0))
        e_mean = 1.3999999999999999
        e_var = 0.25263157894736848
        e_skew = 0.4082482904638634
        e_kurt = -1.8333333333333333

        # actual values
        a = stats.describe(x, axis=None)

        assert_equal(a.nobs, e_nobs)
        assert_almost_equal(a.minmax, e_minmax)
        assert_almost_equal(a.mean, e_mean)
        assert_almost_equal(a.variance, e_var)
        assert_array_almost_equal(a.skewness, e_skew, decimal=13)
        assert_array_almost_equal(a.kurtosis, e_kurt, decimal=13) 
Example 13
Project: CCKS2019-IPRE   Author: shiningliang   File: pretrain_embedding.py    Apache License 2.0 6 votes vote down vote up
def stat(seq_length, type):
    """Print a describe() summary of ``seq_length`` and save a two-panel figure.

    The left panel plots each length against its position, the right panel is
    a 10-bin histogram. The figure is saved as ``type + '_len_stats.jpg'``.
    """
    print('Seq len info :')
    lengths = np.asarray(seq_length)
    positions = np.arange(0, len(lengths), dtype=np.int32)
    print(stats.describe(lengths))

    plt.figure(figsize=(16, 9))

    # Left panel: length versus position.
    plt.subplot(121)
    plt.plot(positions, lengths, 'ro')
    plt.grid(True)
    plt.xlabel('index')
    plt.ylabel('seq_len')
    plt.title('Scatter Plot')

    # Right panel: distribution of lengths.
    plt.subplot(122)
    plt.hist(lengths, bins=10, label=['seq_len'])
    plt.grid(True)
    plt.xlabel('seq_len')
    plt.ylabel('freq')
    plt.title('Histogram')

    output_name = type + '_len_stats.jpg'
    plt.savefig(output_name, format='jpg')
Example 14
Project: GraphicDesignPatternByPython   Author: Relph1119   File: test_stats.py    MIT License 6 votes vote down vote up
def test_describe_axis_none(self):
        x = np.vstack((np.ones((3, 4)), 2 * np.ones((2, 4))))

        # expected values
        e_nobs, e_minmax = (20, (1.0, 2.0))
        e_mean = 1.3999999999999999
        e_var = 0.25263157894736848
        e_skew = 0.4082482904638634
        e_kurt = -1.8333333333333333

        # actual values
        a = stats.describe(x, axis=None)

        assert_equal(a.nobs, e_nobs)
        assert_almost_equal(a.minmax, e_minmax)
        assert_almost_equal(a.mean, e_mean)
        assert_almost_equal(a.variance, e_var)
        assert_array_almost_equal(a.skewness, e_skew, decimal=13)
        assert_array_almost_equal(a.kurtosis, e_kurt, decimal=13) 
Example 15
Project: aae-recommender   Author: lgalke   File: mpd_metrics.py    GNU General Public License v3.0 6 votes vote down vote up
def aggregate_metrics(ground_truth, sub, k, candidates):
    """Average the per-candidate metrics and report submission coverage.

    Every candidate missing from ``sub`` is scored as ``Metrics(0, 0, 0)`` and
    counted as a miss. Returns a ``MetricsSummary`` holding the mean
    r_precision, mean ndcg, mean plex_clicks and the coverage
    ``1 - misses / candidates``.
    """
    r_precision_values, ndcg_values, click_values = [], [], []
    total = 0
    missing = 0
    for candidate in candidates:
        total += 1
        if candidate in sub:
            scores = get_all_metrics(ground_truth[candidate], sub[candidate], k)
        else:
            missing += 1
            scores = Metrics(0, 0, 0)  # TODO: make sure this is right
        r_precision_values.append(scores.r_precision)
        ndcg_values.append(scores.ndcg)
        click_values.append(scores.plex_clicks)

    coverage = 1 - missing / float(total)
    mean_r_precision = stats.describe(r_precision_values).mean
    mean_ndcg = stats.describe(ndcg_values).mean
    mean_clicks = stats.describe(click_values).mean
    return MetricsSummary(mean_r_precision, mean_ndcg, mean_clicks, coverage)
Example 16
Project: MLSeistoLog   Author: selkurdy   File: mlseistolog.py    MIT License 6 votes vote down vote up
def map2ddata(xy,vr,xyi,radius=5000.0,maptype='idw'):
    """Interpolate the values ``vr`` known at points ``xy`` onto the points ``xyi``.

    Parameters
    ----------
    xy : known point coordinates, passed to the chosen interpolator.
    vr : values at ``xy``.
    xyi : target coordinates; columns 0 and 1 are used as x and y.
    radius : currently unused (only the disabled 'avgmap' mode referred to it).
    maptype : one of 'idw', 'nearest', 'linear', 'cubic', 'rbf', 'triang', 'ct'.

    Returns
    -------
    The interpolated values at ``xyi``.

    Raises
    ------
    ValueError
        If ``maptype`` is not one of the supported names.
    """
    if maptype == 'idw':
        vri = idw(xy, vr, xyi)
    elif maptype in ('nearest', 'linear', 'cubic'):
        # griddata takes the method name directly.
        vri = griddata(xy, vr, (xyi[:,0], xyi[:,1]), method=maptype)
    elif maptype == 'rbf':
        rbf = Rbf(xy[:,0], xy[:,1], vr)
        vri = rbf(xyi[:,0], xyi[:,1])
    # elif maptype =='avgmap':
    #     vri=dataavgmap(xy,vr,xyi,radius)
    elif maptype in ('triang', 'ct'):
        # Both interpolators take the mean of vr (entry 2 of describe) as
        # fill value. The original read it from a `stats` variable whose
        # assignment had been commented out, so these modes raised NameError.
        from scipy import stats as sts
        fill_value = sts.describe(vr)[2]
        if maptype == 'triang':
            interpolator = LinearNDInterpolator(xy, vr, fill_value)
        else:
            interpolator = CloughTocher2DInterpolator(xy, vr, fill_value)
        vri = interpolator(xyi)
    else:
        # Previously an unknown mode fell through to an unbound `vri`.
        raise ValueError("unknown maptype: %r" % (maptype,))
    return vri
Example 17
Project: wine-ml-on-aws-lambda   Author: pierreant   File: test_stats.py    Apache License 2.0 6 votes vote down vote up
def test_describe_axis_none(self):
        x = np.vstack((np.ones((3, 4)), 2 * np.ones((2, 4))))

        # expected values
        e_nobs, e_minmax = (20, (1.0, 2.0))
        e_mean = 1.3999999999999999
        e_var = 0.25263157894736848
        e_skew = 0.4082482904638634
        e_kurt = -1.8333333333333333

        # actual values
        a = stats.describe(x, axis=None)

        assert_equal(a.nobs, e_nobs)
        assert_almost_equal(a.minmax, e_minmax)
        assert_almost_equal(a.mean, e_mean)
        assert_almost_equal(a.variance, e_var)
        assert_array_almost_equal(a.skewness, e_skew, decimal=13)
        assert_array_almost_equal(a.kurtosis, e_kurt, decimal=13) 
Example 18
Project: senior-design   Author: james-tate   File: test_stats.py    GNU General Public License v2.0 6 votes vote down vote up
def test_describe():
    """Check ``stats.describe`` on a 5x4 array and on its transpose with ``axis=1``."""
    x = np.vstack((np.ones((3,4)),2*np.ones((2,4))))

    # Reference values, one entry per column of x.
    expected_n = 5
    expected_minmax = ([ 1.,  1.,  1.,  1.], [ 2.,  2.,  2.,  2.])
    expected_mean = np.array([ 1.4,  1.4,  1.4,  1.4])
    expected_var = np.array([ 0.3,  0.3,  0.3,  0.3])
    expected_skew = [0.40824829046386357]*4
    expected_kurt = [-1.833333333333333]*4

    # x without an axis argument and x.T along axis=1 must give the same numbers.
    for data, kwargs in ((x, {}), (x.T, {'axis': 1})):
        n, mm, m, v, sk, kurt = stats.describe(data, **kwargs)
        assert_equal(n, expected_n)
        assert_equal(mm, expected_minmax)
        assert_equal(m, expected_mean)
        assert_equal(v, expected_var)
        # not sure about precision
        assert_array_almost_equal(sk, expected_skew, decimal=13)
        assert_array_almost_equal(kurt, expected_kurt, decimal=13)
Example 19
Project: mpd-aae-recommender   Author: lgalke   File: mpd_metrics.py    Apache License 2.0 6 votes vote down vote up
def aggregate_metrics(ground_truth, sub, k, candidates):
    """Average the per-candidate metrics and report submission coverage.

    Every candidate missing from ``sub`` is scored as ``Metrics(0, 0, 0)`` and
    counted as a miss. Returns a ``MetricsSummary`` holding the mean
    r_precision, mean ndcg, mean plex_clicks and the coverage
    ``1 - misses / candidates``.
    """
    r_precision_values, ndcg_values, click_values = [], [], []
    total = 0
    missing = 0
    for candidate in candidates:
        total += 1
        if candidate in sub:
            scores = get_all_metrics(ground_truth[candidate], sub[candidate], k)
        else:
            missing += 1
            scores = Metrics(0, 0, 0)  # TODO: make sure this is right
        r_precision_values.append(scores.r_precision)
        ndcg_values.append(scores.ndcg)
        click_values.append(scores.plex_clicks)

    coverage = 1 - missing / float(total)
    mean_r_precision = stats.describe(r_precision_values).mean
    mean_ndcg = stats.describe(ndcg_values).mean
    mean_clicks = stats.describe(click_values).mean
    return MetricsSummary(mean_r_precision, mean_ndcg, mean_clicks, coverage)
Example 20
Project: LaserTOF   Author: kyleuckert   File: test_mstats_basic.py    MIT License 5 votes vote down vote up
def test_describe_result_attributes(self):
        """Pass an ``mstats.describe`` result and its field names to ``check_named_results``."""
        field_names = ('nobs', 'minmax', 'mean', 'variance', 'skewness',
                       'kurtosis')
        result = mstats.describe(np.arange(5))
        check_named_results(result, field_names, ma=True)
Example 21
Project: LaserTOF   Author: kyleuckert   File: test_stats.py    MIT License 5 votes vote down vote up
def test_describe_scalar(self):
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', category=RuntimeWarning)
            n, mm, m, v, sk, kurt = stats.describe(4.)
        assert_equal(n, 1)
        assert_equal(mm, (4.0, 4.0))
        assert_equal(m, 4.0)
        assert_(np.isnan(v))
        assert_array_almost_equal(sk, 0.0, decimal=13)
        assert_array_almost_equal(kurt, -3.0, decimal=13) 
Example 22
Project: LaserTOF   Author: kyleuckert   File: test_stats.py    MIT License 5 votes vote down vote up
def test_describe_numbers(self):
        x = np.vstack((np.ones((3,4)), 2 * np.ones((2,4))))
        nc, mmc = (5, ([1., 1., 1., 1.], [2., 2., 2., 2.]))
        mc = np.array([1.4, 1.4, 1.4, 1.4])
        vc = np.array([0.3, 0.3, 0.3, 0.3])
        skc = [0.40824829046386357] * 4
        kurtc = [-1.833333333333333] * 4
        n, mm, m, v, sk, kurt = stats.describe(x)
        assert_equal(n, nc)
        assert_equal(mm, mmc)
        assert_equal(m, mc)
        assert_equal(v, vc)
        assert_array_almost_equal(sk, skc, decimal=13)
        assert_array_almost_equal(kurt, kurtc, decimal=13)
        n, mm, m, v, sk, kurt = stats.describe(x.T, axis=1)
        assert_equal(n, nc)
        assert_equal(mm, mmc)
        assert_equal(m, mc)
        assert_equal(v, vc)
        assert_array_almost_equal(sk, skc, decimal=13)
        assert_array_almost_equal(kurt, kurtc, decimal=13)

        x = np.arange(10.)
        x[9] = np.nan

        nc, mmc = (9, (0.0, 8.0))
        mc = 4.0
        vc = 7.5
        skc = 0.0
        kurtc = -1.2300000000000002
        n, mm, m, v, sk, kurt = stats.describe(x, nan_policy='omit')
        assert_equal(n, nc)
        assert_equal(mm, mmc)
        assert_equal(m, mc)
        assert_equal(v, vc)
        assert_array_almost_equal(sk, skc)
        assert_array_almost_equal(kurt, kurtc, decimal=13)

        assert_raises(ValueError, stats.describe, x, nan_policy='raise')
        assert_raises(ValueError, stats.describe, x, nan_policy='foobar') 
Example 23
Project: LaserTOF   Author: kyleuckert   File: test_stats.py    MIT License 5 votes vote down vote up
def test_describe_result_attributes(self):
        """Pass a ``stats.describe`` result and its field names to ``check_named_results``."""
        field_names = ('nobs', 'minmax', 'mean', 'variance', 'skewness',
                       'kurtosis')
        result = stats.describe(np.arange(5))
        check_named_results(result, field_names)
Example 24
Project: LaserTOF   Author: kyleuckert   File: test_stats.py    MIT License 5 votes vote down vote up
def test_describe_empty(self):
        assert_raises(ValueError, stats.describe, []) 
Example 25
Project: att   Author: Centre-Alt-Rendiment-Esportiu   File: test_mstats_basic.py    GNU General Public License v3.0 5 votes vote down vote up
def test_describe(self):
        for n in self.get_n():
            x, y, xm, ym = self.generate_xy_sample(n)
            r = stats.describe(x, ddof=1)
            rm = stats.mstats.describe(xm, ddof=1)
            for ii in range(6):
                assert_almost_equal(np.asarray(r[ii]),
                                    np.asarray(rm[ii]),
                                    decimal=12) 
Example 26
Project: att   Author: Centre-Alt-Rendiment-Esportiu   File: test_stats.py    GNU General Public License v3.0 5 votes vote down vote up
def test_describe_result_attributes(self):
        actual = stats.describe(np.arange(5))
        attributes = ('nobs', 'minmax', 'mean', 'variance', 'skewness',
                      'kurtosis')
        for i, attr in enumerate(attributes):
            assert_equal(actual[i], getattr(actual, attr)) 
Example 27
Project: att   Author: Centre-Alt-Rendiment-Esportiu   File: test_stats.py    GNU General Public License v3.0 5 votes vote down vote up
def test_describe_typename(self):
        actual = stats.describe(np.arange(5))
        assert_equal(str(actual)[:8], 'Describe') 
Example 28
Project: google_landmark_2019   Author: artyompal   File: knn_calc.py    Apache License 2.0 5 votes vote down vote up
def search_against_fragment(train_features: np.ndarray, test_features: np.ndarray) \
    -> Tuple[np.ndarray, np.ndarray]:
    """Index ``train_features`` with faiss and search it with ``test_features``.

    Returns ``(index, distances)`` as produced by a ``K``-neighbour search.
    Index type and device are chosen from the module-level ``USE_GPU`` and
    ``USE_COSINE_DIST`` flags.
    """
    if not USE_GPU:
        index_flat = faiss.IndexFlatIP(DIMS)
    else:
        # build a flat index (CPU)
        cpu_index = (faiss.IndexFlat(DIMS, faiss.METRIC_INNER_PRODUCT)
                     if USE_COSINE_DIST else faiss.IndexFlatL2(DIMS))

        # make it into a GPU index
        index_flat = faiss.index_cpu_to_gpu(res, 0, cpu_index)

    index_flat.add(train_features)
    print("total size of the database:", index_flat.ntotal)

    # print("sanity search...")
    # distances, index = index_flat.search(train_features[:10], K)  # actual search
    # print(index[:10])
    # print(distances[:10])

    print("searching")
    distances, index = index_flat.search(test_features, K)  # actual search

    # dprint arguments are left exactly as written in case it reports the
    # expression text.
    dprint(index)
    dprint(distances)
    dprint(describe(index.flatten()))
    dprint(describe(distances.flatten()))
    return index, distances
Example 29
Project: google_landmark_2019   Author: artyompal   File: knn_calc.py    Apache License 2.0 5 votes vote down vote up
def merge_results(index1: np.ndarray, distances1: np.ndarray, index2: np.ndarray,
                  distances2: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
    """ Returns top-K of two sets. """
    print("merging results")
    assert index1.shape == distances1.shape and index2.shape == distances2.shape
    assert index1.shape[1] == index2.shape[1]

    # Place both candidate sets side by side, one row per sample.
    joint_indices = np.hstack((index1, index2))
    joint_distances = np.hstack((distances1, distances2))
    print("joint_indices", joint_indices.shape, "joint_distances", joint_distances.shape)
    assert joint_indices.shape == joint_distances.shape

    n_samples = index1.shape[0]
    best_indices = np.zeros((n_samples, K), dtype=int)
    best_distances = np.zeros((n_samples, K), dtype=np.float32)

    for row in range(joint_indices.shape[0]):
        # Ascending order of the raw values, or of their negation when
        # USE_COSINE_DIST is set (largest values first).
        row_values = joint_distances[row]
        sort_keys = -row_values if USE_COSINE_DIST else row_values
        order = np.argsort(sort_keys)[:K]

        best_indices[row] = joint_indices[row, order]
        best_distances[row] = joint_distances[row, order]

    print("best_indices", best_indices.shape, "best_distances", best_distances.shape)
    # dprint arguments are left exactly as written in case it reports the
    # expression text.
    dprint(best_indices)
    dprint(best_distances)
    dprint(describe(best_indices.flatten()))
    return best_indices, best_distances
Example 30
Project: PyFluxPro   Author: OzFlux   File: pfp_cpd.py    BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def stats_calc(df,stats_df):
    """Fill ``stats_df`` with summary statistics of ``df['bMod_threshold']``.

    Adds the columns 'ustar_mean', 'ustar_sig', 'ustar_n', 'crit_t',
    '95%CI_lower', '95%CI_upper', 'skew' and 'kurt' to ``stats_df``
    (initialised to NaN), fills them for every index label whose 'b_valid'
    flag is truthy, and returns ``stats_df``. 'ustar_n' is created but never
    assigned in this function.
    """

    # Add statistics vars to output df
    stats_df['ustar_mean'] = np.nan
    stats_df['ustar_sig'] = np.nan
    stats_df['ustar_n'] = np.nan
    stats_df['crit_t'] = np.nan
    stats_df['95%CI_lower'] = np.nan
    stats_df['95%CI_upper'] = np.nan
    stats_df['skew'] = np.nan
    stats_df['kurt'] = np.nan

    # Drop data that failed b model, then drop b model boolean variable
    df=df[df['b_valid']==True]
    df=df.drop('b_valid',axis=1)

    # Calculate stats
    for i in stats_df.index:
        if stats_df.loc[i, 'b_valid']:
            # A Series here means several rows of df share the label i
            # (presumably one row per bootstrap/sample -- TODO confirm).
            if isinstance(df.loc[i, 'bMod_threshold'],pd.Series):
                # describe() result: [0]=nobs, [2]=mean, [3]=variance,
                # [4]=skewness, [5]=kurtosis.
                temp = stats.describe(df.loc[i, 'bMod_threshold'])
                stats_df.loc[i, 'ustar_mean'] = temp[2]
                stats_df.loc[i, 'ustar_sig'] = np.sqrt(temp[3])
                # NOTE(review): the t quantile uses nobs as degrees of freedom
                # and the interval below scales by the standard deviation, not
                # the standard error -- confirm this is intended.
                stats_df.loc[i, 'crit_t'] = stats.t.ppf(1 - 0.025, temp[0])
                stats_df.loc[i, '95%CI_lower'] = (stats_df.loc[i, 'ustar_mean'] -
                                                  stats_df.loc[i, 'ustar_sig'] *
                                                  stats_df.loc[i, 'crit_t'])
                stats_df.loc[i, '95%CI_upper'] = (stats_df.loc[i, 'ustar_mean'] +
                                                  stats_df.loc[i, 'ustar_sig'] *
                                                  stats_df.loc[i, 'crit_t'])
                stats_df.loc[i, 'skew'] = temp[4]
                stats_df.loc[i, 'kurt'] = temp[5]
            else:
                # Single value for this label: use it as the mean and leave the
                # remaining statistics as NaN.
                stats_df.loc[i, 'ustar_mean'] = df.loc[i, 'bMod_threshold']

    return stats_df
#------------------------------------------------------------------------------ 
Example 31
Project: vnpy_crypto   Author: birforce   File: ex_extras.py    MIT License 5 votes vote down vote up
def examples_normexpand():
    """Print sample moments next to those of a normal expansion fitted to the sample.

    Draws 100 values from ``SkewNorm_gen().rvs(5, ...)``, fits ``NormExpan_gen``
    in 'sample' mode, then prints the moment comparisons and two pdf
    evaluations around the first central moment entry.
    """
    skewnorm = SkewNorm_gen()
    sample = skewnorm.rvs(5,size=100)
    normexpan = NormExpan_gen(sample, mode='sample')

    # Entries 2.. of the describe result: mean, variance, skewness, kurtosis.
    sample_mvsk = stats.describe(sample)[2:]
    print('sample: mu,sig,sk,kur')
    print(sample_mvsk)

    dist_mvsk = normexpan.stats(moments='mvsk')
    print('normexpan: mu,sig,sk,kur')
    print(dist_mvsk)
    print('mvsk diff distribution - sample')
    print(np.array(dist_mvsk) - np.array(sample_mvsk))
    print('normexpan attributes mvsk')
    print(mc2mvsk(normexpan.cnt))
    print(normexpan.mvsk)

    mnc = mvsk2mnc(dist_mvsk)
    mc = mnc2mc(mnc)
    print('central moments')
    print(mc)
    print('non-central moments')
    print(mnc)

    pdffn = pdf_moments(mc)
    lower, upper = mc[0]-1, mc[0]+1
    print('\npdf approximation from moments')
    print('pdf at', lower, upper)
    print(pdffn([lower, upper]))
    print(normexpan.pdf([lower, upper]))
Example 32
Project: vnpy_crypto   Author: birforce   File: test_norm_expan.py    MIT License 5 votes vote down vote up
def test_mvsk(self):
        mvsk = stats.describe(self.rvs)[-4:]
        assert_allclose(self.dist2.mvsk, mvsk, rtol=1e-12) 
Example 33
Project: vnpy_crypto   Author: birforce   File: extras.py    MIT License 5 votes vote down vote up
def __init__(self,args, **kwds):
        """Initialise the expansion from a sample, an mvsk tuple or central moments.

        ``kwds['mode']`` (default 'sample') selects how ``args`` is read:

        * 'sample'  -- ``args`` is passed to ``stats.describe`` and entries
          2.. of the result are stored as ``self.mvsk``.
        * 'mvsk'    -- ``args`` is stored as ``self.mvsk`` directly.
        * 'centmom' -- ``args`` is taken as the central moments and converted
          with ``mc2mvsk``.

        Raises ``ValueError`` for any other mode.
        """
        #todo: replace with super call
        distributions.rv_continuous.__init__(self,
            name = 'Normal Expansion distribution', shapes = ' ',
            extradoc = '''
        The distribution is defined as the Gram-Charlier expansion of
        the normal distribution using the first four moments. The pdf
        is given by

        pdf(x) = (1+ skew/6.0 * H(xc,3) + kurt/24.0 * H(xc,4))*normpdf(xc)

        where xc = (x-mu)/sig is the standardized value of the random variable
        and H(xc,3) and H(xc,4) are Hermite polynomials

        Note: This distribution has to be parameterized during
        initialization and instantiation, and does not have a shape
        parameter after instantiation (similar to frozen distribution
        except for location and scale.) Location and scale can be used
        as with other distributions, however note, that they are relative
        to the initialized distribution.
        '''  )
        #print args, kwds
        mode = kwds.get('mode', 'sample')

        if mode == 'sample':
            # NOTE(review): entry 3 of describe() is the variance although it
            # is named `sig` here -- confirm what mvsk2mc expects.
            mu,sig,sk,kur = stats.describe(args)[2:]
            self.mvsk = (mu,sig,sk,kur)
            cnt = mvsk2mc((mu,sig,sk,kur))
        elif mode == 'mvsk':
            cnt = mvsk2mc(args)
            self.mvsk = args
        elif mode == 'centmom':
            cnt = args
            self.mvsk = mc2mvsk(cnt)
        else:
            # List every accepted mode; the old message left out 'sample'.
            raise ValueError("mode must be 'sample', 'mvsk' or 'centmom'")

        self.cnt = cnt
        #self.mvsk = (mu,sig,sk,kur)
        #self._pdf = pdf_moments(cnt)
        self._pdf = pdf_mvsk(self.mvsk)
Example 34
Project: ble5-nrf52-mac   Author: tomasero   File: test_mstats_basic.py    MIT License 5 votes vote down vote up
def test_describe(self):
        for n in self.get_n():
            x, y, xm, ym = self.generate_xy_sample(n)
            r = stats.describe(x, ddof=1)
            rm = stats.mstats.describe(xm, ddof=1)
            for ii in range(6):
                assert_almost_equal(np.asarray(r[ii]),
                                    np.asarray(rm[ii]),
                                    decimal=12) 
Example 35
Project: ble5-nrf52-mac   Author: tomasero   File: test_mstats_basic.py    MIT License 5 votes vote down vote up
def test_describe_result_attributes(self):
        """Pass an ``mstats.describe`` result and its field names to ``check_named_results``."""
        field_names = ('nobs', 'minmax', 'mean', 'variance', 'skewness',
                       'kurtosis')
        result = mstats.describe(np.arange(5))
        check_named_results(result, field_names, ma=True)
Example 36
Project: ble5-nrf52-mac   Author: tomasero   File: test_stats.py    MIT License 5 votes vote down vote up
def test_describe_scalar(self):
        with suppress_warnings() as sup, np.errstate(invalid="ignore"):
            sup.filter(RuntimeWarning, "Degrees of freedom <= 0 for slice")
            n, mm, m, v, sk, kurt = stats.describe(4.)
        assert_equal(n, 1)
        assert_equal(mm, (4.0, 4.0))
        assert_equal(m, 4.0)
        assert_(np.isnan(v))
        assert_array_almost_equal(sk, 0.0, decimal=13)
        assert_array_almost_equal(kurt, -3.0, decimal=13) 
Example 37
Project: ble5-nrf52-mac   Author: tomasero   File: test_stats.py    MIT License 5 votes vote down vote up
def test_describe_numbers(self):
        x = np.vstack((np.ones((3,4)), 2 * np.ones((2,4))))
        nc, mmc = (5, ([1., 1., 1., 1.], [2., 2., 2., 2.]))
        mc = np.array([1.4, 1.4, 1.4, 1.4])
        vc = np.array([0.3, 0.3, 0.3, 0.3])
        skc = [0.40824829046386357] * 4
        kurtc = [-1.833333333333333] * 4
        n, mm, m, v, sk, kurt = stats.describe(x)
        assert_equal(n, nc)
        assert_equal(mm, mmc)
        assert_equal(m, mc)
        assert_equal(v, vc)
        assert_array_almost_equal(sk, skc, decimal=13)
        assert_array_almost_equal(kurt, kurtc, decimal=13)
        n, mm, m, v, sk, kurt = stats.describe(x.T, axis=1)
        assert_equal(n, nc)
        assert_equal(mm, mmc)
        assert_equal(m, mc)
        assert_equal(v, vc)
        assert_array_almost_equal(sk, skc, decimal=13)
        assert_array_almost_equal(kurt, kurtc, decimal=13)

        x = np.arange(10.)
        x[9] = np.nan

        nc, mmc = (9, (0.0, 8.0))
        mc = 4.0
        vc = 7.5
        skc = 0.0
        kurtc = -1.2300000000000002
        n, mm, m, v, sk, kurt = stats.describe(x, nan_policy='omit')
        assert_equal(n, nc)
        assert_equal(mm, mmc)
        assert_equal(m, mc)
        assert_equal(v, vc)
        assert_array_almost_equal(sk, skc)
        assert_array_almost_equal(kurt, kurtc, decimal=13)

        assert_raises(ValueError, stats.describe, x, nan_policy='raise')
        assert_raises(ValueError, stats.describe, x, nan_policy='foobar') 
Example 38
Project: ble5-nrf52-mac   Author: tomasero   File: test_stats.py    MIT License 5 votes vote down vote up
def test_describe_result_attributes(self):
        """Pass a ``stats.describe`` result and its field names to ``check_named_results``."""
        field_names = ('nobs', 'minmax', 'mean', 'variance', 'skewness',
                       'kurtosis')
        result = stats.describe(np.arange(5))
        check_named_results(result, field_names)
Example 39
Project: ble5-nrf52-mac   Author: tomasero   File: test_stats.py    MIT License 5 votes vote down vote up
def test_describe_empty(self):
        assert_raises(ValueError, stats.describe, []) 
Example 40
Project: surfclass   Author: Kortforsyningen   File: train.py    MIT License 5 votes vote down vote up
def genericmodel(trainingdata, outputfile, numtrees, processors):
    r"""Trains a new generic model using an .npz file generated by "surfclass prepare traindata [OPTIONS]".

    The traindata should match the model definition.

    Example:
        surfclass train genericmodel "genericmodel_data.npz" "genericmodel_model.sav"

    """
    # Parameters (as used below):
    #   trainingdata - passed to load_training_data(); yields classes and features
    #   outputfile   - path the pickled, trained model is written to
    #   numtrees     - forwarded to classifier.train() as num_trees
    #   processors   - forwarded to classifier.train() as processors
    (_, classes, features) = load_training_data(trainingdata)

    click.echo("Stats for feature data:")
    click.echo(stats.describe(features))

    # Log inputs
    logger.debug(
        "Training genericmodel with arguments: %s, %s, %s",
        trainingdata,
        outputfile,
        numtrees,
    )

    # One classifier input per feature column.
    classifier = RandomForest(features.shape[1], model=None)
    logger.debug("Training Model...")
    rf_trained = classifier.train(
        features, classes, num_trees=numtrees, processors=processors
    )

    # Write through a context manager so the file handle is flushed and
    # closed deterministically, even if pickling raises.
    with open(outputfile, "wb") as model_file:
        pickle.dump(rf_trained, model_file)
    logger.debug(
        "Training done, written .sav to: %s", pathlib.Path(outputfile).resolve()
    )
Example 41
Project: otalign   Author: dmelis   File: gw_optim.py    GNU General Public License v3.0 5 votes vote down vote up
def compute_distances(self, X, Y):
        """Compute the intra-domain distance matrices of X and Y and store them.

        The pairwise distances of X with itself (C1) and of Y with itself (C2)
        are computed with ``self.metric``, optionally rescaled according to
        ``self.normalize_dists`` ('max', 'mean' or 'median'; any other value
        leaves the matrices unscaled), summarised on stdout and finally stored
        as ``self.C1`` and ``self.C2``.

        Raises:
            NotImplementedError: if ``self.gpu`` is set and median
                normalization is requested.
        """
        print('Computing intra-domain distance matrices...')

        if not self.gpu:
            C1 = sp.spatial.distance.cdist(X, X, metric=self.metric)
            C2 = sp.spatial.distance.cdist(Y, Y, metric=self.metric)
            if self.normalize_dists == 'max':
                C1 /= C1.max()
                C2 /= C2.max()
            elif self.normalize_dists == 'mean':
                C1 /= C1.mean()
                C2 /= C2.mean()
            elif self.normalize_dists == 'median':
                C1 /= np.median(C1)
                C2 /= np.median(C2)
        else:
            C1 = cdist(X, X, metric=self.metric, returnAsGPU=True)
            C2 = cdist(Y, Y, metric=self.metric, returnAsGPU=True)
            if self.normalize_dists == 'max':
                C1.divide(float(np.max(C1.asarray())))
                C2.divide(float(np.max(C2.asarray())))
            elif self.normalize_dists == 'mean':
                C1.divide(float(np.mean(C1.asarray())))
                C2.divide(float(np.mean(C2.asarray())))
            elif self.normalize_dists == 'median':
                # NotImplemented is a non-callable singleton; the exception
                # class is NotImplementedError.
                raise NotImplementedError(
                    "Median normalization not implemented in GPU yet")

        stats_C1 = describe(C1.flatten())
        stats_C2 = describe(C2.flatten())

        for (k, C, v) in [('C1', C1, stats_C1), ('C2', C2, stats_C2)]:
            print('Stats Distance Matrix {}. mean: {:8.2f}, median: {:8.2f},\
             min: {:8.2f}, max:{:8.2f}'.format(k, v.mean, np.median(C), v.minmax[0], v.minmax[1]))

        self.C1, self.C2 = C1, C2
Example 42
Project: poker   Author: surgebiswas   File: test_mstats_basic.py    MIT License 5 votes vote down vote up
def test_describe(self):
        """Compare ``stats.describe`` and ``stats.mstats.describe`` entry by entry."""
        for size in self.get_n():
            sample, _, masked_sample, _ = self.generate_xy_sample(size)
            plain = stats.describe(sample, ddof=1)
            masked = stats.mstats.describe(masked_sample, ddof=1)
            # Both results are indexed positionally over their six entries.
            for idx in range(6):
                lhs = np.asarray(plain[idx])
                rhs = np.asarray(masked[idx])
                assert_almost_equal(lhs, rhs, decimal=12)
Example 43
Project: poker   Author: surgebiswas   File: test_mstats_basic.py    MIT License 5 votes vote down vote up
def test_describe_result_attributes(self):
        """Hand the ``mstats.describe`` result and its field names to ``check_named_results``."""
        field_names = (
            'nobs',
            'minmax',
            'mean',
            'variance',
            'skewness',
            'kurtosis',
        )
        summary = mstats.describe(np.arange(5))
        check_named_results(summary, field_names, ma=True)
Example 44
Project: poker   Author: surgebiswas   File: test_stats.py    MIT License 5 votes vote down vote up
def test_describe_scalar(self):
        """Describe the scalar ``4.`` and check every returned statistic."""
        # RuntimeWarnings raised while describing a single value are ignored.
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', category=RuntimeWarning)
            result = stats.describe(4.)
        nobs, minmax, mean, variance, skewness, kurtosis = result

        assert_equal(nobs, 1)
        assert_equal(minmax, (4.0, 4.0))
        assert_equal(mean, 4.0)
        assert_(np.isnan(variance))
        assert_array_almost_equal(skewness, 0.0, decimal=13)
        assert_array_almost_equal(kurtosis, -3.0, decimal=13)
Example 45
Project: poker   Author: surgebiswas   File: test_stats.py    MIT License 5 votes vote down vote up
def test_describe_numbers(self):
        """Check ``stats.describe`` on a 2-D sample in two orientations, then on 1-D data holding a NaN."""
        # Three rows of ones stacked on top of two rows of twos.
        data = np.vstack((np.ones((3, 4)), 2 * np.ones((2, 4))))
        want_nobs = 5
        want_minmax = ([1., 1., 1., 1.], [2., 2., 2., 2.])
        want_mean = np.array([1.4, 1.4, 1.4, 1.4])
        want_var = np.array([0.3, 0.3, 0.3, 0.3])
        want_skew = [0.40824829046386357] * 4
        want_kurt = [-1.833333333333333] * 4

        # The call without an axis and the transposed call with axis=1 are
        # held to the same expected values.
        for sample, options in ((data, {}), (data.T, {'axis': 1})):
            nobs, minmax, mean, var, skew, kurt = stats.describe(sample,
                                                                 **options)
            assert_equal(nobs, want_nobs)
            assert_equal(minmax, want_minmax)
            assert_equal(mean, want_mean)
            assert_equal(var, want_var)
            assert_array_almost_equal(skew, want_skew, decimal=13)
            assert_array_almost_equal(kurt, want_kurt, decimal=13)

        # 0..8 followed by a NaN in the last slot.
        data = np.arange(10.)
        data[9] = np.nan

        nobs, minmax, mean, var, skew, kurt = stats.describe(
            data, nan_policy='omit')
        assert_equal(nobs, 9)
        assert_equal(minmax, (0.0, 8.0))
        assert_equal(mean, 4.0)
        assert_equal(var, 7.5)
        assert_array_almost_equal(skew, 0.0)
        assert_array_almost_equal(kurt, -1.2300000000000002, decimal=13)

        # Both the explicit 'raise' policy and an unknown policy must fail.
        for policy in ('raise', 'foobar'):
            assert_raises(ValueError, stats.describe, data, nan_policy=policy)
Example 46
Project: poker   Author: surgebiswas   File: test_stats.py    MIT License 5 votes vote down vote up
def test_describe_result_attributes(self):
        """Hand the ``stats.describe`` result and its field names to ``check_named_results``."""
        field_names = (
            'nobs',
            'minmax',
            'mean',
            'variance',
            'skewness',
            'kurtosis',
        )
        summary = stats.describe(np.arange(5))
        check_named_results(summary, field_names)
Example 47
Project: poker   Author: surgebiswas   File: test_stats.py    MIT License 5 votes vote down vote up
def test_describe_empty(self):
        """``stats.describe`` is expected to raise ``ValueError`` for an empty list."""
        empty_input = []
        assert_raises(ValueError, stats.describe, empty_input)
Example 48
Project: Test-stock-prediction-algorithms   Author: timestocome   File: PortfolioOptimizationOfIndexFunds.py    MIT License 5 votes vote down vote up
def print_statistics(array):
    """Print a two-column table of summary statistics for ``array``.

    The statistics come from ``scs.describe``: sample size, minimum, maximum,
    mean, standard deviation (square root of the reported variance), skewness
    and kurtosis. Every value is printed with five decimals.
    """
    summary = scs.describe(array)
    low, high = summary.minmax
    rows = (
        ('size', summary.nobs),
        ('min', low),
        ('max', high),
        ('mean', summary.mean),
        ('std', np.sqrt(summary.variance)),
        ('skew', summary.skewness),
        ('kurtosis', summary.kurtosis),  # label was misspelled 'kutosis'
    )

    print("%14s %15s" % ('statistic', 'value'))
    print(30 * '-')
    for label, value in rows:
        print("%14s %15.5f" % (label, value))
Example 49
Project: P3_image_processing   Author: latedude2   File: test_mstats_basic.py    MIT License 5 votes vote down vote up
def test_describe(self):
        """Compare ``stats.describe`` and ``stats.mstats.describe`` entry by entry."""
        for size in self.get_n():
            sample, _, masked_sample, _ = self.generate_xy_sample(size)
            plain = stats.describe(sample, ddof=1)
            masked = stats.mstats.describe(masked_sample, ddof=1)
            # Both results are indexed positionally over their six entries.
            for idx in range(6):
                lhs = np.asarray(plain[idx])
                rhs = np.asarray(masked[idx])
                assert_almost_equal(lhs, rhs, decimal=12)
Example 50
Project: P3_image_processing   Author: latedude2   File: test_mstats_basic.py    MIT License 5 votes vote down vote up
def test_describe_result_attributes(self):
        """Hand the ``mstats.describe`` result and its field names to ``check_named_results``."""
        field_names = (
            'nobs',
            'minmax',
            'mean',
            'variance',
            'skewness',
            'kurtosis',
        )
        summary = mstats.describe(np.arange(5))
        check_named_results(summary, field_names, ma=True)
Example 51
Project: P3_image_processing   Author: latedude2   File: test_stats.py    MIT License 5 votes vote down vote up
def test_describe_scalar(self):
        """Describe the scalar ``4.`` and check every returned statistic."""
        with suppress_warnings() as quiet:
            with np.errstate(invalid="ignore"):
                # Silence the warning filtered by message below while the
                # single value is described.
                quiet.filter(RuntimeWarning,
                             "Degrees of freedom <= 0 for slice")
                result = stats.describe(4.)
        nobs, minmax, mean, variance, skewness, kurtosis = result

        assert_equal(nobs, 1)
        assert_equal(minmax, (4.0, 4.0))
        assert_equal(mean, 4.0)
        assert_(np.isnan(variance))
        assert_array_almost_equal(skewness, 0.0, decimal=13)
        assert_array_almost_equal(kurtosis, -3.0, decimal=13)
Example 52
Project: P3_image_processing   Author: latedude2   File: test_stats.py    MIT License 5 votes vote down vote up
def test_describe_numbers(self):
        """Check ``stats.describe`` on a 2-D sample in two orientations, then on 1-D data holding a NaN."""
        # Three rows of ones stacked on top of two rows of twos.
        data = np.vstack((np.ones((3, 4)), 2 * np.ones((2, 4))))
        want_nobs = 5
        want_minmax = ([1., 1., 1., 1.], [2., 2., 2., 2.])
        want_mean = np.array([1.4, 1.4, 1.4, 1.4])
        want_var = np.array([0.3, 0.3, 0.3, 0.3])
        want_skew = [0.40824829046386357] * 4
        want_kurt = [-1.833333333333333] * 4

        # The call without an axis and the transposed call with axis=1 are
        # held to the same expected values.
        for sample, options in ((data, {}), (data.T, {'axis': 1})):
            nobs, minmax, mean, var, skew, kurt = stats.describe(sample,
                                                                 **options)
            assert_equal(nobs, want_nobs)
            assert_equal(minmax, want_minmax)
            assert_equal(mean, want_mean)
            assert_equal(var, want_var)
            assert_array_almost_equal(skew, want_skew, decimal=13)
            assert_array_almost_equal(kurt, want_kurt, decimal=13)

        # 0..8 followed by a NaN in the last slot.
        data = np.arange(10.)
        data[9] = np.nan

        nobs, minmax, mean, var, skew, kurt = stats.describe(
            data, nan_policy='omit')
        assert_equal(nobs, 9)
        assert_equal(minmax, (0.0, 8.0))
        assert_equal(mean, 4.0)
        assert_equal(var, 7.5)
        assert_array_almost_equal(skew, 0.0)
        assert_array_almost_equal(kurt, -1.2300000000000002, decimal=13)

        # Both the explicit 'raise' policy and an unknown policy must fail.
        for policy in ('raise', 'foobar'):
            assert_raises(ValueError, stats.describe, data, nan_policy=policy)
Example 53
Project: P3_image_processing   Author: latedude2   File: test_stats.py    MIT License 5 votes vote down vote up
def test_describe_result_attributes(self):
        """Hand the ``stats.describe`` result and its field names to ``check_named_results``."""
        field_names = (
            'nobs',
            'minmax',
            'mean',
            'variance',
            'skewness',
            'kurtosis',
        )
        summary = stats.describe(np.arange(5))
        check_named_results(summary, field_names)
Example 54
Project: P3_image_processing   Author: latedude2   File: test_stats.py    MIT License 5 votes vote down vote up
def test_describe_empty(self):
        """``stats.describe`` is expected to raise ``ValueError`` for an empty list."""
        empty_input = []
        assert_raises(ValueError, stats.describe, empty_input)
Example 55
Project: Codigo-Network   Author: davinci26   File: comparison_plotter.py    GNU General Public License v3.0 5 votes vote down vote up
def statistics(filepath):
    """Print ``stats.describe`` for each entry of item 5 of the parsed file.

    ``parse_file(filepath)`` is indexed at position 5 and every element of
    that item is described and printed, followed by a separator line.
    """
    parsed = parse_file(filepath)
    series_collection = parsed[5]
    for series in series_collection:
        summary = stats.describe(series)
        print(summary)
        print("==========================================")
Example 56
Project: GraphicDesignPatternByPython   Author: Relph1119   File: test_mstats_basic.py    MIT License 5 votes vote down vote up
def test_describe(self):
        """Compare ``stats.describe`` and ``stats.mstats.describe`` entry by entry."""
        for size in self.get_n():
            sample, _, masked_sample, _ = self.generate_xy_sample(size)
            plain = stats.describe(sample, ddof=1)
            masked = stats.mstats.describe(masked_sample, ddof=1)
            # Both results are indexed positionally over their six entries.
            for idx in range(6):
                lhs = np.asarray(plain[idx])
                rhs = np.asarray(masked[idx])
                assert_almost_equal(lhs, rhs, decimal=12)
Example 57
Project: GraphicDesignPatternByPython   Author: Relph1119   File: test_mstats_basic.py    MIT License 5 votes vote down vote up
def test_describe_result_attributes(self):
        """Hand the ``mstats.describe`` result and its field names to ``check_named_results``."""
        field_names = (
            'nobs',
            'minmax',
            'mean',
            'variance',
            'skewness',
            'kurtosis',
        )
        summary = mstats.describe(np.arange(5))
        check_named_results(summary, field_names, ma=True)
Example 58
Project: GraphicDesignPatternByPython   Author: Relph1119   File: test_stats.py    MIT License 5 votes vote down vote up
def test_describe_scalar(self):
        """Describe the scalar ``4.`` and check every returned statistic."""
        with suppress_warnings() as quiet:
            with np.errstate(invalid="ignore"):
                # Silence the warning filtered by message below while the
                # single value is described.
                quiet.filter(RuntimeWarning,
                             "Degrees of freedom <= 0 for slice")
                result = stats.describe(4.)
        nobs, minmax, mean, variance, skewness, kurtosis = result

        assert_equal(nobs, 1)
        assert_equal(minmax, (4.0, 4.0))
        assert_equal(mean, 4.0)
        assert_(np.isnan(variance))
        assert_array_almost_equal(skewness, 0.0, decimal=13)
        assert_array_almost_equal(kurtosis, -3.0, decimal=13)
Example 59
Project: GraphicDesignPatternByPython   Author: Relph1119   File: test_stats.py    MIT License 5 votes vote down vote up
def test_describe_numbers(self):
        """Check ``stats.describe`` on a 2-D sample in two orientations, then on 1-D data holding a NaN."""
        # Three rows of ones stacked on top of two rows of twos.
        data = np.vstack((np.ones((3, 4)), 2 * np.ones((2, 4))))
        want_nobs = 5
        want_minmax = ([1., 1., 1., 1.], [2., 2., 2., 2.])
        want_mean = np.array([1.4, 1.4, 1.4, 1.4])
        want_var = np.array([0.3, 0.3, 0.3, 0.3])
        want_skew = [0.40824829046386357] * 4
        want_kurt = [-1.833333333333333] * 4

        # The call without an axis and the transposed call with axis=1 are
        # held to the same expected values.
        for sample, options in ((data, {}), (data.T, {'axis': 1})):
            nobs, minmax, mean, var, skew, kurt = stats.describe(sample,
                                                                 **options)
            assert_equal(nobs, want_nobs)
            assert_equal(minmax, want_minmax)
            assert_equal(mean, want_mean)
            assert_equal(var, want_var)
            assert_array_almost_equal(skew, want_skew, decimal=13)
            assert_array_almost_equal(kurt, want_kurt, decimal=13)

        # 0..8 followed by a NaN in the last slot.
        data = np.arange(10.)
        data[9] = np.nan

        nobs, minmax, mean, var, skew, kurt = stats.describe(
            data, nan_policy='omit')
        assert_equal(nobs, 9)
        assert_equal(minmax, (0.0, 8.0))
        assert_equal(mean, 4.0)
        assert_equal(var, 7.5)
        assert_array_almost_equal(skew, 0.0)
        assert_array_almost_equal(kurt, -1.2300000000000002, decimal=13)

        # Both the explicit 'raise' policy and an unknown policy must fail.
        for policy in ('raise', 'foobar'):
            assert_raises(ValueError, stats.describe, data, nan_policy=policy)
Example 60
Project: GraphicDesignPatternByPython   Author: Relph1119   File: test_stats.py    MIT License 5 votes vote down vote up
def test_describe_result_attributes(self):
        """Hand the ``stats.describe`` result and its field names to ``check_named_results``."""
        field_names = (
            'nobs',
            'minmax',
            'mean',
            'variance',
            'skewness',
            'kurtosis',
        )
        summary = stats.describe(np.arange(5))
        check_named_results(summary, field_names)
Example 61
Project: GraphicDesignPatternByPython   Author: Relph1119   File: test_stats.py    MIT License 5 votes vote down vote up
def test_describe_empty(self):
        """``stats.describe`` is expected to raise ``ValueError`` for an empty list."""
        empty_input = []
        assert_raises(ValueError, stats.describe, empty_input)
Example 62
Project: recsys-challenge-2018   Author: irhete   File: metrics.py    Apache License 2.0 5 votes vote down vote up
def aggregate_metrics(ground_truth, sub, k, candidates):
    """Average the per-candidate metrics and report submission coverage.

    For every entry of ``candidates`` the metrics are taken from
    ``get_all_metrics(ground_truth[p], sub[p], k)``; an entry missing from
    ``sub`` is scored as ``Metrics(0, 0, 0)`` and counted as a miss.

    Returns a ``MetricsSummary`` holding the mean r_precision, mean ndcg,
    mean plex_clicks and the coverage ``1 - misses / candidates seen``.
    """
    r_precision_scores = []
    ndcg_scores = []
    click_scores = []
    missing = 0
    seen = 0

    for candidate in candidates:
        seen += 1
        if candidate in sub:
            scored = get_all_metrics(ground_truth[candidate], sub[candidate], k)
        else:
            missing += 1
            scored = Metrics(0, 0, 0)  # TODO: make sure this is right
        r_precision_scores.append(scored.r_precision)
        ndcg_scores.append(scored.ndcg)
        click_scores.append(scored.plex_clicks)

    coverage = 1 - missing / float(seen)
    mean_r_precision = stats.describe(r_precision_scores).mean
    mean_ndcg = stats.describe(ndcg_scores).mean
    mean_clicks = stats.describe(click_scores).mean
    return MetricsSummary(mean_r_precision, mean_ndcg, mean_clicks, coverage)

# This gist shows how to calculate the r_precision score using artist fallback if track doesn't match
# For more information please visit https://recsys-challenge.spotify.com 
Example 63
Project: Hybrid-Weighted-Embedding-Recommender   Author: faizanahemad   File: validation.py    MIT License 5 votes vote down vote up
def error_analysis(train_affinities, validation_affinities, error_df, title):
    """Print summary tables for ``error_df`` and show three diagnostic plots.

    ``error_df`` is read through its "actuals", "predictions" and "errors"
    columns. ``train_affinities`` and ``validation_affinities`` are accepted
    but not used by this function.
    """
    # TODO: Error vs User Rating Count
    print("-x-" * 30)
    print("%s: Error Analysis -: " % title)

    print(error_df.describe())

    print("Analysis By actuals")
    per_actual = error_df.groupby(["actuals"]).agg(["mean", "std"])
    print(per_actual)

    print("Describe Errors -: ")
    print(describe(error_df["errors"].values))

    # (plot title, x-axis label, scatterplot keyword arguments)
    scatter_specs = (
        ("Errors vs Actuals", "Actuals",
         dict(x="actuals", y="errors")),
        ("Errors vs Predictions", "Predictions",
         dict(x="predictions", y="errors", hue="actuals")),
    )
    for heading, x_label, plot_kwargs in scatter_specs:
        plt.figure(figsize=(8, 6))
        sns.scatterplot(data=error_df, **plot_kwargs)
        plt.title(heading)
        plt.xlabel(x_label)
        plt.ylabel("Errors")
        plt.show()

    plt.figure(figsize=(8, 6))
    sns.distplot(error_df["errors"], bins=100)
    plt.title("Error Histogram")
    plt.show()
Example 64
Project: wine-ml-on-aws-lambda   Author: pierreant   File: test_mstats_basic.py    Apache License 2.0 5 votes vote down vote up
def test_describe(self):
        """Compare ``stats.describe`` and ``stats.mstats.describe`` entry by entry."""
        for size in self.get_n():
            sample, _, masked_sample, _ = self.generate_xy_sample(size)
            plain = stats.describe(sample, ddof=1)
            masked = stats.mstats.describe(masked_sample, ddof=1)
            # Both results are indexed positionally over their six entries.
            for idx in range(6):
                lhs = np.asarray(plain[idx])
                rhs = np.asarray(masked[idx])
                assert_almost_equal(lhs, rhs, decimal=12)
Example 65
Project: wine-ml-on-aws-lambda   Author: pierreant   File: test_mstats_basic.py    Apache License 2.0 5 votes vote down vote up
def test_describe_result_attributes(self):
        """Hand the ``mstats.describe`` result and its field names to ``check_named_results``."""
        field_names = (
            'nobs',
            'minmax',
            'mean',
            'variance',
            'skewness',
            'kurtosis',
        )
        summary = mstats.describe(np.arange(5))
        check_named_results(summary, field_names, ma=True)
Example 66
Project: wine-ml-on-aws-lambda   Author: pierreant   File: test_stats.py    Apache License 2.0 5 votes vote down vote up
def test_describe_scalar(self):
        """Describe the scalar ``4.`` and check every returned statistic."""
        # RuntimeWarnings raised while describing a single value are ignored.
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', category=RuntimeWarning)
            result = stats.describe(4.)
        nobs, minmax, mean, variance, skewness, kurtosis = result

        assert_equal(nobs, 1)
        assert_equal(minmax, (4.0, 4.0))
        assert_equal(mean, 4.0)
        assert_(np.isnan(variance))
        assert_array_almost_equal(skewness, 0.0, decimal=13)
        assert_array_almost_equal(kurtosis, -3.0, decimal=13)
Example 67
Project: wine-ml-on-aws-lambda   Author: pierreant   File: test_stats.py    Apache License 2.0 5 votes vote down vote up
def test_describe_numbers(self):
        """Check ``stats.describe`` on a 2-D sample in two orientations, then on 1-D data holding a NaN."""
        # Three rows of ones stacked on top of two rows of twos.
        data = np.vstack((np.ones((3, 4)), 2 * np.ones((2, 4))))
        want_nobs = 5
        want_minmax = ([1., 1., 1., 1.], [2., 2., 2., 2.])
        want_mean = np.array([1.4, 1.4, 1.4, 1.4])
        want_var = np.array([0.3, 0.3, 0.3, 0.3])
        want_skew = [0.40824829046386357] * 4
        want_kurt = [-1.833333333333333] * 4

        # The call without an axis and the transposed call with axis=1 are
        # held to the same expected values.
        for sample, options in ((data, {}), (data.T, {'axis': 1})):
            nobs, minmax, mean, var, skew, kurt = stats.describe(sample,
                                                                 **options)
            assert_equal(nobs, want_nobs)
            assert_equal(minmax, want_minmax)
            assert_equal(mean, want_mean)
            assert_equal(var, want_var)
            assert_array_almost_equal(skew, want_skew, decimal=13)
            assert_array_almost_equal(kurt, want_kurt, decimal=13)

        # 0..8 followed by a NaN in the last slot.
        data = np.arange(10.)
        data[9] = np.nan

        nobs, minmax, mean, var, skew, kurt = stats.describe(
            data, nan_policy='omit')
        assert_equal(nobs, 9)
        assert_equal(minmax, (0.0, 8.0))
        assert_equal(mean, 4.0)
        assert_equal(var, 7.5)
        assert_array_almost_equal(skew, 0.0)
        assert_array_almost_equal(kurt, -1.2300000000000002, decimal=13)

        # Both the explicit 'raise' policy and an unknown policy must fail.
        for policy in ('raise', 'foobar'):
            assert_raises(ValueError, stats.describe, data, nan_policy=policy)
Example 68
Project: wine-ml-on-aws-lambda   Author: pierreant   File: test_stats.py    Apache License 2.0 5 votes vote down vote up
def test_describe_result_attributes(self):
        """Hand the ``stats.describe`` result and its field names to ``check_named_results``."""
        field_names = (
            'nobs',
            'minmax',
            'mean',
            'variance',
            'skewness',
            'kurtosis',
        )
        summary = stats.describe(np.arange(5))
        check_named_results(summary, field_names)
Example 69
Project: wine-ml-on-aws-lambda   Author: pierreant   File: test_stats.py    Apache License 2.0 5 votes vote down vote up
def test_describe_empty(self):
        """``stats.describe`` is expected to raise ``ValueError`` for an empty list."""
        empty_input = []
        assert_raises(ValueError, stats.describe, empty_input)
Example 70
Project: Splunking-Crime   Author: nccgroup   File: ex_extras.py    GNU Affero General Public License v3.0 5 votes vote down vote up
def examples_normexpand():
    """Print a walk-through comparing a sample with its ``NormExpan_gen`` fit.

    Draws 100 values from ``SkewNorm_gen().rvs(5, ...)``, builds a
    ``NormExpan_gen`` from them in 'sample' mode and prints the sample
    statistics, the distribution's 'mvsk' stats, their difference, the
    derived moment tuples and the pdf evaluated one unit either side of the
    first central-moment entry.
    """
    sample = SkewNorm_gen().rvs(5,size=100)
    expansion = NormExpan_gen(sample, mode='sample')

    # Entries 2.. of the describe result: mean, variance, skewness, kurtosis.
    sample_mvsk = stats.describe(sample)[2:]
    print('sample: mu,sig,sk,kur')
    print(sample_mvsk)

    dist_mvsk = expansion.stats(moments='mvsk')
    print('normexpan: mu,sig,sk,kur')
    print(dist_mvsk)
    print('mvsk diff distribution - sample')
    print(np.array(dist_mvsk) - np.array(sample_mvsk))
    print('normexpan attributes mvsk')
    print(mc2mvsk(expansion.cnt))
    print(expansion.mvsk)

    noncentral = mvsk2mnc(dist_mvsk)
    central = mnc2mc(noncentral)
    print('central moments')
    print(central)
    print('non-central moments')
    print(noncentral)

    approx_pdf = pdf_moments(central)
    print('\npdf approximation from moments')
    print('pdf at', central[0]-1, central[0]+1)
    print(approx_pdf([central[0]-1, central[0]+1]))
    print(expansion.pdf([central[0]-1, central[0]+1]))
Example 71
Project: Splunking-Crime   Author: nccgroup   File: extras.py    GNU Affero General Public License v3.0 5 votes vote down vote up
def __init__(self,args, **kwds):
        """Set up the expansion from data, from mvsk values or from central moments.

        The keyword ``mode`` (default 'sample') selects how ``args`` is read:

        * 'sample'  - ``args`` is passed to ``stats.describe`` and entries
          2.. of the result (mean, variance, skewness, kurtosis) become
          ``self.mvsk``; ``mvsk2mc`` converts them into ``self.cnt``.
        * 'mvsk'    - ``args`` is stored as ``self.mvsk`` and converted with
          ``mvsk2mc`` into ``self.cnt``.
        * 'centmom' - ``args`` is stored as ``self.cnt`` and converted with
          ``mc2mvsk`` into ``self.mvsk``.

        Raises:
            ValueError: if ``mode`` is none of the three values above.
        """
        #todo: replace with super call
        distributions.rv_continuous.__init__(self,
            name = 'Normal Expansion distribution', shapes = ' ',
            extradoc = '''
        The distribution is defined as the Gram-Charlier expansion of
        the normal distribution using the first four moments. The pdf
        is given by

        pdf(x) = (1+ skew/6.0 * H(xc,3) + kurt/24.0 * H(xc,4))*normpdf(xc)

        where xc = (x-mu)/sig is the standardized value of the random variable
        and H(xc,3) and H(xc,4) are Hermite polynomials

        Note: This distribution has to be parameterized during
        initialization and instantiation, and does not have a shape
        parameter after instantiation (similar to frozen distribution
        except for location and scale.) Location and scale can be used
        as with other distributions, however note, that they are relative
        to the initialized distribution.
        '''  )
        mode = kwds.get('mode', 'sample')

        if mode == 'sample':
            # describe() returns (nobs, minmax, mean, variance, skew, kurt);
            # only the last four are needed here.
            mu,sig,sk,kur = stats.describe(args)[2:]
            self.mvsk = (mu,sig,sk,kur)
            cnt = mvsk2mc((mu,sig,sk,kur))
        elif mode == 'mvsk':
            cnt = mvsk2mc(args)
            self.mvsk = args
        elif mode == 'centmom':
            cnt = args
            self.mvsk = mc2mvsk(cnt)
        else:
            # The previous message omitted 'sample', which is a valid mode
            # and in fact the default.
            raise ValueError("mode must be 'sample', 'mvsk' or 'centmom'")

        self.cnt = cnt
        self._pdf = pdf_mvsk(self.mvsk)
Example 72
Project: serverless-performance   Author: diegojancic   File: get-measures.py    MIT License 5 votes vote down vote up
def get_stats(data):
	"""Build one summary row for the current function from ``data``.

	The row starts with the module-level ``functionName``, the formatted
	``functionInfo["codeSize"]`` and a Yes/No flag for
	``functionInfo["inVpc"]``, followed by the mean and variance of columns
	0, 1 and 2 of ``stats.describe(data)`` and finally the observation count.
	"""
	summary = stats.describe(data)
	row_values = [
		functionName,
		"%.02f" % functionInfo["codeSize"],
		"Yes" if functionInfo["inVpc"] else "No",
	]

	# Columns in order: duration, billed duration, memory used.
	for column_index in range(3):
		row_values.append(summary.mean[column_index])
		row_values.append(summary.variance[column_index])

	# Samples
	row_values.append(summary.nobs)

	return row_values
	#print(tabulate([data_row], tablefmt="pipe", headers=summary_headers))


### LOAD ALL FUNCTIONS INFO ### 
Example 73
Project: bmlingam   Author: taku-y   File: test_prob.py    MIT License 5 votes vote down vote up
def _describe_and_check(txt, xs, ss):
    """Print the first four sample statistics of ``xs`` and compare them to ``ss``.

    ``txt`` is printed as a heading, followed by the mean, variance, skewness
    and kurtosis reported by ``stats.describe``. The four values must match
    ``ss`` within ``rtol=5e-2`` and ``atol=5e-2``.
    """
    summary = stats.describe(xs)
    observed = [summary.mean, summary.variance,
                summary.skewness, summary.kurtosis]
    templates = ('Mean: {}', 'Var : {}', 'Skew: {}', 'Kurt: {}')

    print(txt)
    for template, value in zip(templates, observed):
        print(template.format(value))

    assert_allclose(observed, ss, rtol=5e-2, atol=5e-2)
Example 74
Project: pyhal   Author: sirfoga   File: test_hal_maths_primes.py    GNU General Public License v3.0 5 votes vote down vote up
def test_blum_blum_shub():
    """Tests hal.maths.primes.blum_blum_shub method"""

    seed, amount = 6, 40
    first_prime, second_prime = 83, 103  # must be == 3 mod 4

    randoms = blum_blum_shub(seed, amount, first_prime, second_prime)
    assert len(randoms) == amount

    # The generated numbers should not be strongly skewed.
    skewness = describe(randoms).skewness
    assert abs(skewness) <= 0.5

    # Asking for zero numbers must give a falsy result.
    nothing = blum_blum_shub(seed, 0, first_prime, second_prime)
    assert not nothing
Example 75
Project: joligraf   Author: Laurans   File: npz_statistic.py    GNU General Public License v3.0 4 votes vote down vote up
def main(npz_filepath, out):
    """Render an HTML report describing every array stored in an ``.npz`` file.

    For each sub-array (after ``squeeze()``) the report shows its shape, its
    string form and its ``describe`` statistics next to a figure: a histogram
    for 1-D data, an image for 2-D data, or a placeholder message otherwise.

    Args:
        npz_filepath: path handed to ``np.load``.
        out: output file name; must end in ``.html``.

    Raises:
        AssertionError: if ``out`` does not end in ``.html``.
    """
    assert out[-5:] == ".html", "Please enter an html output file"

    widgets = []
    # The loaded archive is used as a context manager so it is closed even
    # when describing or rendering one of the arrays raises.
    with np.load(npz_filepath) as data:
        widgets += [
            Div(
                text="<h1>NPZ statistic</h1><b>Subfiles found</b></br>{}".format(
                    data.files
                )
            )
        ]
        output_file(out, title="NPZ statistic")
        for subfile in data.files:
            subdata = data[subfile].squeeze()
            array_repr = str(subdata)
            array_describe = pformat(dict(describe(subdata)._asdict()))

            if subdata.ndim == 1:
                hist, edges = np.histogram(subdata)

                hv_hist = hv.Histogram((edges, hist))
                hv_hist.opts(
                    tools=["hover"], title=subfile + " histogram", width=600
                )
                fig = hv.render(hv_hist)

            elif subdata.ndim == 2:
                heatmap = hv.Image(subdata)
                heatmap.opts(
                    colorbar=True,
                    width=600,
                    height=600,
                    tools=["hover"],
                    title=subfile,
                )
                fig = hv.render(heatmap)

            else:
                fig = Div(text="Too many dimensions to visualize")

            annotation = (
                "<h2>{}</h2>"
                "<b>Overview</b></br>{}</br>{}</br>"
                "<b>Describe</b></br>{}</br>"
            ).format(subfile, subdata.shape, array_repr, array_describe)
            # Keep the line breaks of the text dumps visible in HTML.
            annotation = annotation.replace("\n", "</br>")

            html_annotation = Div(text=annotation)

            widgets += [row([fig, html_annotation])]

    show(column(*widgets))
Example 76
Project: joligraf   Author: Laurans   File: json_statistic.py    GNU General Public License v3.0 4 votes vote down vote up
def main(json_path: str, type_: str, out: str):
    """Save an HTML report with a histogram and statistics per numeric JSON key.

    With ``type_ == "f"`` the single file ``json_path`` is read; otherwise
    every ``*.json`` file found recursively below ``json_path`` is read. From
    each file only top-level int, float and bool values are kept (bools are
    stored as ints). Each resulting DataFrame column gets one row in the
    report written to ``out``.
    """
    assert out[-5:] == ".html"

    if type_ != "f":
        list_json_paths = list(Path(json_path).glob("**/*.json"))
    else:
        assert json_path[-5:] == ".json"
        list_json_paths = [Path(json_path)]

    records: list = []
    for pathfile in list_json_paths:
        with open(pathfile) as json_file:
            payload = json.load(json_file)
            # Exact type checks: bool must be told apart from int.
            numeric_fields = {
                key: int(value) if type(value) == bool else value
                for key, value in payload.items()
                if type(value) in (int, float, bool)
            }
            records.append(numeric_fields)

    df = pd.DataFrame(records)
    widgets: list = []
    template = (
        "<h2>{}</h2>"
        "<b>Overview</b></br>{}</br>{}</br>"
        "<b>Describe</b></br>{}</br>"
    )

    for column_name in df.columns:
        values = df[column_name].values
        frequencies, edges = np.histogram(values)
        hist = hv.Histogram((edges, frequencies))
        hist.opts(tools=["hover"], title=column_name, width=600)

        shape = values.shape
        dump = np.array_repr(values)
        described = pformat(dict(describe(values)._asdict()))
        annotation = template.format(column_name, shape, dump, described)
        # Keep the line breaks of the text dumps visible in HTML.
        annotation = annotation.replace("\n", "</br>")
        html_annotation = Div(text=annotation)

        widgets.append(row(hv.render(hist), html_annotation))

    layout = column(*widgets)

    output_file(out, title=out)
    save(layout)