Python scipy.stats.sem() Examples

The following are code examples showing how to use scipy.stats.sem(). They are drawn from open-source Python projects.
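
As a quick orientation before the examples, note what scipy.stats.sem() computes: the sample standard deviation (ddof=1 by default) divided by the square root of the number of observations along the chosen axis. A minimal sketch:

import numpy as np
from scipy import stats

a = np.arange(20).reshape(5, 4)   # the array used in several of the tests below
print(stats.sem(a))               # per-column SEM (default axis=0, ddof=1): 2.82842712 for each column
print(np.std(a, axis=0, ddof=1) / np.sqrt(a.shape[0]))   # the same values computed by hand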

Example 1
Project: DataComp   Author: Cojabi   File: utils.py    Apache License 2.0
def conf_interval(data_series):
    """
    Calculate the confidence interval for the data distribution under the assumptions that it can be calculated using \
    a student-t distribution.

    :return start: Starting value of the interval
    :return end: Ending value of the interval
    """

    mean = np.mean(data_series)

    # half-width of the 95% CI: SEM times the two-sided t critical value
    conf_int = sem(data_series) * t.ppf((1 + 0.95) / 2, len(data_series) - 1)

    start = mean - conf_int
    end = mean + conf_int

    return start, end 
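
For comparison, scipy can produce the same interval in a single call via stats.t.interval, as several later examples do. A minimal sketch with hypothetical data:

import numpy as np
from scipy import stats

data = np.array([2.1, 2.5, 1.9, 2.8, 2.2])   # hypothetical sample
low, high = stats.t.interval(0.95, len(data) - 1,
                             loc=np.mean(data), scale=stats.sem(data))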
Example 2
Project: pyqmc   Author: WagnerGroup   File: reblock.py    MIT License
def optimally_reblocked(data):
    """
        Find optimal reblocking of input data. Takes in pandas
        DataFrame of raw data to reblock, returns DataFrame
        of reblocked data.
    """
    opt = opt_block(data)
    n_reblock = int(np.amax(opt))
    rb_data = reblock_by2(data, n_reblock)
    serr = rb_data.sem(axis=0)
    d = {
        "mean": rb_data.mean(axis=0),
        "standard error": serr,
        "standard error error": serr / np.sqrt(2 * (len(rb_data) - 1)),
        "reblocks": n_reblock,
    }
    return pd.DataFrame(d) 
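
The "standard error error" column applies the usual rule that the relative uncertainty of a standard-error estimate from n samples is about 1/sqrt(2(n-1)). A standalone sketch with hypothetical data:

import numpy as np
from scipy.stats import sem

blocks = np.random.randn(64)                       # hypothetical reblocked samples
serr = sem(blocks)                                 # standard error of the mean
serr_err = serr / np.sqrt(2 * (len(blocks) - 1))   # uncertainty of serr itself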
Example 3
Project: LaserTOF   Author: kyleuckert   File: test_mstats_basic.py    MIT License
def test_sem(self):
        # example from stats.sem doc
        a = np.arange(20).reshape(5,4)
        am = np.ma.array(a)
        r = stats.sem(a,ddof=1)
        rm = stats.mstats.sem(am, ddof=1)

        assert_allclose(r, 2.82842712, atol=1e-5)
        assert_allclose(rm, 2.82842712, atol=1e-5)

        for n in self.get_n():
            x, y, xm, ym = self.generate_xy_sample(n)
            assert_almost_equal(stats.mstats.sem(xm, axis=None, ddof=0),
                                stats.sem(x, axis=None, ddof=0), decimal=13)
            assert_almost_equal(stats.mstats.sem(ym, axis=None, ddof=0),
                                stats.sem(y, axis=None, ddof=0), decimal=13)
            assert_almost_equal(stats.mstats.sem(xm, axis=None, ddof=1),
                                stats.sem(x, axis=None, ddof=1), decimal=13)
            assert_almost_equal(stats.mstats.sem(ym, axis=None, ddof=1),
                                stats.sem(y, axis=None, ddof=1), decimal=13) 
Example 4
Project: LaserTOF   Author: kyleuckert   File: test_stats.py    MIT License
def test_sem(self):
        # This is not in R, so used:
        #     sqrt(var(testcase)*3/4)/sqrt(3)

        # y = stats.sem(self.shoes[0])
        # assert_approx_equal(y,0.775177399)
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', category=RuntimeWarning)
            y = stats.sem(self.scalar_testcase)
        assert_(np.isnan(y))

        y = stats.sem(self.testcase)
        assert_approx_equal(y, 0.6454972244)
        n = len(self.testcase)
        assert_allclose(stats.sem(self.testcase, ddof=0) * np.sqrt(n/(n-2)),
                        stats.sem(self.testcase, ddof=2))

        x = np.arange(10.)
        x[9] = np.nan
        assert_equal(stats.sem(x), np.nan)
        assert_equal(stats.sem(x, nan_policy='omit'), 0.9128709291752769)
        assert_raises(ValueError, stats.sem, x, nan_policy='raise')
        assert_raises(ValueError, stats.sem, x, nan_policy='foobar') 
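
The nan_policy behavior this test pins down can be reproduced directly:

import numpy as np
from scipy import stats

x = np.arange(10.)
x[9] = np.nan
print(stats.sem(x))                     # nan: NaNs propagate by default
print(stats.sem(x, nan_policy='omit'))  # 0.9128709291752769: the NaN is dropped
# stats.sem(x, nan_policy='raise') raises ValueError, as does any unrecognized policy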
Example 5
Project: att   Author: Centre-Alt-Rendiment-Esportiu   File: test_mstats_basic.py    GNU General Public License v3.0
def test_sem(self):
        # example from stats.sem doc
        a = np.arange(20).reshape(5,4)
        am = np.ma.array(a)
        r = stats.sem(a,ddof=1)
        rm = stats.mstats.sem(am, ddof=1)

        assert_allclose(r, 2.82842712, atol=1e-5)
        assert_allclose(rm, 2.82842712, atol=1e-5)

        for n in self.get_n():
            x, y, xm, ym = self.generate_xy_sample(n)
            assert_almost_equal(stats.mstats.sem(xm, axis=None, ddof=0),
                                stats.sem(x, axis=None, ddof=0), decimal=13)
            assert_almost_equal(stats.mstats.sem(ym, axis=None, ddof=0),
                                stats.sem(y, axis=None, ddof=0), decimal=13)
            assert_almost_equal(stats.mstats.sem(xm, axis=None, ddof=1),
                                stats.sem(x, axis=None, ddof=1), decimal=13)
            assert_almost_equal(stats.mstats.sem(ym, axis=None, ddof=1),
                                stats.sem(y, axis=None, ddof=1), decimal=13) 
Example 6
Project: ble5-nrf52-mac   Author: tomasero   File: test_mstats_basic.py    MIT License
def test_sem(self):
        # example from stats.sem doc
        a = np.arange(20).reshape(5, 4)
        am = np.ma.array(a)
        r = stats.sem(a, ddof=1)
        rm = stats.mstats.sem(am, ddof=1)

        assert_allclose(r, 2.82842712, atol=1e-5)
        assert_allclose(rm, 2.82842712, atol=1e-5)

        for n in self.get_n():
            x, y, xm, ym = self.generate_xy_sample(n)
            assert_almost_equal(stats.mstats.sem(xm, axis=None, ddof=0),
                                stats.sem(x, axis=None, ddof=0), decimal=13)
            assert_almost_equal(stats.mstats.sem(ym, axis=None, ddof=0),
                                stats.sem(y, axis=None, ddof=0), decimal=13)
            assert_almost_equal(stats.mstats.sem(xm, axis=None, ddof=1),
                                stats.sem(x, axis=None, ddof=1), decimal=13)
            assert_almost_equal(stats.mstats.sem(ym, axis=None, ddof=1),
                                stats.sem(y, axis=None, ddof=1), decimal=13) 
Example 7
Project: ble5-nrf52-mac   Author: tomasero   File: test_stats.py    MIT License
def test_sem(self):
        # This is not in R, so used:
        #     sqrt(var(testcase)*3/4)/sqrt(3)

        # y = stats.sem(self.shoes[0])
        # assert_approx_equal(y,0.775177399)
        with suppress_warnings() as sup, np.errstate(invalid="ignore"):
            sup.filter(RuntimeWarning, "Degrees of freedom <= 0 for slice")
            y = stats.sem(self.scalar_testcase)
        assert_(np.isnan(y))

        y = stats.sem(self.testcase)
        assert_approx_equal(y, 0.6454972244)
        n = len(self.testcase)
        assert_allclose(stats.sem(self.testcase, ddof=0) * np.sqrt(n/(n-2)),
                        stats.sem(self.testcase, ddof=2))

        x = np.arange(10.)
        x[9] = np.nan
        assert_equal(stats.sem(x), np.nan)
        assert_equal(stats.sem(x, nan_policy='omit'), 0.9128709291752769)
        assert_raises(ValueError, stats.sem, x, nan_policy='raise')
        assert_raises(ValueError, stats.sem, x, nan_policy='foobar') 
Example 8
Project: poker   Author: surgebiswas   File: test_mstats_basic.py    MIT License
def test_sem(self):
        # example from stats.sem doc
        a = np.arange(20).reshape(5,4)
        am = np.ma.array(a)
        r = stats.sem(a,ddof=1)
        rm = stats.mstats.sem(am, ddof=1)

        assert_allclose(r, 2.82842712, atol=1e-5)
        assert_allclose(rm, 2.82842712, atol=1e-5)

        for n in self.get_n():
            x, y, xm, ym = self.generate_xy_sample(n)
            assert_almost_equal(stats.mstats.sem(xm, axis=None, ddof=0),
                                stats.sem(x, axis=None, ddof=0), decimal=13)
            assert_almost_equal(stats.mstats.sem(ym, axis=None, ddof=0),
                                stats.sem(y, axis=None, ddof=0), decimal=13)
            assert_almost_equal(stats.mstats.sem(xm, axis=None, ddof=1),
                                stats.sem(x, axis=None, ddof=1), decimal=13)
            assert_almost_equal(stats.mstats.sem(ym, axis=None, ddof=1),
                                stats.sem(y, axis=None, ddof=1), decimal=13) 
Example 9
Project: poker   Author: surgebiswas   File: test_stats.py    MIT License
def test_sem(self):
        # This is not in R, so used:
        #     sqrt(var(testcase)*3/4)/sqrt(3)

        # y = stats.sem(self.shoes[0])
        # assert_approx_equal(y,0.775177399)
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', category=RuntimeWarning)
            y = stats.sem(self.scalar_testcase)
        assert_(np.isnan(y))

        y = stats.sem(self.testcase)
        assert_approx_equal(y, 0.6454972244)
        n = len(self.testcase)
        assert_allclose(stats.sem(self.testcase, ddof=0) * np.sqrt(n/(n-2)),
                        stats.sem(self.testcase, ddof=2))

        x = np.arange(10.)
        x[9] = np.nan
        assert_equal(stats.sem(x), np.nan)
        assert_equal(stats.sem(x, nan_policy='omit'), 0.9128709291752769)
        assert_raises(ValueError, stats.sem, x, nan_policy='raise')
        assert_raises(ValueError, stats.sem, x, nan_policy='foobar') 
Example 10
Project: P3_image_processing   Author: latedude2   File: test_mstats_basic.py    MIT License
def test_sem(self):
        # example from stats.sem doc
        a = np.arange(20).reshape(5, 4)
        am = np.ma.array(a)
        r = stats.sem(a, ddof=1)
        rm = stats.mstats.sem(am, ddof=1)

        assert_allclose(r, 2.82842712, atol=1e-5)
        assert_allclose(rm, 2.82842712, atol=1e-5)

        for n in self.get_n():
            x, y, xm, ym = self.generate_xy_sample(n)
            assert_almost_equal(stats.mstats.sem(xm, axis=None, ddof=0),
                                stats.sem(x, axis=None, ddof=0), decimal=13)
            assert_almost_equal(stats.mstats.sem(ym, axis=None, ddof=0),
                                stats.sem(y, axis=None, ddof=0), decimal=13)
            assert_almost_equal(stats.mstats.sem(xm, axis=None, ddof=1),
                                stats.sem(x, axis=None, ddof=1), decimal=13)
            assert_almost_equal(stats.mstats.sem(ym, axis=None, ddof=1),
                                stats.sem(y, axis=None, ddof=1), decimal=13) 
Example 11
Project: P3_image_processing   Author: latedude2   File: test_stats.py    MIT License
def test_sem(self):
        # This is not in R, so used:
        #     sqrt(var(testcase)*3/4)/sqrt(3)

        # y = stats.sem(self.shoes[0])
        # assert_approx_equal(y,0.775177399)
        with suppress_warnings() as sup, np.errstate(invalid="ignore"):
            sup.filter(RuntimeWarning, "Degrees of freedom <= 0 for slice")
            y = stats.sem(self.scalar_testcase)
        assert_(np.isnan(y))

        y = stats.sem(self.testcase)
        assert_approx_equal(y, 0.6454972244)
        n = len(self.testcase)
        assert_allclose(stats.sem(self.testcase, ddof=0) * np.sqrt(n/(n-2)),
                        stats.sem(self.testcase, ddof=2))

        x = np.arange(10.)
        x[9] = np.nan
        assert_equal(stats.sem(x), np.nan)
        assert_equal(stats.sem(x, nan_policy='omit'), 0.9128709291752769)
        assert_raises(ValueError, stats.sem, x, nan_policy='raise')
        assert_raises(ValueError, stats.sem, x, nan_policy='foobar') 
Example 12
Project: GraphicDesignPatternByPython   Author: Relph1119   File: test_mstats_basic.py    MIT License
def test_sem(self):
        # example from stats.sem doc
        a = np.arange(20).reshape(5,4)
        am = np.ma.array(a)
        r = stats.sem(a,ddof=1)
        rm = stats.mstats.sem(am, ddof=1)

        assert_allclose(r, 2.82842712, atol=1e-5)
        assert_allclose(rm, 2.82842712, atol=1e-5)

        for n in self.get_n():
            x, y, xm, ym = self.generate_xy_sample(n)
            assert_almost_equal(stats.mstats.sem(xm, axis=None, ddof=0),
                                stats.sem(x, axis=None, ddof=0), decimal=13)
            assert_almost_equal(stats.mstats.sem(ym, axis=None, ddof=0),
                                stats.sem(y, axis=None, ddof=0), decimal=13)
            assert_almost_equal(stats.mstats.sem(xm, axis=None, ddof=1),
                                stats.sem(x, axis=None, ddof=1), decimal=13)
            assert_almost_equal(stats.mstats.sem(ym, axis=None, ddof=1),
                                stats.sem(y, axis=None, ddof=1), decimal=13) 
Example 13
Project: GraphicDesignPatternByPython   Author: Relph1119   File: test_stats.py    MIT License
def test_sem(self):
        # This is not in R, so used:
        #     sqrt(var(testcase)*3/4)/sqrt(3)

        # y = stats.sem(self.shoes[0])
        # assert_approx_equal(y,0.775177399)
        with suppress_warnings() as sup, np.errstate(invalid="ignore"):
            sup.filter(RuntimeWarning, "Degrees of freedom <= 0 for slice")
            y = stats.sem(self.scalar_testcase)
        assert_(np.isnan(y))

        y = stats.sem(self.testcase)
        assert_approx_equal(y, 0.6454972244)
        n = len(self.testcase)
        assert_allclose(stats.sem(self.testcase, ddof=0) * np.sqrt(n/(n-2)),
                        stats.sem(self.testcase, ddof=2))

        x = np.arange(10.)
        x[9] = np.nan
        assert_equal(stats.sem(x), np.nan)
        assert_equal(stats.sem(x, nan_policy='omit'), 0.9128709291752769)
        assert_raises(ValueError, stats.sem, x, nan_policy='raise')
        assert_raises(ValueError, stats.sem, x, nan_policy='foobar') 
Example 14
Project: debarcer   Author: oicr-gsi   File: generate_plots.py    MIT License
def ExtractCoverage(ConsFile):
    '''
    (file) -> (float, float)

    :param ConsFile: Consensus file with raw depth at each position within a given region (i.e. not merged)

    Return a tuple with the mean read depth within that interval and the standard error of the mean
    '''
    
    L = []
    infile = open(ConsFile)
    Header = infile.readline().rstrip().split('\t')
    for line in infile:
        if 'chr' in line:
            line = line.rstrip().split('\t')
            fam = line[Header.index('FAM')]
            if fam == '0':
                L.append(float(line[Header.index('RAWDP')]))
    infile.close()
    M = np.mean(L)
    sem = stats.sem(L)
    return M, sem 
Example 15
Project: debarcer   Author: oicr-gsi   File: generate_plots.py    MIT License
def GetSampleCoverage(L):
    '''
    (list) -> dict

    :param L: A list of full paths to consensus files with umi count per interval (i.e. files not merged and not empty)

    Returns a dictionary mapping interval coordinates to a list with the mean and s.e.m. of coverage within the interval
    '''
    
    D = {}
    for filename in L:
        # extract region from filename 
        region = FormatRegion(filename)
        M, sem = ExtractCoverage(filename)
        D[region] = [M, sem]
    return D 
Example 16
Project: psychrometric-chart-makeover   Author: buds-lab   File: test_categorical.py    MIT License
def test_single_layer_stats(self):

        p = cat._CategoricalStatPlotter()

        g = pd.Series(np.repeat(list("abc"), 100))
        y = pd.Series(np.random.RandomState(0).randn(300))

        p.establish_variables(g, y)
        p.estimate_statistic(np.mean, 95, 10000)

        nt.assert_equal(p.statistic.shape, (3,))
        nt.assert_equal(p.confint.shape, (3, 2))

        npt.assert_array_almost_equal(p.statistic,
                                      y.groupby(g).mean())

        for ci, (_, grp_y) in zip(p.confint, y.groupby(g)):
            sem = stats.sem(grp_y)
            mean = grp_y.mean()
            half_ci = stats.norm.ppf(.975) * sem
            ci_want = mean - half_ci, mean + half_ci
            npt.assert_array_almost_equal(ci_want, ci, 2) 
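
The interval checked here is the normal approximation, mean +/- z * SEM with z = stats.norm.ppf(.975) (about 1.96). A standalone sketch with hypothetical data:

import numpy as np
from scipy import stats

grp_y = np.random.RandomState(0).randn(100)        # hypothetical group of values
half_ci = stats.norm.ppf(.975) * stats.sem(grp_y)  # ~1.96 standard errors
ci = grp_y.mean() - half_ci, grp_y.mean() + half_ci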
Example 17
Project: psychrometric-chart-makeover   Author: buds-lab   File: test_categorical.py    MIT License
def test_single_layer_stats_with_missing_data(self):

        p = cat._CategoricalStatPlotter()

        g = pd.Series(np.repeat(list("abc"), 100))
        y = pd.Series(np.random.RandomState(0).randn(300))

        p.establish_variables(g, y, order=list("abdc"))
        p.estimate_statistic(np.mean, 95, 10000)

        nt.assert_equal(p.statistic.shape, (4,))
        nt.assert_equal(p.confint.shape, (4, 2))

        mean = y[g == "b"].mean()
        sem = stats.sem(y[g == "b"])
        half_ci = stats.norm.ppf(.975) * sem
        ci = mean - half_ci, mean + half_ci
        npt.assert_almost_equal(p.statistic[1], mean)
        npt.assert_array_almost_equal(p.confint[1], ci, 2)

        npt.assert_equal(p.statistic[2], np.nan)
        npt.assert_array_equal(p.confint[2], (np.nan, np.nan)) 
Example 18
Project: psychrometric-chart-makeover   Author: buds-lab   File: test_categorical.py    MIT License
def test_nested_stats(self):

        p = cat._CategoricalStatPlotter()

        g = pd.Series(np.repeat(list("abc"), 100))
        h = pd.Series(np.tile(list("xy"), 150))
        y = pd.Series(np.random.RandomState(0).randn(300))

        p.establish_variables(g, y, h)
        p.estimate_statistic(np.mean, 95, 50000)

        nt.assert_equal(p.statistic.shape, (3, 2))
        nt.assert_equal(p.confint.shape, (3, 2, 2))

        npt.assert_array_almost_equal(p.statistic,
                                      y.groupby([g, h]).mean().unstack())

        for ci_g, (_, grp_y) in zip(p.confint, y.groupby(g)):
            for ci, hue_y in zip(ci_g, [grp_y[::2], grp_y[1::2]]):
                sem = stats.sem(hue_y)
                mean = hue_y.mean()
                half_ci = stats.norm.ppf(.975) * sem
                ci_want = mean - half_ci, mean + half_ci
                npt.assert_array_almost_equal(ci_want, ci, 2) 
Example 19
Project: wine-ml-on-aws-lambda   Author: pierreant   File: test_mstats_basic.py    Apache License 2.0
def test_sem(self):
        # example from stats.sem doc
        a = np.arange(20).reshape(5,4)
        am = np.ma.array(a)
        r = stats.sem(a,ddof=1)
        rm = stats.mstats.sem(am, ddof=1)

        assert_allclose(r, 2.82842712, atol=1e-5)
        assert_allclose(rm, 2.82842712, atol=1e-5)

        for n in self.get_n():
            x, y, xm, ym = self.generate_xy_sample(n)
            assert_almost_equal(stats.mstats.sem(xm, axis=None, ddof=0),
                                stats.sem(x, axis=None, ddof=0), decimal=13)
            assert_almost_equal(stats.mstats.sem(ym, axis=None, ddof=0),
                                stats.sem(y, axis=None, ddof=0), decimal=13)
            assert_almost_equal(stats.mstats.sem(xm, axis=None, ddof=1),
                                stats.sem(x, axis=None, ddof=1), decimal=13)
            assert_almost_equal(stats.mstats.sem(ym, axis=None, ddof=1),
                                stats.sem(y, axis=None, ddof=1), decimal=13) 
Example 20
Project: wine-ml-on-aws-lambda   Author: pierreant   File: test_stats.py    Apache License 2.0
def test_sem(self):
        # This is not in R, so used:
        #     sqrt(var(testcase)*3/4)/sqrt(3)

        # y = stats.sem(self.shoes[0])
        # assert_approx_equal(y,0.775177399)
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', category=RuntimeWarning)
            y = stats.sem(self.scalar_testcase)
        assert_(np.isnan(y))

        y = stats.sem(self.testcase)
        assert_approx_equal(y, 0.6454972244)
        n = len(self.testcase)
        assert_allclose(stats.sem(self.testcase, ddof=0) * np.sqrt(n/(n-2)),
                        stats.sem(self.testcase, ddof=2))

        x = np.arange(10.)
        x[9] = np.nan
        assert_equal(stats.sem(x), np.nan)
        assert_equal(stats.sem(x, nan_policy='omit'), 0.9128709291752769)
        assert_raises(ValueError, stats.sem, x, nan_policy='raise')
        assert_raises(ValueError, stats.sem, x, nan_policy='foobar') 
Example 21
Project: pypastry   Author: datapastry   File: evaluation.py    MIT License
def _evaluate_predictors(X, predictors, y, groups, cross_validator, scorer):
    run_infos = []
    for predictor in predictors:
        start = datetime.utcnow()

        scores = _get_scores(X, y, groups, cross_validator, predictor, scorer)

        end = datetime.utcnow()

        scores_array = np.hstack(scores)
        mean_score = scores_array.mean()
        sem_score = sem(scores_array)
        results = {'test_score': mean_score, 'test_score_sem': sem_score}

        model_info = get_model_info(predictor)

        run_info = {
            'run_start': str(start),
            'run_end': str(end),
            'run_seconds': (end - start).total_seconds(),
            'results': results,
            'model_info': model_info,
        }
        run_infos.append(run_info)
    return run_infos 
Example 22
Project: conan   Author: HITS-MBM   File: conan.py    GNU General Public License v3.0
def block_analysis(vectors, nres, dt):
    b = np.array(vectors)
    i = 0
    block_outf = open("blocking/block_out.txt", "w+")
    print("dt=",dt)
    block_outf.write("#N_iter N_blocks stderr (nm)  stderr_err (nm) corr.time est. (ps)\n")
    while (len(b) >= 2):
        sems = sem(b)
        sem_avg = np.linalg.norm(sems)/nres
        sem_avg_err = sem_avg / np.sqrt(2 * (len(b) - 1))
        if i == 0:
            sem_avg0 = sem_avg
        block_outf.write(" %4i %9i %12.3e %12.3e   %15.2f\n"%(i, len(b), sem_avg, sem_avg_err, dt*(sem_avg/sem_avg0)**2 ) )
        c = [ 0.5* (b[2*i] + b[2*i+1]) for i in range(int(len(b)/2)) ]
        b = np.array(c)
        i = i+1
    block_outf.close() 
Example 23
Project: pyqmc   Author: WagnerGroup   File: reblock.py    MIT License
def reblock_summary(df, nblocks):
    df = reblock(df, nblocks)
    serr = df.sem()
    d = {
        "mean": df.mean(axis=0),
        "standard error": serr,
        "standard error error": serr / np.sqrt(2 * (len(df) - 1)),
        "n_blocks": nblocks,
    }
    return pd.DataFrame(d) 
Example 24
Project: pyqmc   Author: WagnerGroup   File: reblock.py    MIT License
def opt_block(df):
    """
    Finds optimal block size for each variable in a dataset
    df is a dataframe where each row is a sample and each column is a calculated quantity
    reblock each column over samples to find the best block size
    Returns optimal_block, a 1D array with the optimal size for each column in df
    """
    newdf = df.copy()
    iblock = 0
    ndata, nvariables = df.shape
    optimal_block = np.array([float("NaN")] * nvariables)
    serr0 = df.sem(axis=0).values
    statslist = []
    while newdf.shape[0] > 1:
        serr = newdf.sem(axis=0).values
        serrerr = serr / (2 * (newdf.shape[0] - 1)) ** 0.5
        statslist.append((iblock, serr.copy()))

        n = newdf.shape[0]
        lasteven = n - int(n % 2 == 1)
        newdf = (newdf[:lasteven:2] + newdf[1::2].values) / 2
        iblock += 1
    for iblock, serr in reversed(statslist):
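        # blocking criterion: accept iblock once 2^(3*iblock) reaches 2*ndata*(serr/serr0)^4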
        B3 = 2 ** (3 * iblock)
        inds = np.where(B3 >= 2 * ndata * (serr / serr0) ** 4)[0]
        optimal_block[inds] = iblock

    return optimal_block 
Example 25
Project: pyqmc   Author: WagnerGroup   File: reblock.py    MIT License
def test_reblocking():
    """
        Tests reblocking against known distribution.
    """
    from scipy.stats import sem

    def corr_data(N, L):
        """
            Creates correlated data. Taken from 
            https://pyblock.readthedocs.io/en/latest/tutorial.html.
        """
        return np.convolve(np.random.randn(2 ** N), np.ones(2 ** L) / 10, "same")

    n = 11
    cols = ["test_data1", "test_data2"]
    dat1 = corr_data(n, 4)
    dat2 = corr_data(n, 7)
    test_data = pd.DataFrame(data={cols[0]: dat1, cols[1]: dat2})
    reblocked_data = optimally_reblocked(test_data[cols])
    for c in cols:
        row = reblocked_data.loc[c]
        reblocks = reblocked_data["reblocks"].values[0]
        std_err = sem(reblock_by2(test_data, reblocks, c))
        std_err_err = std_err / np.sqrt(2 * (2 ** (n - reblocks) - 1))

        assert np.isclose(
            row["mean"], np.mean(test_data[c]), 1e-10, 1e-12
        ), "Means are not equal"
        assert np.isclose(
            row["standard error"], std_err, 1e-10, 1e-12
        ), "Standard errors are not equal"
        assert np.isclose(
            row["standard error error"], std_err_err, 1e-10, 1e-12
        ), "Standard error errors are not equal"

    statlist = ["mean", "sem", lambda x: x.sem() / np.sqrt(2 * (len(x) - 1))]
    rb1 = reblock(test_data, len(test_data) // 4).agg(statlist).T
    rb2 = reblock_by2(test_data, 2).agg(statlist).T
    for c in rb1.columns:
        assert np.isclose(rb1[c], rb2[c], 1e-10, 1e-12).all(), (c, rb1[c], rb2[c]) 
Example 26
Project: Mussy-Robot   Author: arnomoonens   File: training.py    MIT License
def evaluate_cross_validation(clf, X, y, K):
    
    # create a k-fold cross validation iterator
    cv = KFold(len(y), K, shuffle=True, random_state=0)
    # by default the score used is the one returned by score method of the estimator (accuracy)
    scores = cross_val_score(clf, X, y, cv=cv)
    print(scores)
    print("Mean score: {0:.3f} (+/-{1:.3f})".format(numpy.mean(scores), sem(scores)))
Example 27
Project: LaserTOF   Author: kyleuckert   File: test_mstats_basic.py    MIT License
def test_sem(self):
        # This is not in R, so used: sqrt(var(testcase)*3/4) / sqrt(3)
        y = mstats.sem(self.testcase)
        assert_almost_equal(y, 0.6454972244)
        n = self.testcase.count()
        assert_allclose(mstats.sem(self.testcase, ddof=0) * np.sqrt(n/(n-2)),
                        mstats.sem(self.testcase, ddof=2)) 
Example 28
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_function.py    MIT License
def scipy_sem(*args, **kwargs):
    from scipy.stats import sem

    return sem(*args, ddof=1, **kwargs) 
Example 29
Project: recruit   Author: Frank-qlu   File: test_nanops.py    Apache License 2.0
def test_nansem(self, ddof):
        from scipy.stats import sem
        with np.errstate(invalid='ignore'):
            self.check_funs(nanops.nansem, sem, allow_complex=False,
                            allow_str=False, allow_date=False,
                            allow_tdelta=False, allow_obj='convert', ddof=ddof) 
Example 30
Project: recruit   Author: Frank-qlu   File: test_function.py    Apache License 2.0
def test_ops_general():
    ops = [('mean', np.mean),
           ('median', np.median),
           ('std', np.std),
           ('var', np.var),
           ('sum', np.sum),
           ('prod', np.prod),
           ('min', np.min),
           ('max', np.max),
           ('first', lambda x: x.iloc[0]),
           ('last', lambda x: x.iloc[-1]),
           ('count', np.size), ]
    try:
        from scipy.stats import sem
    except ImportError:
        pass
    else:
        ops.append(('sem', sem))
    df = DataFrame(np.random.randn(1000))
    labels = np.random.randint(0, 50, size=1000).astype(float)

    for op, targop in ops:
        result = getattr(df.groupby(labels), op)().astype(float)
        expected = df.groupby(labels).agg(targop)
        try:
            tm.assert_frame_equal(result, expected)
        except BaseException as exc:
            exc.args += ('operation: %s' % op, )
            raise 
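
The pairing ('sem', sem) above relies on pandas and scipy agreeing on the default ddof=1. A minimal sketch:

import numpy as np
import pandas as pd
from scipy.stats import sem

s = pd.Series(np.random.randn(100))
assert np.isclose(s.sem(), sem(s))   # both default to ddof=1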
Example 31
Project: att   Author: Centre-Alt-Rendiment-Esportiu   File: test_mstats_basic.py    GNU General Public License v3.0
def test_sem(self):
        # This is not in R, so used: sqrt(var(testcase)*3/4) / sqrt(3)
        y = mstats.sem(self.testcase)
        assert_almost_equal(y, 0.6454972244)
        n = self.testcase.count()
        assert_allclose(mstats.sem(self.testcase, ddof=0) * np.sqrt(n/(n-2)),
                        mstats.sem(self.testcase, ddof=2)) 
Example 32
Project: att   Author: Centre-Alt-Rendiment-Esportiu   File: test_stats.py    GNU General Public License v3.0
def test_sem(self):
        # This is not in R, so used:
        #     sqrt(var(testcase)*3/4)/sqrt(3)

        # y = stats.sem(self.shoes[0])
        # assert_approx_equal(y,0.775177399)
        y = stats.sem(self.testcase)
        assert_approx_equal(y, 0.6454972244)
        n = len(self.testcase)
        assert_allclose(stats.sem(self.testcase, ddof=0) * np.sqrt(n/(n-2)),
                        stats.sem(self.testcase, ddof=2))
Example 33
Project: FUTU_Stop_Loss   Author: BigtoC   File: test_nanops.py    MIT License
def test_nansem(self, ddof):
        from scipy.stats import sem
        with np.errstate(invalid='ignore'):
            self.check_funs(nanops.nansem, sem, allow_complex=False,
                            allow_str=False, allow_date=False,
                            allow_tdelta=False, allow_obj='convert', ddof=ddof) 
Example 34
Project: FUTU_Stop_Loss   Author: BigtoC   File: test_function.py    MIT License
def test_ops_general():
    ops = [('mean', np.mean),
           ('median', np.median),
           ('std', np.std),
           ('var', np.var),
           ('sum', np.sum),
           ('prod', np.prod),
           ('min', np.min),
           ('max', np.max),
           ('first', lambda x: x.iloc[0]),
           ('last', lambda x: x.iloc[-1]),
           ('count', np.size), ]
    try:
        from scipy.stats import sem
    except ImportError:
        pass
    else:
        ops.append(('sem', sem))
    df = DataFrame(np.random.randn(1000))
    labels = np.random.randint(0, 50, size=1000).astype(float)

    for op, targop in ops:
        result = getattr(df.groupby(labels), op)().astype(float)
        expected = df.groupby(labels).agg(targop)
        try:
            tm.assert_frame_equal(result, expected)
        except BaseException as exc:
            exc.args += ('operation: %s' % op, )
            raise 
Example 35
Project: vnpy_crypto   Author: birforce   File: test_nanops.py    MIT License
def test_nansem(self, ddof):
        from scipy.stats import sem
        with np.errstate(invalid='ignore'):
            self.check_funs(nanops.nansem, sem, allow_complex=False,
                            allow_str=False, allow_date=False,
                            allow_tdelta=False, allow_obj='convert', ddof=ddof) 
Example 36
Project: vnpy_crypto   Author: birforce   File: test_function.py    MIT License
def test_ops_general():
    ops = [('mean', np.mean),
           ('median', np.median),
           ('std', np.std),
           ('var', np.var),
           ('sum', np.sum),
           ('prod', np.prod),
           ('min', np.min),
           ('max', np.max),
           ('first', lambda x: x.iloc[0]),
           ('last', lambda x: x.iloc[-1]),
           ('count', np.size), ]
    try:
        from scipy.stats import sem
    except ImportError:
        pass
    else:
        ops.append(('sem', sem))
    df = DataFrame(np.random.randn(1000))
    labels = np.random.randint(0, 50, size=1000).astype(float)

    for op, targop in ops:
        result = getattr(df.groupby(labels), op)().astype(float)
        expected = df.groupby(labels).agg(targop)
        try:
            tm.assert_frame_equal(result, expected)
        except BaseException as exc:
            exc.args += ('operation: %s' % op, )
            raise 
Example 37
Project: ble5-nrf52-mac   Author: tomasero   File: test_mstats_basic.py    MIT License
def test_sem(self):
        # This is not in R, so used: sqrt(var(testcase)*3/4) / sqrt(3)
        y = mstats.sem(self.testcase)
        assert_almost_equal(y, 0.6454972244)
        n = self.testcase.count()
        assert_allclose(mstats.sem(self.testcase, ddof=0) * np.sqrt(n/(n-2)),
                        mstats.sem(self.testcase, ddof=2)) 
Example 38
Project: Computable   Author: ktraunmueller   File: test_mstats_basic.py    MIT License
def test_sem(self):
        """
        this is not in R, so used
        sqrt(var(testcase)*3/4)/sqrt(3)
        """
        #y = stats.sem(self.shoes[0])
        #assert_approx_equal(y,0.775177399)
        y = mstats.sem(self.testcase)
        assert_almost_equal(y,0.6454972244) 
Example 39
Project: Computable   Author: ktraunmueller   File: test_stats.py    MIT License
def test_sem(self):
        # This is not in R, so used:
        #     sqrt(var(testcase)*3/4)/sqrt(3)

        # y = stats.sem(self.shoes[0])
        # assert_approx_equal(y,0.775177399)
        y = stats.sem(self.testcase)
        assert_approx_equal(y,0.6454972244) 
Example 40
Project: poker   Author: surgebiswas   File: test_mstats_basic.py    MIT License
def test_sem(self):
        # This is not in R, so used: sqrt(var(testcase)*3/4) / sqrt(3)
        y = mstats.sem(self.testcase)
        assert_almost_equal(y, 0.6454972244)
        n = self.testcase.count()
        assert_allclose(mstats.sem(self.testcase, ddof=0) * np.sqrt(n/(n-2)),
                        mstats.sem(self.testcase, ddof=2)) 
Example 41
Project: OCBO   Author: fusion-ml   File: plotting_util.py    MIT License
def _plot_perf(regrets, ax, cmap, ylab='Simple Regret',
               add_legend=True, ordering=None, ts=None, log_plot=False):
    """Plot the total simple regrets.
    Args:
        regrets: dict method -> ndarray, each row is a run's regret.
        ax: pyplot ax object.
        cmap: dict function name -> color.
        add_legend: Whether to add a legend.
    """
    if ordering is None:
        ordering = regrets.keys()
    if log_plot:
        if not (np.asarray([(reg > 0).all() for reg in regrets.values()]).all()):
            global_min = min([np.min(reg) for reg in regrets.values()])
            for reg in regrets.values():
                reg += (0.01 - global_min)
        ax.set_yscale('log', nonposy='clip')
    max_val, min_val = float('-inf'), float('inf')
    for method in ordering:
        if method not in regrets:
            continue
        reg = regrets[method]
        if ts is None:
            plot_ts = range(reg.shape[1])
        else:
            plot_ts = ts
        avg = np.mean(reg, axis=0)
        max_avg, min_avg = max(avg), min(avg)
        max_val = max_val if max_val > max_avg else max_avg
        min_val = min_val if min_val < min_avg else min_avg
        err = sem(reg, axis=0)
        ax.plot(plot_ts, avg, c=cmap[method], label=method.upper(),
                linewidth=2)
        ax.fill_between(plot_ts, avg - err, avg + err, color=cmap[method],
                        alpha=0.4)
    ax.set_xlabel('t')
    ax.set_ylabel(ylab)
    if add_legend:
        leg = ax.legend()
        for legobj in leg.legendHandles:
            legobj.set_linewidth(5.0) 
Example 42
Project: OCBO   Author: fusion-ml   File: plotting_util.py    MIT License
def _plot_proportions(f_names, proportions, ax, cmap, add_legend=True,
                      ordering=None):
    """Plot histogram of proportions for functions.
    Args:
        f_names: Name of the functions.
        proportions: dict method -> ndarray, each row is a run's proportion.
        ax: pyplot ax object.
        cmap: dict function name -> color.
        add_legend: Whether to add a legend.
    """
    if ordering is None:
        ordering = proportions.keys()
    idx = 0
    num_avgs = len(f_names)
    width = 1 / (num_avgs + 1)
    for method in ordering:
        if method not in proportions:
            continue
        props = proportions[method]
        avgs = np.mean(props, axis=0)
        errs = sem(props, axis=0)
        xs = 2 * np.arange(num_avgs) + width * idx
        ax.bar(xs, avgs, width, yerr=errs, label=method.upper(), align='edge',
               color=cmap[method])
        idx += 1
    ax.set_xticks(2 * np.arange(num_avgs) + width * num_avgs / 2)
    tick_names = []
    for fn in f_names:
        if 'constant' in fn.lower():
            tick_names.append('Constant')
        else:
            to_add = fn.replace('_', '')
            to_add = to_add.replace('0', '')
            to_add = to_add.capitalize()
            tick_names.append(to_add)
    ax.set_xticklabels(tick_names)
    ax.set_ylabel('Proportion')
    if add_legend:
        ax.legend() 
Example 43
Project: P3_image_processing   Author: latedude2   File: test_mstats_basic.py    MIT License
def test_sem(self):
        # This is not in R, so used: sqrt(var(testcase)*3/4) / sqrt(3)
        y = mstats.sem(self.testcase)
        assert_almost_equal(y, 0.6454972244)
        n = self.testcase.count()
        assert_allclose(mstats.sem(self.testcase, ddof=0) * np.sqrt(n/(n-2)),
                        mstats.sem(self.testcase, ddof=2)) 
Example 44
Project: adVNTR   Author: mehrdadbakhtiari   File: plot.py    BSD 3-Clause "New" or "Revised" License
def get_correct_estimates_for_ru(files, ru_length=None, adVNTR=False):
    count = 0
    vntr_results = {}
    if len(files) == 0:  # TEMP: guard against an empty file list
        return 0, 0
    for file_name in files:
        count += 1
        vntr_id = int(file_name.split('_')[-3])
        if vntr_id not in vntr_results.keys():
            vntr_results[vntr_id] = []
        sim = int(file_name.split('_')[-2])
        correct = False
        with open(file_name) as infile:
            lines = infile.readlines()
            if len(lines) > 1:
                if lines[-1].strip() != 'None' and len(lines[-1]) < 10:
                    estimate = int(float(lines[-1].strip()))
                    if estimate == sim:
                        correct = True
        if correct:
            vntr_results[vntr_id].append(1)
        else:
            vntr_results[vntr_id].append(0)
    vntr_averages = {}
    for vntr_id in vntr_results.keys():
        vntr_averages[vntr_id] = sum(vntr_results[vntr_id]) / float(len(vntr_results[vntr_id])) * 100
    correct_ratio = sum(vntr_averages.values()) / float(len(vntr_averages.values()))
    from scipy import stats
    error_bar = stats.sem([e for e in vntr_averages.values()])
    return correct_ratio, error_bar 
Example 45
Project: GraphicDesignPatternByPython   Author: Relph1119   File: test_mstats_basic.py    MIT License
def test_sem(self):
        # This is not in R, so used: sqrt(var(testcase)*3/4) / sqrt(3)
        y = mstats.sem(self.testcase)
        assert_almost_equal(y, 0.6454972244)
        n = self.testcase.count()
        assert_allclose(mstats.sem(self.testcase, ddof=0) * np.sqrt(n/(n-2)),
                        mstats.sem(self.testcase, ddof=2)) 
Example 46
Project: tschdata   Author: tum-lkn   File: toolbox.py    GNU General Public License v3.0
def mean_confidence_interval(data, confidence=0.95):
    a = 1.0*np.array(data)
    n = len(a)
    m, se = np.mean(a), st.sem(a)
    h = se * st.t.ppf((1+confidence)/2., n-1)  # two-sided t critical value (public ppf)
    return h 
Example 47
Project: light_curve_ml   Author: lsst-epo   File: stats_utils.py    MIT License
def confidenceInterval(values, mean: float, confidence: float=0.99) -> (float,
                                                                        float):
    """Calculates confidence interval using Student's t-distribution and
    standard error of mean"""
    return stats.t.interval(confidence, len(values) - 1, loc=mean,
                            scale=stats.sem(values)) 
Example 48
Project: quffka   Author: maremun   File: visualize.py    MIT License
def mean_confidence_interval(data, confidence=0.95):
    a = 1.0 * np.array(data)
    n = len(a)
    m, se = np.mean(a), st.sem(a)
    h = se * st.t.ppf((1 + confidence) / 2., n - 1)  # two-sided t critical value (public ppf)
    return m, m-h, m+h 
Example 49
Project: predictive-maintenance-using-machine-learning   Author: awslabs   File: test_nanops.py    Apache License 2.0
def test_nansem(self, ddof):
        from scipy.stats import sem
        with np.errstate(invalid='ignore'):
            self.check_funs(nanops.nansem, sem, allow_complex=False,
                            allow_str=False, allow_date=False,
                            allow_tdelta=False, allow_obj='convert', ddof=ddof) 
Example 50
Project: predictive-maintenance-using-machine-learning   Author: awslabs   File: test_function.py    Apache License 2.0
def test_ops_general():
    ops = [('mean', np.mean),
           ('median', np.median),
           ('std', np.std),
           ('var', np.var),
           ('sum', np.sum),
           ('prod', np.prod),
           ('min', np.min),
           ('max', np.max),
           ('first', lambda x: x.iloc[0]),
           ('last', lambda x: x.iloc[-1]),
           ('count', np.size), ]
    try:
        from scipy.stats import sem
    except ImportError:
        pass
    else:
        ops.append(('sem', sem))
    df = DataFrame(np.random.randn(1000))
    labels = np.random.randint(0, 50, size=1000).astype(float)

    for op, targop in ops:
        result = getattr(df.groupby(labels), op)().astype(float)
        expected = df.groupby(labels).agg(targop)
        try:
            tm.assert_frame_equal(result, expected)
        except BaseException as exc:
            exc.args += ('operation: %s' % op, )
            raise 
Example 51
Project: fund   Author: Frank-qlu   File: test_nanops.py    Apache License 2.0
def test_nansem(self, ddof):
        from scipy.stats import sem
        with np.errstate(invalid='ignore'):
            self.check_funs(nanops.nansem, sem, allow_complex=False,
                            allow_str=False, allow_date=False,
                            allow_tdelta=False, allow_obj='convert', ddof=ddof) 
Example 52
Project: fund   Author: Frank-qlu   File: test_function.py    Apache License 2.0
def test_ops_general():
    ops = [('mean', np.mean),
           ('median', np.median),
           ('std', np.std),
           ('var', np.var),
           ('sum', np.sum),
           ('prod', np.prod),
           ('min', np.min),
           ('max', np.max),
           ('first', lambda x: x.iloc[0]),
           ('last', lambda x: x.iloc[-1]),
           ('count', np.size), ]
    try:
        from scipy.stats import sem
    except ImportError:
        pass
    else:
        ops.append(('sem', sem))
    df = DataFrame(np.random.randn(1000))
    labels = np.random.randint(0, 50, size=1000).astype(float)

    for op, targop in ops:
        result = getattr(df.groupby(labels), op)().astype(float)
        expected = df.groupby(labels).agg(targop)
        try:
            tm.assert_frame_equal(result, expected)
        except BaseException as exc:
            exc.args += ('operation: %s' % op, )
            raise 
Example 53
Project: PyRouge   Author: magic282   File: Rouge.py    GNU General Public License v3.0
def get_mean_sd_internal(self, x):
        mean = np.mean(x)
        se = st.sem(x)  # standard error of the mean, used as the interval's scale
        res = st.t.interval(0.95, len(x) - 1, loc=mean, scale=se)
        return (mean, se, res)
Example 54
Project: psychrometric-chart-makeover   Author: buds-lab   File: test_categorical.py    MIT License
def test_nested_stats_with_missing_data(self):

        p = cat._CategoricalStatPlotter()

        g = pd.Series(np.repeat(list("abc"), 100))
        y = pd.Series(np.random.RandomState(0).randn(300))
        h = pd.Series(np.tile(list("xy"), 150))

        p.establish_variables(g, y, h,
                              order=list("abdc"),
                              hue_order=list("zyx"))
        p.estimate_statistic(np.mean, 95, 50000)

        nt.assert_equal(p.statistic.shape, (4, 3))
        nt.assert_equal(p.confint.shape, (4, 3, 2))

        mean = y[(g == "b") & (h == "x")].mean()
        sem = stats.sem(y[(g == "b") & (h == "x")])
        half_ci = stats.norm.ppf(.975) * sem
        ci = mean - half_ci, mean + half_ci
        npt.assert_almost_equal(p.statistic[1, 2], mean)
        npt.assert_array_almost_equal(p.confint[1, 2], ci, 2)

        npt.assert_array_equal(p.statistic[:, 0], [np.nan] * 4)
        npt.assert_array_equal(p.statistic[2], [np.nan] * 3)
        npt.assert_array_equal(p.confint[:, 0],
                               np.zeros((4, 2)) * np.nan)
        npt.assert_array_equal(p.confint[2],
                               np.zeros((3, 2)) * np.nan) 
Example 55
Project: psychrometric-chart-makeover   Author: buds-lab   File: test_nanops.py    MIT License
def test_nansem(self, ddof):
        from scipy.stats import sem
        with np.errstate(invalid='ignore'):
            self.check_funs(nanops.nansem, sem, allow_complex=False,
                            allow_str=False, allow_date=False,
                            allow_tdelta=False, allow_obj='convert', ddof=ddof) 
Example 56
Project: HistoGAN   Author: BMIRDS   File: accuracy_tester.py    GNU General Public License v3.0
def get_confidence_interval(class_accuracies, confidence):
    print(st.t.interval(confidence, len(class_accuracies)-1,
                        loc=np.mean(class_accuracies),
                        scale=st.sem(class_accuracies))) 
Example 57
Project: wine-ml-on-aws-lambda   Author: pierreant   File: test_nanops.py    Apache License 2.0
def test_nansem(self):
        tm.skip_if_no_package('scipy', min_version='0.17.0')
        from scipy.stats import sem
        with np.errstate(invalid='ignore'):
            self.check_funs_ddof(nanops.nansem, sem, allow_complex=False,
                                 allow_str=False, allow_date=False,
                                 allow_tdelta=False, allow_obj='convert') 
Example 58
Project: wine-ml-on-aws-lambda   Author: pierreant   File: test_groupby.py    Apache License 2.0
def test_ops_general(self):
        ops = [('mean', np.mean),
               ('median', np.median),
               ('std', np.std),
               ('var', np.var),
               ('sum', np.sum),
               ('prod', np.prod),
               ('min', np.min),
               ('max', np.max),
               ('first', lambda x: x.iloc[0]),
               ('last', lambda x: x.iloc[-1]),
               ('count', np.size), ]
        try:
            from scipy.stats import sem
        except ImportError:
            pass
        else:
            ops.append(('sem', sem))
        df = DataFrame(np.random.randn(1000))
        labels = np.random.randint(0, 50, size=1000).astype(float)

        for op, targop in ops:
            result = getattr(df.groupby(labels), op)().astype(float)
            expected = df.groupby(labels).agg(targop)
            try:
                tm.assert_frame_equal(result, expected)
            except BaseException as exc:
                exc.args += ('operation: %s' % op, )
                raise 
Example 59
Project: wine-ml-on-aws-lambda   Author: pierreant   File: test_mstats_basic.py    Apache License 2.0
def test_sem(self):
        # This is not in R, so used: sqrt(var(testcase)*3/4) / sqrt(3)
        y = mstats.sem(self.testcase)
        assert_almost_equal(y, 0.6454972244)
        n = self.testcase.count()
        assert_allclose(mstats.sem(self.testcase, ddof=0) * np.sqrt(n/(n-2)),
                        mstats.sem(self.testcase, ddof=2)) 
Example 60
Project: pytalite   Author: rallyhealth   File: interpretation.py    MIT License
def _comp_mean_ci(sample, confidence=0.95):
    """Copmute mean and confidence interval from distribution"""
    if np.all(sample == sample[0]):
        return (sample[0],) * 3
    else:
        mean = np.mean(sample)
        ci = st.t.interval(confidence, len(sample), loc=mean, scale=st.sem(sample))
        return mean, ci[0], ci[1] 
Example 61
Project: event-embedding-multitask   Author: tony-hong   File: test_sp_SRL.py    GNU General Public License v3.0
def bootstrap(experiment_name, test_name, net, n_roles, output_list=None, alpha=0.99, n_b=100):
    print ('Bootstrap method: ')
    if not output_list:
        # str.strip('.dat') strips characters, not a suffix, so remove the extension explicitly
        output_list = np.loadtxt('output_' + experiment_name + '.' + test_name.replace('.dat', '') + '.csv', delimiter = ',')

    rng = np.random
    P_list, R_list, F1_list = [], [], []
    for i in range(n_b):
        rand_len = len(output_list)
        indices = rng.randint(0, rand_len, rand_len)
        conMat = np.zeros((n_roles, n_roles))
        for idx in indices:
            x = int(output_list[idx][0])
            y = int(output_list[idx][1])
            conMat[x, y] += 1
        dir_P, dir_R, dir_F1, precision, recall, F1 = stats(net, conMat)
        P_list.append(dir_P)
        R_list.append(dir_R)
        F1_list.append(dir_F1)

    P_array, R_array, F1_array = np.array(P_list), np.array(R_list), np.array(F1_list)
    P_mean, P_std = np.mean(P_array), np.std(P_array)
    R_mean, R_std = np.mean(R_array), np.std(R_array)
    F1_mean, F1_std = np.mean(F1_array), np.std(F1_array)
    P_t_int = st.t.interval(alpha, len(P_array)-1, loc=np.mean(P_array), scale=st.sem(P_array))
    R_t_int = st.t.interval(alpha, len(R_array)-1, loc=np.mean(R_array), scale=st.sem(R_array))
    F1_t_int = st.t.interval(alpha, len(F1_array)-1, loc=np.mean(F1_array), scale=st.sem(F1_array))

    print ('Precision: \t %.2f \t %.2f \t %.2f \t %.2f' % (P_mean, P_std, P_t_int[0], P_t_int[1]))
    print ('Recall:    \t %.2f \t %.2f \t %.2f \t %.2f' % (R_mean, R_std, R_t_int[0], R_t_int[1]))
    print ('F1:        \t %.2f \t %.2f \t %.2f \t %.2f' % (F1_mean, F1_std, F1_t_int[0], F1_t_int[1]))
    
    return P_mean, P_std, R_mean, R_std, F1_mean, F1_std 
Example 62
Project: bayesmark   Author: uber   File: stats.py    Apache License 2.0
def t_EB(x, alpha=0.05, axis=-1):
    """Get t-statistic based error bars on mean of `x`.

    Parameters
    ----------
    x : :class:`numpy:numpy.ndarray` of shape (n_samples,)
        Data points to estimate mean. Must not be empty or contain ``NaN``.
    alpha : float
        The alpha level (``1-confidence``) probability (in (0, 1)) to construct confidence interval from t-statistic.
    axis : int
        The axis on `x` where we compute the t-statistics. The function is vectorized over all other dimensions.

    Returns
    -------
    EB : float
        Size of error bar on mean (``>= 0``). The confidence interval is ``[mean(x) - EB, mean(x) + EB]``. `EB` is
        ``inf`` when ``len(x) <= 1``. Will be ``NaN`` if there are any infinite values in `x`.
    """
    assert np.ndim(x) >= 1 and (not np.any(np.isnan(x)))
    assert np.ndim(alpha) == 0
    assert 0.0 < alpha and alpha < 1.0

    N = np.shape(x)[axis]
    if N <= 1:
        return np.full(np.sum(x, axis=axis).shape, fill_value=np.inf)

    confidence = 1 - alpha
    # loc cancels out when we just want EB anyway
    LB, UB = sst.t.interval(confidence, N - 1, loc=0.0, scale=1.0)
    assert not (LB > UB)
    # Just multiplying scale=ss.sem(x) is better for when scale=0
    EB = 0.5 * sst.sem(x, axis=axis) * (UB - LB)
    return EB 
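
A quick usage sketch with hypothetical data:

import numpy as np

x = np.random.randn(100)                  # hypothetical samples
eb = t_EB(x, alpha=0.05)                  # half-width of the 95% CI on the mean
lo, hi = np.mean(x) - eb, np.mean(x) + eb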
Example 63
Project: senior-design   Author: james-tate   File: test_stats.py    GNU General Public License v2.0
def test_sem(self):
        """
        this is not in R, so used
        sqrt(var(testcase)*3/4)/sqrt(3)
        """
        #y = stats.sem(self.shoes[0])
        #assert_approx_equal(y,0.775177399)
        y = stats.sem(self.testcase)
        assert_approx_equal(y,0.6454972244) 
Example 64
Project: pacbio_variant_caller   Author: EichlerLab   File: genotype.py    MIT License
def get_depth_for_regions(bam_fields, alt_breakpoints, ref_breakpoints, min_mapping_quality, min_base_quality):
    """
    Return median minus standard deviation of read depth across variant
    breakpoints in the given alternate and reference haplotypes.
    """
    # Convert breakpoints to a list for easier indexing.
    alt_regions = ["%s:%s-%s" % alt_breakpoint for alt_breakpoint in alt_breakpoints]
    ref_regions = ["%s:%s-%s" % ref_breakpoint for ref_breakpoint in ref_breakpoints]
    # alt_region_size = alt_breakpoints[2] - alt_breakpoints[1]
    # ref_region_size = ref_breakpoints[2] - ref_breakpoints[1]
    # logger.debug("Min depth for alt region %s (size: %s)", alt_region, alt_region_size)
    # logger.debug("Min depth for ref region %s (size: %s)", ref_region, ref_region_size)

    best_alignments = get_best_alignments(bam_fields["file"], ref_regions + alt_regions, quality=min_mapping_quality)
    depth_by_reference_and_position = get_depth_by_reference_and_position(best_alignments, bam_fields["file"], min_base_quality)

    alt_depths = [depth_by_reference_and_position.get(alt_breakpoint[0], {}).get(i, 0)
                  for alt_breakpoint in alt_breakpoints
                  for i in xrange(alt_breakpoint[1], alt_breakpoint[2] + 1)]
    ref_depths = [depth_by_reference_and_position.get(ref_breakpoint[0], {}).get(i, 0)
                  for ref_breakpoint in ref_breakpoints
                  for i in xrange(ref_breakpoint[1], ref_breakpoint[2] + 1)]

    logger.debug("Found %i depths for regions %s: %s", len(alt_depths), alt_regions, alt_depths)
    logger.debug("Found %i depths for regions %s: %s", len(ref_depths), ref_regions, ref_depths)
    logger.debug("Median alt depths: %s", np.median(alt_depths))
    logger.debug("Standard deviation alt depths: %s", np.std(alt_depths))
    logger.debug("Median ref depths: %s", np.median(ref_depths))
    logger.debug("Standard deviation ref depths: %s", np.std(ref_depths))
    high_quality_alt_depth = max(int(np.round(np.median(alt_depths) - sem(alt_depths))), 0)
    high_quality_ref_depth = max(int(np.round(np.median(ref_depths) - sem(ref_depths))), 0)

    return high_quality_alt_depth, high_quality_ref_depth 
Example 65
Project: mentornet   Author: google   File: models.py    Apache License 2.0
def mean_confidence_interval(data, confidence=0.9):
  """Calculation of mean confidence interval.

  Args:
    data: the data for which the confidence interval has to be found
    confidence: the confidence threshold

  Returns:
    mean of data, mean confidence interval
  """
  mean, sem, m = np.mean(data), st.sem(data), st.t.ppf((1 + confidence) / 2.,
                                                       len(data) - 1)
  return mean, m * sem 
Example 66
Project: sddpy   Author: wanqiuchansheng   File: utilities.py    MIT License
def confidenceinterval(x: List[float], conf_level=0.95):
    """
    获得置信区间
    """
    a = 1.0 * np.array(x)
    n = len(a)
    m, se = np.mean(a), sem(a)
    h = se * t._ppf((1 + conf_level) / 2., n - 1)
    return m - h, m + h 
Example 67
Project: housing-websites   Author: ONSBigData   File: Zoopla-ML-caravans.py    MIT License
def fit_evaluate_cv(model, X, y):
    model.fit(X, y)
    cv = StratifiedKFold(y, 10, shuffle=True, random_state=0)
    scores = cross_val_score(model, X, y, cv=cv, scoring='f1')
    print(scores)
    print("Mean F1 score: %0.3f (+/- %0.3f)" % (np.mean(scores), sem(scores)))
    precision = cross_val_score(model, X, y, cv=cv, scoring='precision')
    print("Mean precision: %0.3f" % (np.mean(precision)))
    recall = cross_val_score(model, X, y, cv=cv, scoring='recall')
    print("Mean recall: %0.3f" % (np.mean(recall)))

# Feature selection with 10-fold cross validation 
Example 68
Project: elasticintel   Author: securityclippy   File: test_nanops.py    GNU General Public License v3.0
def test_nansem(self):
        tm.skip_if_no_package('scipy', min_version='0.17.0')
        from scipy.stats import sem
        with np.errstate(invalid='ignore'):
            self.check_funs_ddof(nanops.nansem, sem, allow_complex=False,
                                 allow_str=False, allow_date=False,
                                 allow_tdelta=False, allow_obj='convert') 
Example 69
Project: elasticintel   Author: securityclippy   File: test_groupby.py    GNU General Public License v3.0
def test_ops_general(self):
        ops = [('mean', np.mean),
               ('median', np.median),
               ('std', np.std),
               ('var', np.var),
               ('sum', np.sum),
               ('prod', np.prod),
               ('min', np.min),
               ('max', np.max),
               ('first', lambda x: x.iloc[0]),
               ('last', lambda x: x.iloc[-1]),
               ('count', np.size), ]
        try:
            from scipy.stats import sem
        except ImportError:
            pass
        else:
            ops.append(('sem', sem))
        df = DataFrame(np.random.randn(1000))
        labels = np.random.randint(0, 50, size=1000).astype(float)

        for op, targop in ops:
            result = getattr(df.groupby(labels), op)().astype(float)
            expected = df.groupby(labels).agg(targop)
            try:
                tm.assert_frame_equal(result, expected)
            except BaseException as exc:
                exc.args += ('operation: %s' % op, )
                raise 
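The equivalence this test asserts for 'sem' can be reproduced standalone; a minimal sketch, assuming only pandas, numpy, and scipy are installed:

import numpy as np
import pandas as pd
from scipy.stats import sem

df = pd.DataFrame(np.random.randn(1000))
labels = np.random.randint(0, 50, size=1000).astype(float)
# the built-in groupby reduction and the scipy function agree (both use ddof=1)
pd.testing.assert_frame_equal(df.groupby(labels).sem().astype(float),
                              df.groupby(labels).agg(sem))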
Example 70
Project: treecall   Author: nh3   File: treecall.py    GNU General Public License v2.0 5 votes vote down vote up
def partition_main(args):
    print(args, file=sys.stderr)
    base_prior = make_base_prior(args.het, GTYPE3) # base genotype prior
    mm,mm0,mm1 = make_mut_matrix(args.mu, GTYPE3) # mm: full substitution rate matrix; mm0: off-diagonal entries zeroed; mm1: diagonal entries zeroed

    vcffile, variants, DPRs, PLs = read_vcf(args.vcf, args.min_ev)
    n_site,n_smpl = PLs.shape[0:2]

    tree = Tree()
    if sem(PLs[...,1],axis=1).mean() > sem(PLs[...,2],axis=1).mean():
        partition(PLs[...,0:2], tree, np.arange(n_smpl), args.min_ev)
    else:
        partition(PLs, tree, np.arange(n_smpl), args.min_ev)

    init_tree(tree)
    PLs = PLs.astype(np.longdouble)
    populate_tree_PL(tree, PLs, mm, 'PL')
    populate_tree_PL(tree, PLs, mm0, 'PL0')
    calc_mut_likelihoods(tree, mm0, mm1)

    print(tree)
    tree.write(outfile=args.output+'.pt0.nwk', format=5)
    best_tree,best_PL = recursive_NNI(tree, mm0, mm1, base_prior)
    best_tree,best_PL = recursive_reroot(best_tree, mm0, mm1, base_prior)
    print(best_tree)
    print('PL_per_site = %.4f' % (best_PL/n_site))
    best_tree.write(outfile=args.output+'.pt.nwk', format=5) 
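The branch condition above compares the mean per-site standard error of two PL columns: on a 2-D array, sem(a, axis=1) yields one standard error per row, and .mean() summarizes it into a scalar. A small standalone illustration with hypothetical shapes:

import numpy as np
from scipy.stats import sem

PL_col = np.random.randn(100, 8)    # e.g. 100 sites x 8 samples
per_site_sem = sem(PL_col, axis=1)  # one standard error per site (row)
print(per_site_sem.mean())          # scalar used in the comparison above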
Example 71
Project: pet-inspector   Author: tadatitam   File: metrics.py    GNU General Public License v3.0 5 votes vote down vote up
def compute_stats(list_dict, pkey, skey):
    sample = [sam[pkey][skey] for sam in list_dict]
    mean = round(np.mean(sample), 3)
    sem = round(stats.sem(sample), 3)
    return mean, sem 
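A minimal usage sketch; the nested keys 'reward' and 'total' are hypothetical stand-ins for whatever the project's dictionaries hold:

import numpy as np
from scipy import stats

samples = [{'reward': {'total': 1.2}},
           {'reward': {'total': 0.8}},
           {'reward': {'total': 1.0}}]
mean, sem_val = compute_stats(samples, 'reward', 'total')
print(mean, sem_val)  # mean and standard error of the mean, both rounded to 3 decimals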
Example 72
Project: fund-rank-dashboard   Author: 1pani   File: test_nanops.py    Apache License 2.0 5 votes vote down vote up
def test_nansem(self, ddof):
        from scipy.stats import sem
        with np.errstate(invalid='ignore'):
            self.check_funs(nanops.nansem, sem, allow_complex=False,
                            allow_str=False, allow_date=False,
                            allow_tdelta=False, allow_obj='convert', ddof=ddof) 
Example 73
Project: fund-rank-dashboard   Author: 1pani   File: test_function.py    Apache License 2.0 5 votes vote down vote up
def test_ops_general():
    ops = [('mean', np.mean),
           ('median', np.median),
           ('std', np.std),
           ('var', np.var),
           ('sum', np.sum),
           ('prod', np.prod),
           ('min', np.min),
           ('max', np.max),
           ('first', lambda x: x.iloc[0]),
           ('last', lambda x: x.iloc[-1]),
           ('count', np.size), ]
    try:
        from scipy.stats import sem
    except ImportError:
        pass
    else:
        ops.append(('sem', sem))
    df = DataFrame(np.random.randn(1000))
    labels = np.random.randint(0, 50, size=1000).astype(float)

    for op, targop in ops:
        result = getattr(df.groupby(labels), op)().astype(float)
        expected = df.groupby(labels).agg(targop)
        try:
            tm.assert_frame_equal(result, expected)
        except BaseException as exc:
            exc.args += ('operation: %s' % op, )
            raise 
Example 74
Project: nmsat   Author: rcfduarte   File: visualization.py    GNU General Public License v2.0 4 votes vote down vote up
def plot_acc(t, accs, fit_params, acc_function, title='', ax=None, display=True, save=False):
    """
    Plot autocorrelation decay and exponential fit (can be used for other purposes where an exponential fit to the
    data is suitable
    :param t:
    :param accs:
    :param fit_params:
    :param acc_function:
    :param title:
    :param ax:
    :param display:
    :param save:
    :return:
    """
    if (ax is not None) and (not isinstance(ax, mpl.axes.Axes)):
        raise ValueError('ax must be matplotlib.axes.Axes instance.')

    if ax is None:
        fig = pl.figure()
        fig.suptitle(title)
        ax = fig.add_subplot(111)
    else:
        ax.set_title(title)

    for n in range(accs.shape[0]):
        ax.plot(t, accs[n, :], alpha=0.1, lw=0.1, color='k')

    error = np.sum((np.mean(accs, 0) - acc_function(t, *fit_params)) ** 2)
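    # note: this is a sum of squared residuals, although the label below calls it MSE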
    label = r'$a = {0}, b = {1}, {2}={3}, MSE = {4}$'.format(
        str(np.round(fit_params[0], 2)), str(np.round(fit_params[1], 2)),
        r'\tau_{int}', str(np.round(fit_params[2], 2)), str(error))
    ax.errorbar(t, np.mean(accs, 0), yerr=st.sem(accs), fmt='', color='k', alpha=0.3)
    ax.plot(t, np.mean(accs, 0), '--')
    ax.plot(t, acc_function(t, *fit_params), 'r', label=label)
    ax.legend()

    ax.set_ylabel(r'Autocorrelation')
    ax.set_xlabel(r'Lag [ms]')
    ax.set_xlim(min(t), max(t))
    #ax.set_ylim(0., 1.)

    if save:
        assert isinstance(save, str), "Please provide filename"
        ax.figure.savefig(save + 'acc_fit.pdf')

    if display:
        pl.show(block=False) 
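A hedged usage sketch for plot_acc; the exponential form of acc_function is an assumption (the original module supplies its own fit function, along with the mpl, pl, and st aliases the plot code relies on), and the fit itself uses scipy.optimize.curve_fit:

import numpy as np
from scipy.optimize import curve_fit

def acc_function(t, a, b, tau):
    # assumed three-parameter exponential decay, matching the a, b, tau_int label above
    return a * np.exp(-t / tau) + b

t = np.linspace(0.0, 100.0, 200)
# 30 noisy synthetic autocorrelation traces with a true decay time of 20 ms
accs = np.array([np.exp(-t / 20.0) + 0.05 * np.random.randn(t.size) for _ in range(30)])
fit_params, _ = curve_fit(acc_function, t, accs.mean(axis=0), p0=(1.0, 0.0, 10.0))
plot_acc(t, accs, fit_params, acc_function, title='autocorrelation fit', display=False)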
Example 75
Project: darts-UNIQ   Author: yochaiz   File: statistics.py    Apache License 2.0 4 votes vote down vote up
def plot(self, label):
        # average accuracy
        yMean = mean(self.yValues)
        # labels containing a 16-bit marker are highlighted with a distinct color
        is_bitwidth_16 = isinstance(label, str) and ((' 16]' in label) or ('16,' in label))
        color = plt.cm.cool(0.45) if is_bitwidth_16 else self.colors[self.nextColorIdx]
        confidenceIntervalColor = plt.cm.cool(0.15)

        if is_bitwidth_16:
            self.ax.plot(self.xValues, [yMean], 'o', label=label, c=color,zorder=900)
        else:
            self.ax.plot(self.xValues, [yMean], 'o', label=label, c=color)

        # update yMax, yMin
        self.yMax = max(self.yMax, yMean)
        self.yMin = min(self.yMin, yMean)

        # # annotate label
        # self.ax.annotate('{}'.format(label[label.find('-') + 1:]), (self.xValues[0], yMean), size=6)

        confidenceHalfInterval = None
        # add error bar if there is more than single value
        if len(self.yValues) > 1:
            # calc standard error of the mean
            sem = st.sem(self.yValues)
            # calc confidence interval
            intervalMin, intervalMax = st.t.interval(self.confidence, len(self.yValues) - 1, loc=yMean, scale=sem)
            confidenceHalfInterval = (intervalMax - intervalMin) / 2
            self.ax.errorbar(self.xValues, [yMean], yerr=confidenceHalfInterval,
                             ms=5, marker='X', capsize=4, markeredgewidth=1, elinewidth=2, c=color)

            if is_bitwidth_16:
                self.ax.errorbar(self.xValues, [yMean], yerr=confidenceHalfInterval,
                                 ms=5, marker='X', capsize=4, markeredgewidth=1, elinewidth=2, c=color,zorder=900)

        if is_bitwidth_16:
            if self.previousPoint is not None:
                xPrev, yPrev, confidenceHalfIntervalPrev = self.previousPoint
                self.ax.plot([xPrev, self.xValues[-1]], [yPrev, yMean], '--', c=color,zorder=900)
                if confidenceHalfInterval and confidenceHalfIntervalPrev:
                    self.ax.plot([xPrev, self.xValues[-1]], [yPrev - confidenceHalfIntervalPrev, yMean - confidenceHalfInterval], '--',
                                 c=confidenceIntervalColor,zorder=900)
                    self.ax.plot([xPrev, self.xValues[-1]], [yPrev + confidenceHalfIntervalPrev, yMean + confidenceHalfInterval], '--',
                                 c=confidenceIntervalColor,zorder=900)
            # save last point as previous point
            self.previousPoint = (self.xValues[-1], yMean, confidenceHalfInterval)

        # update variables for next plot
        self.nextColorIdx += 1
        self.resetPlot() 
Example 76
Project: adVNTR   Author: mehrdadbakhtiari   File: plot.py    BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
def plot_estimates(ru_estimate_plot, files):
    data = [[0 for _ in range(len(files) // 39)] for _ in range(39-2)]
    naive_data = [[0 for _ in range(len(files) // 39)] for _ in range(39-2)]
    # data = [[0 for _ in range(39)] for _ in range(len(files) / 39)]

    simulated = set([])
    for file_name in files:
        sim = int(file_name.split('_')[-2])
        simulated.add(sim)
    simulated = sorted(list(simulated))
    sim_to_ind = {sim: i for i, sim in enumerate(simulated)}
    # print sim_to_ind

    for file_name in files:
        coverage = int(file_name.split('_')[-1].split('.')[0][:-1])
        if coverage < 3:
            continue
        sim = int(file_name.split('_')[-2])
        estimate = 0
        naive_estimate = 0
        with open(file_name) as infile:
            lines = infile.readlines()
            if len(lines) > 1:
                if lines[-1].strip() != 'None' and len(lines[-1]) < 10:
                    estimate = int(float(lines[-1].strip()))
        with open(file_name + '.naive') as infile:
            lines = infile.readlines()
            if len(lines) > 1:
                if lines[-1].strip() != 'None' and len(lines[-1]) < 10:
                    naive_estimate = int(float(lines[-1].strip()))
        # print coverage, sim, sim_to_ind[sim]
        data[coverage - 3][sim_to_ind[sim]] = estimate
        naive_data[coverage - 3][sim_to_ind[sim]] = naive_estimate
        # data[sim_to_ind[sim]][coverage - 1] = estimate
    # if i == 0:
    #     ru_estimate_plots[i] = sns.tsplot(data, err_style="ci_bars")
    # else:
    from scipy import stats
    averages = []
    for sim_rcout in range(len(data[0])):
        total = 0
        for cov in range(len(data)):
            if data[cov][sim_rcout] == 0:
                print(cov+9, sim_rcout)
            total += data[cov][sim_rcout]
        averages.append(total / float(len(data)))

    naive_averages = []
    for sim_rcout in range(len(naive_data[0])):
        total = 0
        for cov in range(len(naive_data)):
            total += naive_data[cov][sim_rcout]
        naive_averages.append(total / float(len(naive_data)))

    ru_estimate_plot.errorbar(simulated, averages, yerr=stats.sem(data))
    ru_estimate_plot.set_xticks(simulated, [sim+1 for sim in simulated])
    ru_estimate_plot.set_yticks(simulated, [sim+1 for sim in simulated])
    # ru_estimate_plot.errorbar(simulated, naive_averages, yerr=stats.sem(naive_data)) 
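Note that stats.sem applied to the nested list above reduces along axis 0 by default, so it yields one error bar per simulated repeat count (one per column). A quick illustration:

import numpy as np
from scipy import stats

data = [[10, 20, 30],   # rows: coverages
        [11, 19, 31],   # columns: simulated repeat counts
        [9, 21, 29]]
print(stats.sem(data))  # three values, one standard error per column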
Example 77
Project: AlphaPy   Author: ScottfreeLLC   File: features.py    Apache License 2.0 4 votes vote down vote up
def create_scipy_features(base_features, sentinel):
    r"""Calculate the skew, kurtosis, and other statistical features
    for each row.

    Parameters
    ----------
    base_features : numpy array
        The feature dataframe.
    sentinel : float
        The number to be imputed for NaN values.

    Returns
    -------
    sp_features : numpy array
        The calculated SciPy features.

    """

    logger.info("Creating SciPy Features")

    # Generate scipy features

    logger.info("SciPy Feature: geometric mean")
    row_gmean = sps.gmean(base_features, axis=1)
    logger.info("SciPy Feature: kurtosis")
    row_kurtosis = sps.kurtosis(base_features, axis=1)
    logger.info("SciPy Feature: kurtosis test")
    row_ktest, pvalue = sps.kurtosistest(base_features, axis=1)
    logger.info("SciPy Feature: normal test")
    row_normal, pvalue = sps.normaltest(base_features, axis=1)
    logger.info("SciPy Feature: skew")
    row_skew = sps.skew(base_features, axis=1)
    logger.info("SciPy Feature: skew test")
    row_stest, pvalue = sps.skewtest(base_features, axis=1)
    logger.info("SciPy Feature: variation")
    row_var = sps.variation(base_features, axis=1)
    logger.info("SciPy Feature: signal-to-noise ratio")
    row_stn = sps.signaltonoise(base_features, axis=1)
    logger.info("SciPy Feature: standard error of mean")
    row_sem = sps.sem(base_features, axis=1)

    sp_features = np.column_stack((row_gmean, row_kurtosis, row_ktest,
                                   row_normal, row_skew, row_stest,
                                   row_var, row_stn, row_sem))
    sp_features = impute_values(sp_features, 'float64', sentinel)
    sp_features = StandardScaler().fit_transform(sp_features)

    # Return new SciPy features

    logger.info("SciPy Feature Count : %d", sp_features.shape[1])
    return sp_features
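If this function has to run against SciPy >= 1.0, where signaltonoise no longer exists, the removed helper can be reimplemented locally; this sketch mirrors the formula the old SciPy versions used (mean divided by standard deviation along the given axis):

import numpy as np

def signaltonoise(a, axis=0, ddof=0):
    # mean / std along axis, returning 0 where the std is 0,
    # following the convention of the removed scipy.stats.signaltonoise
    a = np.asanyarray(a)
    m = a.mean(axis)
    sd = a.std(axis=axis, ddof=ddof)
    return np.where(sd == 0, 0, m / sd)

# row_stn = signaltonoise(base_features, axis=1)  # drop-in for the sps call above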


#
# Function create_clusters
#