Python scipy.stats.f_oneway() Examples

The following are code examples for showing how to use scipy.stats.f_oneway(). They are from open source Python projects. You can vote up the examples you like or vote down the ones you don't like.

Example 1
Project: LaserTOF   Author: kyleuckert   File: test_stats.py    MIT License 6 votes vote down vote up
def test_nist(self):
        """Compare f_oneway with the certified values in the NIST ANOVA files.

        The data files can be found at:
        http://www.itl.nist.gov/div898/strd/anova/anova.html
        """
        filenames = ['SiRstv.dat', 'SmLs01.dat', 'SmLs02.dat', 'SmLs03.dat',
                     'AtmWtAg.dat', 'SmLs04.dat', 'SmLs05.dat', 'SmLs06.dat',
                     'SmLs07.dat', 'SmLs08.dat', 'SmLs09.dat']
        # With the hard test cases we relax the tolerance a bit.
        hard_cases = ('SmLs07.dat', 'SmLs08.dat', 'SmLs09.dat')
        data_dir = os.path.join(os.path.dirname(__file__), 'data/nist_anova')

        for test_case in filenames:
            path = os.path.abspath(os.path.join(data_dir, test_case))
            with open(path, 'r') as handle:
                header_lines = handle.read().split('\n')

            # The expected value is the last token of the first non-blank
            # line among lines 41-48 of the file.
            certified = [text.split() for text in header_lines[40:48]
                         if text.strip()]

            # The numeric table starts after the first 60 lines; the first
            # column is the group label and the second the observation.
            labels, values = np.loadtxt(path, skiprows=60).T
            labels = labels.astype(int)
            categories = np.unique(labels)
            expected_f = float(certified[0][-1])

            groups = [values[labels == category] for category in categories]
            result = stats.f_oneway(*groups)

            tolerance = 1e-4 if test_case in hard_cases else 1e-7
            assert_allclose(result[0], expected_f, rtol=tolerance,
                            err_msg='Failing testcase: %s' % test_case)
Example 2
Project: ISM2017   Author: ybayle   File: stats.py    MIT License 6 votes vote down vote up
def anova(data):
    """
    Run a one-way ANOVA on the groups in ``data`` and print the result.

    Any number of groups (two or more) is supported; previously only 2, 3
    or 4 groups worked and every other count crashed on an unbound variable.

    https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.f_oneway.html

    :param data: sequence of groups, each a sequence of measurements
    :return: True if the p-value is below 0.05, i.e. at least one mean is
        different from the others; False otherwise
    :raises ValueError: if fewer than two groups are supplied
    """
    if len(data) < 2:
        raise ValueError(
            "ANOVA requires at least two groups, got " + str(len(data)))
    statistic, pvalue = stats.f_oneway(*data)
    print("ANOVA Statistic " + str(statistic) + " and p-value " + str(pvalue))
    # bool() keeps the return a plain Python bool, as the original
    # ``return True`` / ``return False`` branches did.
    return bool(pvalue < 0.05)
Example 3
Project: pysciencedock   Author: Kitware   File: anova.py    Apache License 2.0 6 votes vote down vote up
def anova(data):
    """Run a one-way ANOVA for every column of ``data``.

    Rows are grouped by the second index level (``level=1``) and the groups
    of each column are passed to ``f_oneway``.

    Returns a DataFrame indexed by the column names of ``data`` with the
    columns ``'f'`` and ``'p'``.

    Raises ``Exception`` when the second index level yields two or fewer
    groups.
    """
    group_count = len(data.groupby(level=1))
    if group_count < 3:
        raise Exception('ANOVA requires a secondary index with three or more values')

    results = []
    for col in data.columns:
        samples = [values for _, values in data[col].groupby(level=1)]
        results.append(f_oneway(*samples))

    return pd.DataFrame(results, columns=['f', 'p'], index=data.columns)
Example 4
Project: abagen   Author: rmarkello   File: test_correct.py    BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def test__batch():
    """Check ``correct._batch_correct`` on three synthetic batches."""
    rng = np.random.RandomState(1234)
    batch_offsets = [5, 0, 0]
    y = [rng.normal(size=(100, 1000)) + offset for offset in batch_offsets]

    # before batch correction the ANOVA p-values should all be ~0 (large
    # group differences)
    _, pvals_before = sstats.f_oneway(*y)
    assert np.allclose(pvals_before, 0)

    # after batch correction the ANOVA F-values should all be ~0 (no group
    # differences); p-values returned here are sometimes NaN so not a good
    # test
    out = correct._batch_correct(y)
    fvals_after = sstats.f_oneway(*out)[0]
    assert np.allclose(fvals_after, 0)

    # mean expressions after correction should be ~equal
    corrected_means = [batch.mean() for batch in out]
    assert np.allclose(corrected_means, 1.24871965683026)

    # a single batch is rejected
    with pytest.raises(ValueError):
        correct._batch_correct([y[0]])
Example 5
Project: scanorama   Author: brianhie   File: pancreas_tests.py    MIT License 5 votes vote down vote up
def print_oneway(X, genes, ds_labels):
    """Print a one-way ANOVA result for every gene.

    For each gene one line ``<gene>\\t<statistic>\\t<pvalue>`` is written to
    standard output, comparing the values of that gene's column across the
    groups defined by ``ds_labels``.

    Args:
        X: 2-D array indexed as ``X[row_mask, gene_idx]``.
        genes: iterable of gene names; position ``i`` names column ``i``.
        ds_labels: array of per-row labels, compared element-wise with
            each distinct label to build the row masks.
    """
    # The distinct labels do not depend on the gene, so compute them once
    # instead of on every loop iteration.
    ds_names = sorted(set(ds_labels))
    for gene_idx, gene in enumerate(genes):
        dist = [X[ds_labels == ds, gene_idx] for ds in ds_names]
        sys.stdout.write('{}\t'.format(gene))
        print('{}\t{}'.format(*f_oneway(*dist)))
Example 6
Project: LaserTOF   Author: kyleuckert   File: test_stats.py    MIT License 5 votes vote down vote up
def test_trivial(self):
        # A trivial test of stats.f_oneway, with F=0.
        F, p = stats.f_oneway([0,2], [0,2])
        assert_equal(F, 0.0) 
Example 7
Project: LaserTOF   Author: kyleuckert   File: test_stats.py    MIT License 5 votes vote down vote up
def test_basic(self):
        # Despite being a floating point calculation, this data should
        # result in F being exactly 2.0.
        F, p = stats.f_oneway([0,2], [2,4])
        assert_equal(F, 2.0) 
Example 8
Project: LaserTOF   Author: kyleuckert   File: test_stats.py    MIT License 5 votes vote down vote up
def test_large_integer_array(self):
        a = np.array([655, 788], dtype=np.uint16)
        b = np.array([789, 772], dtype=np.uint16)
        F, p = stats.f_oneway(a, b)
        assert_almost_equal(F, 0.77450216931805538) 
Example 9
Project: LaserTOF   Author: kyleuckert   File: test_stats.py    MIT License 5 votes vote down vote up
def test_result_attributes(self):
        """The f_oneway result must pass check_named_results for its fields."""
        samples = (np.array([655, 788], dtype=np.uint16),
                   np.array([789, 772], dtype=np.uint16))
        expected_fields = ('statistic', 'pvalue')
        check_named_results(stats.f_oneway(*samples), expected_fields)
Example 10
Project: att   Author: Centre-Alt-Rendiment-Esportiu   File: test_stats.py    GNU General Public License v3.0 5 votes vote down vote up
def test_trivial(self):
        # A trivial test of stats.f_oneway, with F=0.
        F, p = stats.f_oneway([0,2], [0,2])
        assert_equal(F, 0.0) 
Example 11
Project: att   Author: Centre-Alt-Rendiment-Esportiu   File: test_stats.py    GNU General Public License v3.0 5 votes vote down vote up
def test_basic(self):
        # Despite being a floating point calculation, this data should
        # result in F being exactly 2.0.
        F, p = stats.f_oneway([0,2], [2,4])
        assert_equal(F, 2.0) 
Example 12
Project: att   Author: Centre-Alt-Rendiment-Esportiu   File: test_stats.py    GNU General Public License v3.0 5 votes vote down vote up
def test_large_integer_array(self):
        a = np.array([655, 788], dtype=np.uint16)
        b = np.array([789, 772], dtype=np.uint16)
        F, p = stats.f_oneway(a, b)
        assert_almost_equal(F, 0.77450216931805538) 
Example 13
Project: TabPy   Author: tableau   File: ANOVA.py    MIT License 5 votes vote down vote up
def anova(_arg1, _arg2, *_argN):
    """
    Return the p-value of a one-way ANOVA over the given columns.

    ANOVA is a statistical hypothesis test that is used to compare
    two or more group means for equality. For more information on
    the function and how to use it please refer to tabpy-tools.md

    Only the first element of each column is type-checked; if it is not
    an ``int`` or ``float`` a message is printed and ``ValueError`` raised.
    """
    columns = [_arg1, _arg2, *_argN]
    first_is_numeric = (isinstance(column[0], (int, float))
                        for column in columns)
    if not all(first_is_numeric):
        print("values must be numeric")
        raise ValueError
    return stats.f_oneway(*columns)[1]
Example 14
Project: ble5-nrf52-mac   Author: tomasero   File: test_stats.py    MIT License 5 votes vote down vote up
def test_trivial(self):
        # A trivial test of stats.f_oneway, with F=0.
        F, p = stats.f_oneway([0,2], [0,2])
        assert_equal(F, 0.0) 
Example 15
Project: ble5-nrf52-mac   Author: tomasero   File: test_stats.py    MIT License 5 votes vote down vote up
def test_basic(self):
        # Despite being a floating point calculation, this data should
        # result in F being exactly 2.0.
        F, p = stats.f_oneway([0,2], [2,4])
        assert_equal(F, 2.0) 
Example 16
Project: ble5-nrf52-mac   Author: tomasero   File: test_stats.py    MIT License 5 votes vote down vote up
def test_large_integer_array(self):
        a = np.array([655, 788], dtype=np.uint16)
        b = np.array([789, 772], dtype=np.uint16)
        F, p = stats.f_oneway(a, b)
        assert_almost_equal(F, 0.77450216931805538) 
Example 17
Project: ble5-nrf52-mac   Author: tomasero   File: test_stats.py    MIT License 5 votes vote down vote up
def test_result_attributes(self):
        # The result object is checked for the named fields below.
        first = np.array([655, 788], dtype=np.uint16)
        second = np.array([789, 772], dtype=np.uint16)
        check_named_results(stats.f_oneway(first, second),
                            ('statistic', 'pvalue'))
Example 18
Project: ble5-nrf52-mac   Author: tomasero   File: test_stats.py    MIT License 5 votes vote down vote up
def test_nist(self):
        """Validate f_oneway against the NIST ANOVA reference files.

        They can be found at:
        https://www.itl.nist.gov/div898/strd/anova/anova.html
        """
        default_rtol = 1e-7
        # With the hard test cases we relax the tolerance a bit.
        relaxed_rtol = 1e-4
        hard_tc = ('SmLs07.dat', 'SmLs08.dat', 'SmLs09.dat')
        filenames = ['SiRstv.dat', 'SmLs01.dat', 'SmLs02.dat', 'SmLs03.dat',
                     'AtmWtAg.dat', 'SmLs04.dat', 'SmLs05.dat', 'SmLs06.dat',
                     'SmLs07.dat', 'SmLs08.dat', 'SmLs09.dat']
        here = os.path.dirname(__file__)

        for test_case in filenames:
            fname = os.path.abspath(
                os.path.join(here, 'data/nist_anova', test_case))
            with open(fname, 'r') as source:
                text = source.read()
            # Lines 41-48 hold the certified values; blank lines are dropped
            # and the last token of the first remaining line is used.
            certified = []
            for row in text.split('\n')[40:48]:
                if row.strip():
                    certified.append(row.split())

            # The data table begins after 60 lines: (group label, value).
            table = np.loadtxt(fname, skiprows=60)
            group_ids, observations = table.T
            group_ids = group_ids.astype(int)
            unique_ids = np.unique(group_ids)
            reference = float(certified[0][-1])

            samples = [observations[group_ids == gid] for gid in unique_ids]
            outcome = stats.f_oneway(*samples)

            rtol = relaxed_rtol if test_case in hard_tc else default_rtol
            assert_allclose(outcome[0], reference, rtol=rtol,
                            err_msg='Failing testcase: %s' % test_case)
Example 19
Project: Computable   Author: ktraunmueller   File: test_stats.py    MIT License 5 votes vote down vote up
def test_trivial(self):
        # A trivial test of stats.f_oneway, with F=0.
        F, p = stats.f_oneway([0,2], [0,2])
        assert_equal(F, 0.0) 
Example 20
Project: Computable   Author: ktraunmueller   File: test_stats.py    MIT License 5 votes vote down vote up
def test_basic(self):
        # A test of stats.f_oneway, with F=2.
        F, p = stats.f_oneway([0,2], [2,4])
        # Despite being a floating point calculation, this data should
        # result in F being exactly 2.0.
        assert_equal(F, 2.0) 
Example 21
Project: poker   Author: surgebiswas   File: test_stats.py    MIT License 5 votes vote down vote up
def test_trivial(self):
        # A trivial test of stats.f_oneway, with F=0.
        F, p = stats.f_oneway([0,2], [0,2])
        assert_equal(F, 0.0) 
Example 22
Project: poker   Author: surgebiswas   File: test_stats.py    MIT License 5 votes vote down vote up
def test_basic(self):
        # Despite being a floating point calculation, this data should
        # result in F being exactly 2.0.
        F, p = stats.f_oneway([0,2], [2,4])
        assert_equal(F, 2.0) 
Example 23
Project: poker   Author: surgebiswas   File: test_stats.py    MIT License 5 votes vote down vote up
def test_large_integer_array(self):
        a = np.array([655, 788], dtype=np.uint16)
        b = np.array([789, 772], dtype=np.uint16)
        F, p = stats.f_oneway(a, b)
        assert_almost_equal(F, 0.77450216931805538) 
Example 24
Project: poker   Author: surgebiswas   File: test_stats.py    MIT License 5 votes vote down vote up
def test_result_attributes(self):
        """Run check_named_results on the f_oneway result."""
        dtype = np.uint16
        outcome = stats.f_oneway(np.array([655, 788], dtype=dtype),
                                 np.array([789, 772], dtype=dtype))
        check_named_results(outcome, ('statistic', 'pvalue'))
Example 25
Project: poker   Author: surgebiswas   File: test_stats.py    MIT License 5 votes vote down vote up
def test_nist(self):
        """Run f_oneway on each NIST ANOVA file and compare with its
        certified value.

        The files can be found at:
        http://www.itl.nist.gov/div898/strd/anova/anova.html
        """
        # With the hard test cases we relax the tolerance a bit.
        tolerances = {'SmLs07.dat': 1e-4, 'SmLs08.dat': 1e-4,
                      'SmLs09.dat': 1e-4}
        filenames = ['SiRstv.dat', 'SmLs01.dat', 'SmLs02.dat', 'SmLs03.dat',
                     'AtmWtAg.dat', 'SmLs04.dat', 'SmLs05.dat', 'SmLs06.dat',
                     'SmLs07.dat', 'SmLs08.dat', 'SmLs09.dat']

        for test_case in filenames:
            relative = os.path.join(os.path.dirname(__file__),
                                    'data/nist_anova', test_case)
            fname = os.path.abspath(relative)
            with open(fname, 'r') as stream:
                all_lines = stream.read().split('\n')
            # Keep the non-blank lines among lines 41-48, tokenised.
            non_blank = filter(lambda entry: entry.strip(), all_lines[40:48])
            certified = [entry.split() for entry in non_blank]

            # Skip the 60 header lines, then split the two columns into
            # group label and measured value.
            columns = np.loadtxt(fname, skiprows=60).T
            y, x = columns
            y = y.astype(int)
            levels = np.unique(y)
            certified_f = float(certified[0][-1])

            res = stats.f_oneway(*[x[y == level] for level in levels])

            assert_allclose(res[0], certified_f,
                            rtol=tolerances.get(test_case, 1e-7),
                            err_msg='Failing testcase: %s' % test_case)
Example 26
Project: P3_image_processing   Author: latedude2   File: test_stats.py    MIT License 5 votes vote down vote up
def test_trivial(self):
        # A trivial test of stats.f_oneway, with F=0.
        F, p = stats.f_oneway([0,2], [0,2])
        assert_equal(F, 0.0) 
Example 27
Project: P3_image_processing   Author: latedude2   File: test_stats.py    MIT License 5 votes vote down vote up
def test_basic(self):
        # Despite being a floating point calculation, this data should
        # result in F being exactly 2.0.
        F, p = stats.f_oneway([0,2], [2,4])
        assert_equal(F, 2.0) 
Example 28
Project: P3_image_processing   Author: latedude2   File: test_stats.py    MIT License 5 votes vote down vote up
def test_large_integer_array(self):
        a = np.array([655, 788], dtype=np.uint16)
        b = np.array([789, 772], dtype=np.uint16)
        F, p = stats.f_oneway(a, b)
        assert_almost_equal(F, 0.77450216931805538) 
Example 29
Project: P3_image_processing   Author: latedude2   File: test_stats.py    MIT License 5 votes vote down vote up
def test_result_attributes(self):
        """Pass the f_oneway result and its field names to
        check_named_results."""
        inputs = [np.array(values, dtype=np.uint16)
                  for values in ([655, 788], [789, 772])]
        field_names = ('statistic', 'pvalue')
        check_named_results(stats.f_oneway(*inputs), field_names)
Example 30
Project: P3_image_processing   Author: latedude2   File: test_stats.py    MIT License 5 votes vote down vote up
def test_nist(self):
        """Compare f_oneway with the certified values in the NIST ANOVA files.

        The data files can be found at:
        https://www.itl.nist.gov/div898/strd/anova/anova.html
        """
        filenames = ['SiRstv.dat', 'SmLs01.dat', 'SmLs02.dat', 'SmLs03.dat',
                     'AtmWtAg.dat', 'SmLs04.dat', 'SmLs05.dat', 'SmLs06.dat',
                     'SmLs07.dat', 'SmLs08.dat', 'SmLs09.dat']
        # With the hard test cases we relax the tolerance a bit.
        hard_cases = ('SmLs07.dat', 'SmLs08.dat', 'SmLs09.dat')
        data_dir = os.path.join(os.path.dirname(__file__), 'data/nist_anova')

        for test_case in filenames:
            path = os.path.abspath(os.path.join(data_dir, test_case))
            with open(path, 'r') as handle:
                header_lines = handle.read().split('\n')

            # The expected value is the last token of the first non-blank
            # line among lines 41-48 of the file.
            certified = [text.split() for text in header_lines[40:48]
                         if text.strip()]

            # The numeric table starts after the first 60 lines; the first
            # column is the group label and the second the observation.
            labels, values = np.loadtxt(path, skiprows=60).T
            labels = labels.astype(int)
            categories = np.unique(labels)
            expected_f = float(certified[0][-1])

            groups = [values[labels == category] for category in categories]
            result = stats.f_oneway(*groups)

            tolerance = 1e-4 if test_case in hard_cases else 1e-7
            assert_allclose(result[0], expected_f, rtol=tolerance,
                            err_msg='Failing testcase: %s' % test_case)
Example 31
Project: facial-emotion-detection-dl   Author: dllatas   File: calculate_p_value.py    MIT License 5 votes vote down vote up
def main():
	"""
	Print the p-values of a Wilcoxon rank-sum test and a one-way ANOVA
	comparing the two hard-coded samples ``x`` and ``y`` below.

	Data from an earlier run, kept for reference:

	1st phase
	top1 = [70.0, 71.1, 72.5, 70.8, 68.1, 71.9, 71.1, 71.3, 68.4, 70.2]
	top3 = [75.8, 78.4, 77.8, 77.7, 80.0, 77.8, 78.7, 76.4, 79.1, 77.3]
	2nd phase
	"""
	x = [53.6, 54.5, 53.7, 52.7, 53.1, 55.5, 55.5, 52.8, 53.7, 52.7]
	y = [89.7, 89.1, 89.5, 88.7, 89.4, 88.6, 89.8, 89.5, 89.2, 89.7]
	# Compute the Wilcoxon rank-sum statistic for two samples.
	wilcoxon = stats.ranksums(x, y)
	anova = stats.f_oneway(x, y)
	# Index 1 of each result is its p-value. Calling print with a single
	# parenthesised argument works on Python 3 and prints the same text on
	# Python 2, where the original print statement was the only valid form.
	print("Wilcoxon: " + str(wilcoxon[1]) + "; ANOVA: " + str(anova[1]))
Example 32
Project: GraphicDesignPatternByPython   Author: Relph1119   File: test_stats.py    MIT License 5 votes vote down vote up
def test_trivial(self):
        # A trivial test of stats.f_oneway, with F=0.
        F, p = stats.f_oneway([0,2], [0,2])
        assert_equal(F, 0.0) 
Example 33
Project: GraphicDesignPatternByPython   Author: Relph1119   File: test_stats.py    MIT License 5 votes vote down vote up
def test_basic(self):
        # Despite being a floating point calculation, this data should
        # result in F being exactly 2.0.
        F, p = stats.f_oneway([0,2], [2,4])
        assert_equal(F, 2.0) 
Example 34
Project: GraphicDesignPatternByPython   Author: Relph1119   File: test_stats.py    MIT License 5 votes vote down vote up
def test_large_integer_array(self):
        a = np.array([655, 788], dtype=np.uint16)
        b = np.array([789, 772], dtype=np.uint16)
        F, p = stats.f_oneway(a, b)
        assert_almost_equal(F, 0.77450216931805538) 
Example 35
Project: GraphicDesignPatternByPython   Author: Relph1119   File: test_stats.py    MIT License 5 votes vote down vote up
def test_result_attributes(self):
        # check_named_results receives the result and the expected names.
        sample_one = np.array([655, 788], dtype=np.uint16)
        sample_two = np.array([789, 772], dtype=np.uint16)
        names = ('statistic', 'pvalue')
        outcome = stats.f_oneway(sample_one, sample_two)
        check_named_results(outcome, names)
Example 36
Project: GraphicDesignPatternByPython   Author: Relph1119   File: test_stats.py    MIT License 5 votes vote down vote up
def test_nist(self):
        """Validate f_oneway against the NIST ANOVA reference files.

        They can be found at:
        http://www.itl.nist.gov/div898/strd/anova/anova.html
        """
        default_rtol = 1e-7
        # With the hard test cases we relax the tolerance a bit.
        relaxed_rtol = 1e-4
        hard_tc = ('SmLs07.dat', 'SmLs08.dat', 'SmLs09.dat')
        filenames = ['SiRstv.dat', 'SmLs01.dat', 'SmLs02.dat', 'SmLs03.dat',
                     'AtmWtAg.dat', 'SmLs04.dat', 'SmLs05.dat', 'SmLs06.dat',
                     'SmLs07.dat', 'SmLs08.dat', 'SmLs09.dat']
        here = os.path.dirname(__file__)

        for test_case in filenames:
            fname = os.path.abspath(
                os.path.join(here, 'data/nist_anova', test_case))
            with open(fname, 'r') as source:
                text = source.read()
            # Lines 41-48 hold the certified values; blank lines are dropped
            # and the last token of the first remaining line is used.
            certified = []
            for row in text.split('\n')[40:48]:
                if row.strip():
                    certified.append(row.split())

            # The data table begins after 60 lines: (group label, value).
            table = np.loadtxt(fname, skiprows=60)
            group_ids, observations = table.T
            group_ids = group_ids.astype(int)
            unique_ids = np.unique(group_ids)
            reference = float(certified[0][-1])

            samples = [observations[group_ids == gid] for gid in unique_ids]
            outcome = stats.f_oneway(*samples)

            rtol = relaxed_rtol if test_case in hard_tc else default_rtol
            assert_allclose(outcome[0], reference, rtol=rtol,
                            err_msg='Failing testcase: %s' % test_case)
Example 37
Project: triqler   Author: statisticalbiotechnology   File: diff_exp.py    Apache License 2.0 5 votes vote down vote up
def getPval(quants):
  """Return the one-way ANOVA p-value for the groups in quants.

  A NaN p-value is replaced by 1.0.
  """
  _, pvalue = f_oneway(*quants)
  return 1.0 if np.isnan(pvalue) else pvalue
Example 38
Project: triqler   Author: statisticalbiotechnology   File: plot_posteriors.py    Apache License 2.0 5 votes vote down vote up
def printStats(geoAvgQuantRow, groups):
  """Print the quant row and the ANOVA p-value across its groups.

  The row is printed tab-separated with two decimals, followed by the
  p-value of f_oneway over the groups returned by
  parsers.getQuantGroups. As a side effect numpy's float print format
  is set to two decimals.
  """
  formattedRow = ['%.2f' % value for value in geoAvgQuantRow]
  print("\t".join(formattedRow))

  quantGroups = parsers.getQuantGroups(geoAvgQuantRow, groups)
  _, anovaPvalue = f_oneway(*quantGroups)
  print("p-value:", anovaPvalue)
  print("")

  def float_formatter(x):
    return "%.2f" % x
  np.set_printoptions(formatter={'float_kind':float_formatter})

  # The ratio matrix is still computed although nothing prints it any more.
  groupAverages = [parsers.geomAvg([2**y for y in group])
                   for group in quantGroups]
  geoAvgs = np.matrix(groupAverages)
  ratioMatrix = np.log2(np.transpose(geoAvgs) / geoAvgs)
Example 39
Project: linear_neuron   Author: uglyboxer   File: test_feature_select.py    MIT License 5 votes vote down vote up
def test_f_oneway_vs_scipy_stats():
    """Our f_oneway must give the same result as scipy.stats.f_oneway."""
    rng = np.random.RandomState(0)
    X1 = rng.randn(10, 3)
    X2 = 1 + rng.randn(10, 3)
    scipy_f, scipy_pv = stats.f_oneway(X1, X2)
    own_f, own_pv = f_oneway(X1, X2)
    assert_true(np.allclose(scipy_f, own_f))
    assert_true(np.allclose(scipy_pv, own_pv))
Example 40
Project: linear_neuron   Author: uglyboxer   File: test_feature_select.py    MIT License 5 votes vote down vote up
def test_f_oneway_ints():
    """Smoke test f_oneway on integer input.

    Checks that the call does not raise casting errors with recent numpys
    and that the integer result agrees with the float result to 4 decimals.
    """
    rng = np.random.RandomState(0)
    X = rng.randint(10, size=(10, 10))
    y = np.arange(10)
    fint, pint = f_oneway(X, y)

    # test that it gives the same result as with float.
    # The builtin ``float`` is used because the ``np.float`` alias (which
    # was the same type) was removed in NumPy 1.24.
    f, p = f_oneway(X.astype(float), y)
    assert_array_almost_equal(f, fint, decimal=4)
    assert_array_almost_equal(p, pint, decimal=4)
Example 41
Project: Weiss   Author: WangWenjun559   File: test_feature_select.py    Apache License 2.0 5 votes vote down vote up
def test_f_oneway_vs_scipy_stats():
    # Our f_oneway and scipy.stats.f_oneway must agree on the same data.
    rng = np.random.RandomState(0)
    X1 = rng.randn(10, 3)
    X2 = 1 + rng.randn(10, 3)
    samples = (X1, X2)
    reference_f, reference_pv = stats.f_oneway(*samples)
    f2, pv2 = f_oneway(*samples)
    f_matches = np.allclose(reference_f, f2)
    assert_true(f_matches)
    pv_matches = np.allclose(reference_pv, pv2)
    assert_true(pv_matches)
Example 42
Project: Weiss   Author: WangWenjun559   File: test_feature_select.py    Apache License 2.0 5 votes vote down vote up
def test_f_oneway_ints():
    """Smoke test f_oneway on integer input.

    Checks that the call does not raise casting errors with recent numpys
    and that the integer result agrees with the float result to 4 decimals.
    """
    rng = np.random.RandomState(0)
    X = rng.randint(10, size=(10, 10))
    y = np.arange(10)
    fint, pint = f_oneway(X, y)

    # test that it gives the same result as with float.
    # ``np.float`` was only an alias of the builtin ``float`` and no longer
    # exists since NumPy 1.24, so the builtin is used directly.
    f, p = f_oneway(X.astype(float), y)
    assert_array_almost_equal(f, fint, decimal=4)
    assert_array_almost_equal(p, pint, decimal=4)
Example 43
Project: surveyhelper   Author: cwade   File: question.py    MIT License 5 votes vote down vote up
def compare_groups(self, groupby, pval=.05):
        """Test whether the groups differ on ``self.variable``.

        Missing values are dropped from each group first. Exactly two
        groups are compared with Welch's t-test (``equal_var=False``),
        more than two with a one-way ANOVA.

        :param groupby: iterable of ``(groupname, frame)`` pairs that also
            supports ``len()`` and ``.groups``
            # presumably a pandas GroupBy object; confirm against callers
        :param pval: significance threshold applied to the p-value
        :return: truthy if the p-value is below ``pval``; ``False`` when
            there are fewer than two groups
        """
        data = [d[self.variable].dropna() for groupname, d in groupby]
        if len(groupby) == 2:
            ts, ps = ttest_ind(*data, equal_var=False)
            return ps < pval
        elif len(groupby.groups.keys()) >= 2:
            # ANOVA. The caller's threshold is honoured here too; this
            # branch previously compared against a hard-coded .05.
            f, p = f_oneway(*data)
            return p < pval
        else:
            return False
Example 44
Project: wine-ml-on-aws-lambda   Author: pierreant   File: test_feature_select.py    Apache License 2.0 5 votes vote down vote up
def test_f_oneway_vs_scipy_stats():
    """Compare our f_oneway with the scipy.stats implementation."""
    rng = np.random.RandomState(0)
    # Two (10, 3) samples; the second is shifted by 1.
    X1 = rng.randn(10, 3)
    X2 = 1 + rng.randn(10, 3)
    expected_f, expected_pv = stats.f_oneway(X1, X2)
    actual_f, actual_pv = f_oneway(X1, X2)
    assert_true(np.allclose(expected_f, actual_f))
    assert_true(np.allclose(expected_pv, actual_pv))
Example 45
Project: wine-ml-on-aws-lambda   Author: pierreant   File: test_feature_select.py    Apache License 2.0 5 votes vote down vote up
def test_f_oneway_ints():
    """Smoke test f_oneway on integer input.

    Checks that the call does not raise casting errors with recent numpys
    and that the integer result agrees with the float result to 4 decimals.
    """
    rng = np.random.RandomState(0)
    X = rng.randint(10, size=(10, 10))
    y = np.arange(10)
    fint, pint = f_oneway(X, y)

    # test that it gives the same result as with float.
    # Uses the builtin ``float``: the equivalent ``np.float`` alias was
    # removed in NumPy 1.24 and raises AttributeError there.
    f, p = f_oneway(X.astype(float), y)
    assert_array_almost_equal(f, fint, decimal=4)
    assert_array_almost_equal(p, pint, decimal=4)
Example 46
Project: wine-ml-on-aws-lambda   Author: pierreant   File: test_stats.py    Apache License 2.0 5 votes vote down vote up
def test_trivial(self):
        # A trivial test of stats.f_oneway, with F=0.
        F, p = stats.f_oneway([0,2], [0,2])
        assert_equal(F, 0.0) 
Example 47
Project: wine-ml-on-aws-lambda   Author: pierreant   File: test_stats.py    Apache License 2.0 5 votes vote down vote up
def test_basic(self):
        # Despite being a floating point calculation, this data should
        # result in F being exactly 2.0.
        F, p = stats.f_oneway([0,2], [2,4])
        assert_equal(F, 2.0) 
Example 48
Project: wine-ml-on-aws-lambda   Author: pierreant   File: test_stats.py    Apache License 2.0 5 votes vote down vote up
def test_large_integer_array(self):
        a = np.array([655, 788], dtype=np.uint16)
        b = np.array([789, 772], dtype=np.uint16)
        F, p = stats.f_oneway(a, b)
        assert_almost_equal(F, 0.77450216931805538) 
Example 49
Project: wine-ml-on-aws-lambda   Author: pierreant   File: test_stats.py    Apache License 2.0 5 votes vote down vote up
def test_nist(self):
        """Run f_oneway on each NIST ANOVA file and compare with its
        certified value.

        The files can be found at:
        http://www.itl.nist.gov/div898/strd/anova/anova.html
        """
        # With the hard test cases we relax the tolerance a bit.
        tolerances = {'SmLs07.dat': 1e-4, 'SmLs08.dat': 1e-4,
                      'SmLs09.dat': 1e-4}
        filenames = ['SiRstv.dat', 'SmLs01.dat', 'SmLs02.dat', 'SmLs03.dat',
                     'AtmWtAg.dat', 'SmLs04.dat', 'SmLs05.dat', 'SmLs06.dat',
                     'SmLs07.dat', 'SmLs08.dat', 'SmLs09.dat']

        for test_case in filenames:
            relative = os.path.join(os.path.dirname(__file__),
                                    'data/nist_anova', test_case)
            fname = os.path.abspath(relative)
            with open(fname, 'r') as stream:
                all_lines = stream.read().split('\n')
            # Keep the non-blank lines among lines 41-48, tokenised.
            non_blank = filter(lambda entry: entry.strip(), all_lines[40:48])
            certified = [entry.split() for entry in non_blank]

            # Skip the 60 header lines, then split the two columns into
            # group label and measured value.
            columns = np.loadtxt(fname, skiprows=60).T
            y, x = columns
            y = y.astype(int)
            levels = np.unique(y)
            certified_f = float(certified[0][-1])

            res = stats.f_oneway(*[x[y == level] for level in levels])

            assert_allclose(res[0], certified_f,
                            rtol=tolerances.get(test_case, 1e-7),
                            err_msg='Failing testcase: %s' % test_case)
Example 50
Project: SCALE   Author: jsxlei   File: plot.py    MIT License 5 votes vote down vote up
def feature_specifity(feature, ref, classes, figsize=(6,6), save=None):
    """
    Calculate the feature specifity and draw it as a heatmap.

    For every (feature column, cluster) pair a one-way ANOVA compares the
    values inside the cluster with the values outside it; the heatmap shows
    -log10 of the resulting p-values, capped at 20 on the colour scale.

    Input:
        feature: latent feature; indexed with ``.iloc[:, i]`` and boolean
            masks  # presumably a pandas DataFrame (samples x features)
        ref: cluster assignments, compared element-wise with each class
        classes: cluster classes
        figsize: figure size passed to ``plt.figure``
        save: output path; when given the figure is saved as PDF,
            otherwise it is shown
    """
    from scipy.stats import f_oneway
    # n_cluster = max(ref) + 1
    n_cluster = len(classes)
    dim = feature.shape[1] # feature dimension
    pvalue_mat = np.zeros((dim, n_cluster))
    for i,cluster in enumerate(classes):
        in_cluster = ref == cluster
        out_cluster = ref != cluster
        for feat in range(dim):
            column = feature.iloc[:, feat]
            pvalue_mat[feat, i] = f_oneway(column[in_cluster],
                                           column[out_cluster])[1]

    # One tick label per feature row. This used to be hard-coded to 10
    # labels, which does not match the matrix whenever dim != 10.
    feature_labels = np.arange(dim)+1

    plt.figure(figsize=figsize)
    grid = sns.heatmap(-np.log10(pvalue_mat), cmap='RdBu_r', 
                       vmax=20,
                       yticklabels=feature_labels, 
                       xticklabels=classes[:n_cluster],
                       )
    grid.set_ylabel('Feature', fontsize=18)
    grid.set_xticklabels(labels=classes[:n_cluster], rotation=45, fontsize=18)
    grid.set_yticklabels(labels=feature_labels, fontsize=16)
    cbar = grid.collections[0].colorbar
    cbar.set_label('-log10 (Pvalue)', fontsize=18) #, rotation=0, x=-0.9, y=0)
    
    if save:
        plt.savefig(save, format='pdf', bbox_inches='tight')
    else:
        plt.show()
Example 51
Project: iswc-2016-semantic-labeling   Author: minhptx   File: numeric.py    Apache License 2.0 5 votes vote down vote up
def anova_test(train_examples, test_examples):
    """Return the one-way ANOVA p-value of the two samples, or 0.

    0 is returned when either sample has fewer than two values, or when
    the last value of either sample is greater than 50.
    # NOTE(review): the meaning of the 50 cut-off is not visible here;
    # confirm against the caller.

    The length check runs first so that empty input yields 0 instead of
    raising IndexError on the ``[-1]`` lookup.
    """
    if len(train_examples) <= 1 or len(test_examples) <= 1:
        return 0
    if test_examples[-1] > 50 or train_examples[-1] > 50:
        return 0
    return f_oneway(train_examples, test_examples).pvalue
Example 52
Project: senior-design   Author: james-tate   File: test_stats.py    GNU General Public License v2.0 5 votes vote down vote up
def test_trivial(self):
        """A trivial test of stats.f_oneway, with F=0."""
        F, p = stats.f_oneway([0,2], [0,2])
        assert_equal(F, 0.0) 
Example 53
Project: senior-design   Author: james-tate   File: test_stats.py    GNU General Public License v2.0 5 votes vote down vote up
def test_basic(self):
        """A test of stats.f_oneway, with F=2."""
        F, p = stats.f_oneway([0,2], [2,4])
        # Despite being a floating point calculation, this data should
        # result in F being exactly 2.0.
        assert_equal(F, 2.0) 
Example 54
Project: gender_analysis   Author: dhmit   File: instance_distance.py    BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
def get_p_vals(location_median_results, author_gender_median_results, date_median_results):
    """
    Run ANOVA tests on the median results grouped by location, author
    gender and date.

    Takes results from **results_by_location(results, 'median')**,
    **results_by_author_gender**, **results_by_date**. Each argument maps a
    group key to a sequence of entries; the item at index 1 of every entry
    is used as the value.

    ANOVA test for independence of:

    - male vs female authors' median distance between female instances
    - UK vs. US vs. other country authors' median distance between female instances
    - Date ranges authors' median distance between female instances

    NOTE(review): for location and author gender only the first two groups
    are passed to the test; all date groups are used. Confirm that this is
    intended for the location comparison.

    :param location_median_results: result of **results_by_location(results, 'median')**
    :param author_gender_median_results: result of **results_by_author_gender(results, 'median)**
    :param date_median_results: result of **results_by_date(results, 'median')**
    :return: data-frame with 3 p-values, one for each category comparison

    """

    def _values_per_group(results):
        # One list per key, holding item 1 of every entry under that key.
        return [[entry[1] for entry in list(results[key])]
                for key in list(results.keys())]

    location_medians = _values_per_group(location_median_results)
    author_gender_medians = _values_per_group(author_gender_median_results)
    date_medians = _values_per_group(date_median_results)

    location_pval = stats.f_oneway(location_medians[0],
                                   location_medians[1])[1]
    author_gender_pval = stats.f_oneway(author_gender_medians[0],
                                        author_gender_medians[1])[1]
    date_pval = stats.f_oneway(*date_medians)[1]

    return pnds.DataFrame({
        "names": ["location", "male_vs_female_authors", "date"],
        "pvals": [location_pval, author_gender_pval, date_pval],
    })
Example 55
Project: xam   Author: MaxHalford   File: eda.py    MIT License 4 votes vote down vote up
def feature_importance_regression(features, target, n_neighbors=3, random_state=None):
    """Score how strongly each feature relates to a continuous target.

    Floating-point columns are treated as continuous and scored with Pearson's
    correlation and mutual information. Integer and boolean columns are treated
    as discrete and scored with a one-way ANOVA F-test (target values grouped by
    feature value) and mutual information. Columns of any other dtype are ignored.

    :param features: DataFrame of candidate features.
    :param target: target values, indexable by the row labels of ``features``.
    :param n_neighbors: forwarded to ``mutual_info_regression``.
    :param random_state: forwarded to ``mutual_info_regression``.
    :return: ``(cont_imp, disc_imp)``, two DataFrames indexed by feature name.
        ``cont_imp`` has columns ``pearson_r``, ``pearson_r_p_value`` and
        ``mutual_information``; ``disc_imp`` has ``f_statistic``, ``f_p_value``
        and ``mutual_information``. A frame has no columns when there are no
        features of its kind.
    """

    cont = features.select_dtypes(include=[np.floating])
    # np.bool was removed in NumPy 1.24; np.bool_ names the boolean scalar type
    # in every NumPy release.
    disc = features.select_dtypes(include=[np.integer, np.bool_])

    cont_imp = pd.DataFrame(index=cont.columns)
    disc_imp = pd.DataFrame(index=disc.columns)

    # Continuous features
    if cont_imp.index.size > 0:

        # Pearson correlation. DataFrame.iteritems() was removed in pandas 2.0;
        # items() yields the same (name, column) pairs.
        pearson_r = []
        pearson_p = []
        for _, column in cont.items():
            r_value, p_value = stats.pearsonr(column, target)
            pearson_r.append(r_value)
            pearson_p.append(p_value)
        cont_imp['pearson_r'] = np.array(pearson_r, dtype=float)
        cont_imp['pearson_r_p_value'] = np.array(pearson_p, dtype=float)

        # Mutual information
        mut_inf = feature_selection.mutual_info_regression(cont, target, discrete_features=False,
                                                           n_neighbors=n_neighbors,
                                                           random_state=random_state)
        cont_imp['mutual_information'] = mut_inf

    # Discrete features
    if disc_imp.index.size > 0:

        # F-test
        f_tests = {}

        for feature in disc.columns:
            groups = [target[idxs] for idxs in disc.groupby(feature).groups.values()]
            if len(groups) < 2:
                # A constant feature yields a single group; ANOVA is undefined
                # there and f_oneway rejects fewer than two samples.
                statistic, p_value = np.nan, np.nan
            else:
                statistic, p_value = stats.f_oneway(*groups)
            f_tests[feature] = {'f_statistic': statistic, 'f_p_value': p_value}

        # Assigning Series aligns the results with disc_imp's index by feature name.
        f_tests_df = pd.DataFrame.from_dict(f_tests, orient='index')
        disc_imp['f_statistic'] = f_tests_df['f_statistic']
        disc_imp['f_p_value'] = f_tests_df['f_p_value']

        # Mutual information
        mut_inf = feature_selection.mutual_info_regression(disc, target, discrete_features=True,
                                                           n_neighbors=n_neighbors,
                                                           random_state=random_state)
        disc_imp['mutual_information'] = mut_inf

    return cont_imp, disc_imp
Example 56
Project: DIVE-backend   Author: MacroConnections   File: numerical_comparison.py    GNU General Public License v3.0 4 votes vote down vote up
def get_valid_tests(equal_var, independent, normal, num_samples):
    '''
    Get valid tests given number of samples and statistical characterization of
    samples:

    Equal variance
    Independence
    Normality

    :param equal_var: truthy if the samples have equal variance
    :param independent: truthy if the samples are independent
    :param normal: truthy if the samples are normally distributed
    :param num_samples: number of samples being compared
    :return: dict mapping a test name to the scipy.stats function implementing
        it; empty when num_samples is less than 1
    '''
    if num_samples == 1:
        valid_tests = {
            'chisquare': stats.chisquare,
            'power_divergence': stats.power_divergence,
            'kstest': stats.kstest
        }
        if normal:
            # Stored at the top level like every other test in this mapping.
            valid_tests['one_sample_ttest'] = stats.ttest_1samp

    elif num_samples == 2:
        if independent:
            valid_tests = {
                'mannwhitneyu': stats.mannwhitneyu,
                'kruskal': stats.kruskal,
                'ks_2samp': stats.ks_2samp
            }
            if normal:
                valid_tests['two_sample_ttest'] = stats.ttest_ind
                if equal_var:
                    valid_tests['f_oneway'] = stats.f_oneway
        else:
            valid_tests = {
                'two_sample_ks': stats.ks_2samp,
                'wilcoxon': stats.wilcoxon
            }
            if normal:
                valid_tests['two_sample_related_ttest'] = stats.ttest_rel

    elif num_samples >= 3:
        if independent:
            valid_tests = {
                'kruskal': stats.kruskal
            }
            if normal and equal_var:
                valid_tests['f_oneway'] = stats.f_oneway

        else:
            valid_tests = {
                'friedmanchisquare': stats.friedmanchisquare
            }

    else:
        # No samples to compare: nothing applies.
        valid_tests = {}

    return valid_tests
Example 57
Project: hail   Author: hail-is   File: combine.py    MIT License 4 votes vote down vote up
def combine(output, files):
    """Merge several benchmark result files into one JSON summary.

    Each input file is JSON with a ``config`` entry and a ``benchmarks`` list
    whose items carry ``name``, ``failed`` and (when not failed) ``times``.
    The times from each file form one trial for the benchmark of that name.
    A benchmark that failed in any file is marked failed and gets no statistics.

    For every benchmark that did not fail, the output records the flattened
    times plus their median, mean and standard deviation, and the one-way ANOVA
    F statistic and p-value across trials. A warning is logged when the trials
    differ significantly (p < 0.001).

    :param output: path of the JSON file to write.
    :param files: paths of the benchmark result files to merge.
    :raises ValueError: if ``files`` is empty.
    """
    init_logging()
    logging.info(f'Writing combine output to {output}')
    n_files = len(files)
    if n_files < 1:
        raise ValueError("'combine' requires at least 1 file to merge")
    logging.info(f'{n_files} files to merge')

    config = None
    benchmark_data = collections.defaultdict(lambda: {'failed': False, 'trials': []})

    for file in files:
        with open(file, 'r') as f:
            data = json.load(f)
        config = config or data['config']  # take first config; should be similar
        for bm in data['benchmarks']:
            bm_data = benchmark_data[bm['name']]
            if bm['failed']:
                bm_data['failed'] = True
            else:
                bm_data['trials'].append(bm['times'])

    # Imported here rather than at module level, as in the original code.
    import numpy as np
    import scipy.stats as stats

    benchmark_json = []
    for name, data in benchmark_data.items():
        data['name'] = name
        if not data['failed']:
            trials = data['trials']
            flat_times = [t for trial in trials for t in trial]
            data['times'] = flat_times
            data['median'] = np.median(flat_times)
            data['mean'] = np.mean(flat_times)
            data['stdev'] = np.std(flat_times)
            if len(trials) > 1:
                f_stat, p_value = stats.f_oneway(*trials)
            else:
                # ANOVA needs at least two groups and f_oneway rejects fewer.
                # A lone trial (e.g. a single input file) has nothing to be
                # compared against, so record the statistics as undefined.
                f_stat = p_value = np.nan
            data['f-stat'] = f_stat
            data['p-value'] = p_value
            # NaN compares false, so undefined statistics never trigger the warning.
            if p_value < 0.001:
                logging.warning(
                    f'benchmark {name} had significantly different trial distributions (p={p_value}, F={f_stat}):' +
                    ''.join('\n  ' + ', '.join([f'{x:.2f}s' for x in trial]) for trial in trials))

        benchmark_json.append(data)

    with open(output, 'w') as out:
        json.dump({'config': config, 'benchmarks': benchmark_json}, out)
Example 58
Project: scRNA-Seq   Author: broadinstitute   File: misc.py    BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
def perform_oneway_anova(data, glist, restriction_vec, group_str, fdr_alpha = 0.05):
	"""Run a per-gene one-way ANOVA across groups of observations.

	data is presumably an AnnData-like object (it must provide shape, obs,
	var_names, 2-D slicing with copy(), and a sparse X with toarray()) -- TODO confirm.

	:param data: expression data; rows are observations, columns are genes.
	:param glist: gene names to test; names absent from data.var_names are dropped.
	:param restriction_vec: strings of the form 'attr:v1,v2,...'; only observations
		whose data.obs[attr] is one of the listed values are kept. All restrictions
		must hold.
	:param group_str: string of the form 'attr:name1~v1,v2;name2~v3,...'; each
		'name~values' part defines one group as the observations whose
		data.obs[attr] is one of the values.
	:param fdr_alpha: q-value cutoff for the filtered result.
	:return: (results, raw_results). raw_results is a DataFrame indexed by gene
		with columns fstat, pval, qval and, for each group, '<name>_mean' and
		'<name>_percent' (percentage of values greater than 0). results holds
		the rows with qval <= fdr_alpha, sorted by qval.
	"""
	# np.warnings was removed in NumPy 1.24; use the stdlib module directly.
	import warnings

	selected = np.ones(data.shape[0], dtype = bool)
	for rest_str in restriction_vec:
		attr, value_str = rest_str.split(':')
		values = value_str.split(',')
		selected = selected & np.isin(data.obs[attr], values)
	gene_list = np.array(glist)
	gene_list = gene_list[np.isin(gene_list, data.var_names)]
	newdat = data[selected, :][:, gene_list].copy()
	newdat.X = newdat.X.toarray()
	group_attr, tmp_str = group_str.split(':')
	groups_str = tmp_str.split(';')
	ngr = len(groups_str)
	group_names = []
	# Row i is the boolean mask of observations belonging to group i.
	group_idx = np.zeros((ngr, newdat.shape[0]), dtype = bool)
	for i, gstr in enumerate(groups_str):
		name, values = gstr.split('~')
		group_names.extend([name + '_mean', name + '_percent'])
		group_idx[i] = np.isin(newdat.obs[group_attr], values.split(','))
	# Columns: fstat, pval, qval, then (mean, percent) for each group.
	result_table = np.zeros((len(gene_list), 3 + ngr * 2))
	# Silence warnings only while computing, instead of installing a
	# process-wide filter that outlives this call.
	with warnings.catch_warnings():
		warnings.filterwarnings('ignore')
		for i in range(len(gene_list)):
			arr_list = []
			for j in range(group_idx.shape[0]):
				arr = newdat.X[group_idx[j], i]
				result_table[i, 3 + j * 2] = arr.mean()
				result_table[i, 3 + j * 2 + 1] = (arr > 0).sum() * 100.0 / arr.size
				arr_list.append(arr)
			result_table[i, 0], result_table[i, 1] = f_oneway(*arr_list)
			if np.isnan(result_table[i, 0]):
				# Undefined F statistic: report as no evidence of a difference.
				result_table[i, 0] = 0.0
				result_table[i, 1] = 1.0
		_, result_table[:, 2] = fdr(result_table[:, 1])
	cols = ['fstat', 'pval', 'qval']
	cols.extend(group_names)
	raw_results = pd.DataFrame(result_table, columns = cols, index = gene_list)
	results = raw_results[raw_results['qval'] <= fdr_alpha]
	results = results.sort_values('qval')
	return results, raw_results



# labels, cluster labels for each sample; conds, conditions; cond_order, condition orders 
Example 59
Project: physalia   Author: TQRG   File: analytics.py    MIT License 4 votes vote down vote up
def smart_hypothesis_testing(*samples, **options):
    """Do a smart hypothesis testing."""
    fancy = options.get('fancy', True)
    out = options.get('out', sys.stdout)
    alpha = options.get('alpha', 0.05)
    equal_var = options.get('equal_var', True)
    latex = options.get('latex', True)

    samples = [np.array(sample, dtype='float') for sample in samples]
    len_samples = len(samples)
    out_buffer = StringIO()

    normality_results = samples_are_normal(*samples)
    if all(map(itemgetter(0), normality_results)):
        # all our samples are normal
        if equal_var:
            if fancy:
                out_buffer.write(Template(
                    u"Hypothesis testing:\n\n"
                    "\t$H0: ${mu}1 = ${mu}2{ellipsis} = $mu{len_samples}. "
                    "The means for all groups are equal.\n"
                    "\t$H1: $exists a,b $elementof Samples: ${mu}a $neq ${mu}b. "
                    "At least two of the means are not equal.\n\n"
                    "The significance test one-way analysis of variance (ANOVA) "
                    "was used with a significance level of $alpha={alpha:.2f}.\n"
                    "This test requires that the following "
                    "assumptions are satisfied:\n\n"
                    "1. Samples are independent.\n"
                    "2. Samples are drawn from a normally distributed population.\n"
                    "3. All populations have equal standard deviation.\n\n"
                    "For the assumption of normal distribution two tests were "
                    "performed ($alpha={alpha}): Shapiro Wilk's test "
                    "and D'Agostino and Pearson's test.\n"
                    "None of these tests reject the null hypothesis with "
                    "significance level of $alpha={alpha}, thus it is assumed that data "
                    "follows a normal distribution.\n\n"
                    "").substitute(GREEK_ALPHABET).format(
                        ellipsis=" = ..." if len_samples > 3 else "",
                        **locals()
                    ))
            statistic, pvalue = f_oneway(*samples)
            if fancy:
                if pvalue < alpha:
                    out_buffer.write(
                        u"One can say that samples come from populations "
                        "with different means, since ANOVA rejects the "
                        "null hypothesis "
                        "(statistic={statistic:.2f}, {pvalue_str}).\n"
                        "".format(pvalue_str=_pvalue_to_str(pvalue), **locals())
                    )
                else:
                    out_buffer.write(
                        u"Thus, it was not possible to find evidence that"
                        " the means of populations are different "
                        "(statistic={statistic:.2f},{rho}={pvalue:.2f}).\n"
                        "".format(**locals())
                    )
            _flush_output(out, out_buffer, latex)
            return statistic, pvalue, f_oneway