Python scipy.stats.anderson_ksamp() Examples

The following are 12 code examples of scipy.stats.anderson_ksamp(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module scipy.stats, or try the search function.
Example #1
Source File: test_morestats.py    From GraphicDesignPatternByPython with MIT License 6 votes vote down vote up
def test_example1a(self):
        # Example data from Scholz & Stephens (1987), originally
        # published in Lehmann (1995, Nonparametrics, Statistical
        # Methods Based on Ranks, p. 309)
        # Pass a mixture of lists and arrays
        t1 = [38.7, 41.5, 43.8, 44.5, 45.5, 46.0, 47.7, 58.0]
        t2 = np.array([39.2, 39.3, 39.7, 41.4, 41.8, 42.9, 43.3, 45.8])
        t3 = np.array([34.0, 35.0, 39.0, 40.0, 43.0, 43.0, 44.0, 45.0])
        t4 = np.array([34.0, 34.8, 34.8, 35.4, 37.2, 37.8, 41.2, 42.8])
        assert_warns(UserWarning, stats.anderson_ksamp, (t1, t2, t3, t4),
                     midrank=False)
        with suppress_warnings() as sup:
            sup.filter(UserWarning, message='approximate p-value')
            Tk, tm, p = stats.anderson_ksamp((t1, t2, t3, t4), midrank=False)

        assert_almost_equal(Tk, 4.449, 3)
        assert_array_almost_equal([0.4985, 1.3237, 1.9158, 2.4930, 3.2459],
                                  tm, 4)
        assert_almost_equal(p, 0.0021, 4) 
Example #2
Source File: test_morestats.py    From GraphicDesignPatternByPython with MIT License 6 votes vote down vote up
def test_example1b(self):
        # Example data from Scholz & Stephens (1987), originally
        # published in Lehmann (1995, Nonparametrics, Statistical
        # Methods Based on Ranks, p. 309)
        # Pass arrays
        t1 = np.array([38.7, 41.5, 43.8, 44.5, 45.5, 46.0, 47.7, 58.0])
        t2 = np.array([39.2, 39.3, 39.7, 41.4, 41.8, 42.9, 43.3, 45.8])
        t3 = np.array([34.0, 35.0, 39.0, 40.0, 43.0, 43.0, 44.0, 45.0])
        t4 = np.array([34.0, 34.8, 34.8, 35.4, 37.2, 37.8, 41.2, 42.8])
        with suppress_warnings() as sup:
            sup.filter(UserWarning, message='approximate p-value')
            Tk, tm, p = stats.anderson_ksamp((t1, t2, t3, t4), midrank=True)

        assert_almost_equal(Tk, 4.480, 3)
        assert_array_almost_equal([0.4985, 1.3237, 1.9158, 2.4930, 3.2459],
                                  tm, 4)
        assert_almost_equal(p, 0.0020, 4) 
Example #3
Source File: test_morestats.py    From GraphicDesignPatternByPython with MIT License 6 votes vote down vote up
def test_result_attributes(self):
        # Data set: Scholz & Stephens (1987); first published in
        # Lehmann (1995, Nonparametrics, Statistical Methods Based on
        # Ranks, p. 309).
        # The first sample is a plain list and the others are arrays.
        samples = (
            [38.7, 41.5, 43.8, 44.5, 45.5, 46.0, 47.7, 58.0],
            np.array([39.2, 39.3, 39.7, 41.4, 41.8, 42.9, 43.3, 45.8]),
            np.array([34.0, 35.0, 39.0, 40.0, 43.0, 43.0, 44.0, 45.0]),
            np.array([34.0, 34.8, 34.8, 35.4, 37.2, 37.8, 41.2, 42.8]),
        )
        # Field names the result object is checked against.
        expected_fields = ('statistic', 'critical_values',
                           'significance_level')

        with suppress_warnings() as sup:
            sup.filter(UserWarning, message='approximate p-value')
            outcome = stats.anderson_ksamp(samples, midrank=False)

        check_named_results(outcome, expected_fields)
Example #4
Source File: test_morestats.py    From GraphicDesignPatternByPython with MIT License 5 votes vote down vote up
def test_example2a(self):
        # Example data taken from an earlier technical report of
        # Scholz and Stephens
        # Pass lists instead of arrays
        t1 = [194, 15, 41, 29, 33, 181]
        t2 = [413, 14, 58, 37, 100, 65, 9, 169, 447, 184, 36, 201, 118]
        t3 = [34, 31, 18, 18, 67, 57, 62, 7, 22, 34]
        t4 = [90, 10, 60, 186, 61, 49, 14, 24, 56, 20, 79, 84, 44, 59, 29,
              118, 25, 156, 310, 76, 26, 44, 23, 62]
        t5 = [130, 208, 70, 101, 208]
        t6 = [74, 57, 48, 29, 502, 12, 70, 21, 29, 386, 59, 27]
        t7 = [55, 320, 56, 104, 220, 239, 47, 246, 176, 182, 33]
        t8 = [23, 261, 87, 7, 120, 14, 62, 47, 225, 71, 246, 21, 42, 20, 5,
              12, 120, 11, 3, 14, 71, 11, 14, 11, 16, 90, 1, 16, 52, 95]
        t9 = [97, 51, 11, 4, 141, 18, 142, 68, 77, 80, 1, 16, 106, 206, 82,
              54, 31, 216, 46, 111, 39, 63, 18, 191, 18, 163, 24]
        t10 = [50, 44, 102, 72, 22, 39, 3, 15, 197, 188, 79, 88, 46, 5, 5, 36,
               22, 139, 210, 97, 30, 23, 13, 14]
        t11 = [359, 9, 12, 270, 603, 3, 104, 2, 438]
        t12 = [50, 254, 5, 283, 35, 12]
        t13 = [487, 18, 100, 7, 98, 5, 85, 91, 43, 230, 3, 130]
        t14 = [102, 209, 14, 57, 54, 32, 67, 59, 134, 152, 27, 14, 230, 66,
               61, 34]
        with suppress_warnings() as sup:
            sup.filter(UserWarning, message='approximate p-value')
            Tk, tm, p = stats.anderson_ksamp((t1, t2, t3, t4, t5, t6, t7, t8,
                                              t9, t10, t11, t12, t13, t14),
                                             midrank=False)

        assert_almost_equal(Tk, 3.288, 3)
        assert_array_almost_equal([0.5990, 1.3269, 1.8052, 2.2486, 2.8009],
                                  tm, 4)
        assert_almost_equal(p, 0.0041, 4) 
Example #5
Source File: test_morestats.py    From GraphicDesignPatternByPython with MIT License 5 votes vote down vote up
def test_example2b(self):
        # Example data taken from an earlier technical report of
        # Scholz and Stephens
        t1 = [194, 15, 41, 29, 33, 181]
        t2 = [413, 14, 58, 37, 100, 65, 9, 169, 447, 184, 36, 201, 118]
        t3 = [34, 31, 18, 18, 67, 57, 62, 7, 22, 34]
        t4 = [90, 10, 60, 186, 61, 49, 14, 24, 56, 20, 79, 84, 44, 59, 29,
              118, 25, 156, 310, 76, 26, 44, 23, 62]
        t5 = [130, 208, 70, 101, 208]
        t6 = [74, 57, 48, 29, 502, 12, 70, 21, 29, 386, 59, 27]
        t7 = [55, 320, 56, 104, 220, 239, 47, 246, 176, 182, 33]
        t8 = [23, 261, 87, 7, 120, 14, 62, 47, 225, 71, 246, 21, 42, 20, 5,
              12, 120, 11, 3, 14, 71, 11, 14, 11, 16, 90, 1, 16, 52, 95]
        t9 = [97, 51, 11, 4, 141, 18, 142, 68, 77, 80, 1, 16, 106, 206, 82,
              54, 31, 216, 46, 111, 39, 63, 18, 191, 18, 163, 24]
        t10 = [50, 44, 102, 72, 22, 39, 3, 15, 197, 188, 79, 88, 46, 5, 5, 36,
               22, 139, 210, 97, 30, 23, 13, 14]
        t11 = [359, 9, 12, 270, 603, 3, 104, 2, 438]
        t12 = [50, 254, 5, 283, 35, 12]
        t13 = [487, 18, 100, 7, 98, 5, 85, 91, 43, 230, 3, 130]
        t14 = [102, 209, 14, 57, 54, 32, 67, 59, 134, 152, 27, 14, 230, 66,
               61, 34]
        with suppress_warnings() as sup:
            sup.filter(UserWarning, message='approximate p-value')
            Tk, tm, p = stats.anderson_ksamp((t1, t2, t3, t4, t5, t6, t7, t8,
                                              t9, t10, t11, t12, t13, t14),
                                             midrank=True)

        assert_almost_equal(Tk, 3.294, 3)
        assert_array_almost_equal([0.5990, 1.3269, 1.8052, 2.2486, 2.8009],
                                  tm, 4)
        assert_almost_equal(p, 0.0041, 4) 
Example #6
Source File: test_morestats.py    From GraphicDesignPatternByPython with MIT License 5 votes vote down vote up
def test_not_enough_samples(self):
        assert_raises(ValueError, stats.anderson_ksamp, np.ones(5)) 
Example #7
Source File: test_morestats.py    From GraphicDesignPatternByPython with MIT License 5 votes vote down vote up
def test_empty_sample(self):
        assert_raises(ValueError, stats.anderson_ksamp, (np.ones(5), [])) 
Example #8
Source File: test_morestats.py    From GraphicDesignPatternByPython with MIT License 5 votes vote down vote up
def test_overflow(self):
        # when significance_level approximation overflows, should still return
        with suppress_warnings() as sup:
            sup.filter(UserWarning, message='approximate p-value')
            res = stats.anderson_ksamp([[-20, -10] * 100, [-10, 40, 12] * 100])
            assert_almost_equal(res[0], 272.796, 3) 
Example #9
Source File: _dp_verification.py    From whitenoise-system with MIT License 5 votes vote down vote up
def anderson_ksamp(self, fD1, fD2):
        """
        Run the k-sample Anderson-Darling test on the two samples
        ``fD1`` and ``fD2`` and return the SciPy result unchanged.
        """
        sample_pair = [fD1, fD2]
        return stats.anderson_ksamp(sample_pair)
Example #10
Source File: testers.py    From temci with GNU General Public License v3.0 5 votes vote down vote up
def _test_impl(self, data1: t.List[Number], data2: t.List[Number]) -> float:
        return max(st.anderson_ksamp([data1, data2])[-1], 1) 
Example #11
Source File: Statistics.py    From ClearMap with GNU General Public License v3.0 4 votes vote down vote up
def testCompletedCumulatives(data, method = 'AndersonDarling', offset = None, plot = False):
    """Test if data sets have the same number / intensity distribution by adding max intensity counts to the smaller sized data sets and performing a distribution comparison test"""
    
    #idea: fill up data points to the same numbers at the high intensity values and use KS test
    #cf. work in progress on thoouroghly testing the differences in histograms
    
    #fill up the low count data
    n = numpy.array([x.size for x in data]);
    nm = n.max();
    m = numpy.array([x.max() for x in data]);
    mm = m.max();
    k = n.size;
    #print nm, mm, k
    
    if offset is None:
        #assume data starts at 0 !
        offset = mm / nm; #ideall for all statistics this should be mm + eps to have as little influence as possible.
    

    datac = [x.copy() for x in data];
    for i in range(m.size):
        if n[i] < nm:
            datac[i] = numpy.concatenate((datac[i], numpy.ones(nm-n[i], dtype = datac[i].dtype) * (mm + offset))); # + 10E-5 * numpy.random.rand(nm-n[i])));
         
    #test by plotting
    if plot is True:
        import matplotlib.pyplot as plt;
        for i in range(m.size):
            datac[i].sort();
            plt.step(datac[i], numpy.arange(datac[i].size));
    
    #perfomr the tests
    if method == 'KolmogorovSmirnov' or method == 'KS':
        if k == 2:
            (s, p) = stats.ks_2samp(datac[0], datac[1]);
        else:
            raise RuntimeError('KolmogorovSmirnov only for 2 samples not %d' % k);
        
    elif method == 'CramervonMises' or method == 'CM':
        if k == 2:
            (s,p) = stats2.testCramerVonMises2Sample(datac[0], datac[1]);
        else:
            raise RuntimeError('CramervonMises only for 2 samples not %d' % k);
      
    elif method == 'AndersonDarling' or method == 'AD':
        (s,a,p) = stats.anderson_ksamp(datac);

    return (p,s); 
Example #12
Source File: Statistics.py    From ClearMap with GNU General Public License v3.0 4 votes vote down vote up
def testCompletedInvertedCumulatives(data, method = 'AndersonDarling', offset = None, plot = False):
    """Test if data sets have the same number / intensity distribution by adding zero intensity counts to the smaller sized data sets and performing a distribution comparison test on the reversed cumulative distribution"""
    
    #idea: fill up data points to the same numbers at the high intensity values and use KS test
    #cf. work in progress on thoouroghly testing the differences in histograms
    
    #fill up the low count data
    n = numpy.array([x.size for x in data]);
    nm = n.max();
    m = numpy.array([x.max() for x in data]);
    mm = m.max();
    k = n.size;
    #print nm, mm, k
    
    if offset is None:
        #assume data starts at 0 !
        offset = mm / nm; #ideall for all statistics this should be mm + eps to have as little influence as possible.
    

    datac = [x.copy() for x in data];
    for i in range(m.size):
        if n[i] < nm:
            datac[i] = numpy.concatenate((-datac[i], numpy.ones(nm-n[i], dtype = datac[i].dtype) * (offset))); # + 10E-5 * numpy.random.rand(nm-n[i])));
        else:
            datac[i] = -datac[i];
         
    #test by plotting
    if plot is True:
        import matplotlib.pyplot as plt;
        for i in range(m.size):
            datac[i].sort();
            plt.step(datac[i], numpy.arange(datac[i].size));
    
    #perfomr the tests
    if method == 'KolmogorovSmirnov' or method == 'KS':
        if k == 2:
            (s, p) = stats.ks_2samp(datac[0], datac[1]);
        else:
            raise RuntimeError('KolmogorovSmirnov only for 2 samples not %d' % k);
        
    elif method == 'CramervonMises' or method == 'CM':
        if k == 2:
            (s,p) = stats2.testCramerVonMises2Sample(datac[0], datac[1]);
        else:
            raise RuntimeError('CramervonMises only for 2 samples not %d' % k);
      
    elif method == 'AndersonDarling' or method == 'AD':
        (s,a,p) = stats.anderson_ksamp(datac);

    return (p,s);