Python scipy.stats.shapiro() Examples

The following are 19 code examples of scipy.stats.shapiro(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module scipy.stats, or try the search function.
Example #1
Source File: test_statstools.py    From vnpy_crypto with MIT License 7 votes vote down vote up
def test_shapiro():
    """Compare scipy's Shapiro-Wilk output with reference values from R fBasics.

    The module-level sample ``x`` is tested raw and under three transforms;
    each (statistic, p-value) pair must match the reference to the stated
    number of decimals.
    """
    from scipy.stats import shapiro

    # Raw sample: compared to 4 decimals.
    reference = np.array([0.939984787255526, 0.239621898000460])
    result = shapiro(x)
    assert_almost_equal(result, reference, 4)

    # Squared sample: the p-value is tiny, so it is scaled by 1e4 on both
    # sides before the decimal comparison.
    # (statistic is ok at -7.15e-06, p-value agrees at -3.05e-10)
    reference = np.array([5.799574255943298e-01, 1.838456834681376e-06 * 1e4])
    result = shapiro(x**2) * np.array([1, 1e4])
    assert_almost_equal(result, reference, 5)

    # Log of the squared sample.
    reference = np.array([0.91730442643165588, 0.08793704167882448])
    result = shapiro(np.log(x**2))
    assert_almost_equal(result, reference, 5)

    # Gaussian-shaped transform; diff is [9.38773155e-07, 5.48221246e-08].
    reference = np.array([0.818361863493919373, 0.001644620895206969])
    result = shapiro(np.exp(-x**2))
    assert_almost_equal(result, reference, 5)
Example #2
Source File: Sensor.py    From Predictive-Maintenance with MIT License 7 votes vote down vote up
def normality_test(self):
        """Record whether the loaded series passes a Shapiro-Wilk normality test.

        ``self.normality`` is set to 1 when the p-value is above 0.05 and to 0
        otherwise. Nothing is returned.
        """
        # load_dataset() yields three items; only the middle one is used here.
        _first, series_values, _last = self.load_dataset()
        p_value = stats.shapiro(series_values)[1]
        self.normality = 1 if p_value > 0.05 else 0
        # Optional reporting, currently disabled:
        # write results to a file
        # with open(os.path.join(self.root_path, 'normality.txt'), 'a') as f:
        #     f.write('sensor name: ' + str(self.sensor_name + '-' + self.sample_rate) + ' ,normality: ' + str(self.normality) + '\n')
        # save histogram image
        # fig = pyplot.figure()
        # pyplot.hist(series_values)
        # pyplot.title(self.file_name, fontsize=20)
        # pyplot.xlabel('Value', fontsize=16)
        # pyplot.ylabel('Frequency', fontsize=16)
        # fig.savefig(os.path.join(self.root_path, 'distribution_test', self.file_name + '.png'), bbox_inches='tight', dpi=150)
Example #3
Source File: test_gaussianize.py    From gaussianize with MIT License 6 votes vote down vote up
def test_normality_increase_lambert(self):
        """Print normality scores and save probability plots before and after the transform.

        Runs on one random Cauchy sample and on ``experimental_data``. No
        assertion is made; the Anderson and Shapiro statistics are printed for
        inspection and the plots are written into ``self.test_dir``.
        """
        def report(label, sample, filename):
            # One reporting pass: scores to stdout, probability plot to disk.
            print(label)
            scores = (anderson(sample)[0], shapiro(sample)[0])
            print(('anderson: %0.3f\tshapiro: %0.3f' % scores).expandtabs(30))
            stats.probplot(sample, dist="norm", plot=plt)
            plt.savefig(os.path.join(self.test_dir, filename))
            plt.clf()

        datasets = [np.random.standard_cauchy(size=ns), experimental_data]
        for i, y in enumerate(datasets):
            print('Distribution %d' % i)
            report('Before', y, '%d_before.png' % i)

            # Fit the transform parameter on y, then apply it to get x.
            tau = g.igmm(y)
            x = g.w_t(y, tau)
            report('After', x, '%d_after.png' % i)
Example #4
Source File: regression.py    From pycircstat with MIT License 5 votes vote down vote up
def test(self, alpha, x):
        """
        Tests whether alpha and x are significantly correlated.
        The test assumes that x is normally distributed. The test
        function uses a Shapiro-Wilk test to test this assumption.

        :param alpha: independent variable, angles in radians
        :param x: dependent variable
        :return: test results of Shapiro-Wilk and Liddell-Ord test
        :rtype: pandas.DataFrame

        References: [Jammalamadaka2001]_
        """
        w, psw = stats.shapiro(x)
        if psw < 0.05:
            warnings.warn("This test requires Gaussian distributed x")

        rxc, rxs, rcs = np.corrcoef(x, np.cos(alpha))[0,1], np.corrcoef(x, np.sin(alpha))[0,1], \
                        np.corrcoef(np.cos(alpha), np.sin(alpha))[0,1]
        n = len(alpha)
        r2 = (rxc**2 + rxs**2 - 2*rxc*rxs*rcs)/(1 - rcs**2)
        f = (n-3)*r2/(1-r2)
        p = stats.f.sf(f, 2, n-3)

        df = pd.DataFrame(dict(
            test = ['Shapiro-Wilk','Liddell-Ord'],
            statistics = [w, f],
            p = [psw, p],
            dof = [None, (2, n-3)]
        )).set_index('test')
        return df 
Example #5
Source File: test_qmc.py    From botorch with MIT License 5 votes vote down vote up
def test_MultivariateNormalQMCEngineDegenerate(self):
        """Exercise the multivariate normal QMC engine on a singular covariance.

        The random vector is (X, Y, Z) with X, Y iid standard normal and
        Z = X + Y. For each dtype the draws are checked for dtype/device,
        per-column moments, Shapiro-Wilk normality, sample covariance and the
        linear constraint between the columns.
        """
        for dtype in (torch.float, torch.double):
            mean = torch.zeros(3, device=self.device, dtype=dtype)
            cov = torch.tensor(
                [[1, 0, 1], [0, 1, 1], [1, 1, 2]], device=self.device, dtype=dtype
            )
            engine = MultivariateNormalQMCEngine(mean=mean, cov=cov, seed=12345)
            samples = engine.draw(n=2000)
            self.assertEqual(samples.dtype, dtype)
            self.assertEqual(samples.device.type, self.device.type)
            column_means = samples.mean(dim=0)
            self.assertTrue(torch.all(torch.abs(column_means) < 1e-2))
            # X and Y have unit variance, Z = X + Y has variance 2.
            for column, target_std in enumerate((1, 1, math.sqrt(2))):
                std_error = torch.abs(torch.std(samples[:, column]) - target_std)
                self.assertTrue(std_error < 1e-2)
            samples_np = samples.cpu().numpy()
            for column in (0, 1, 2):
                _, pval = shapiro(samples_np[:, column])
                self.assertGreater(pval, 0.9)
            sample_cov = np.cov(samples_np.transpose())
            self.assertLess(np.abs(sample_cov[0, 1]), 1e-2)
            self.assertLess(np.abs(sample_cov[0, 2] - 1), 1e-2)
            # X + Y should reproduce Z almost exactly.
            residual = torch.abs(samples[:, 0] + samples[:, 1] - samples[:, 2])
            self.assertTrue(torch.all(residual < 1e-5))
Example #6
Source File: test_qmc.py    From botorch with MIT License 5 votes vote down vote up
def test_MultivariateNormalQMCEngineShapiro(self):
        """Check moments, Shapiro-Wilk normality and covariance of the QMC draws.

        For each dtype two configurations are exercised: a standard bivariate
        normal, and a correlated one with non-zero mean.
        """
        def check_draws(mean, cov, dtype, target_std, target_cov01):
            # Draw 250 points and run the shared battery of assertions.
            engine = MultivariateNormalQMCEngine(mean=mean, cov=cov, seed=12345)
            samples = engine.draw(n=250)
            self.assertEqual(samples.dtype, dtype)
            self.assertEqual(samples.device.type, self.device.type)
            mean_error = torch.abs(samples.mean(dim=0) - mean)
            self.assertTrue(torch.all(mean_error < 1e-2))
            std_error = torch.abs(samples.std(dim=0) - target_std)
            self.assertTrue(torch.all(std_error < 1e-2))
            # perform Shapiro-Wilk test for normality, one column at a time
            samples_np = samples.cpu().numpy()
            for column in (0, 1):
                _, pval = shapiro(samples_np[:, column])
                self.assertGreater(pval, 0.9)
            # off-diagonal sample covariance must match the target
            sample_cov = np.cov(samples_np.transpose())
            self.assertLess(np.abs(sample_cov[0, 1] - target_cov01), 1e-2)

        for dtype in (torch.float, torch.double):
            # the standard case: zero mean, identity covariance
            check_draws(
                mean=torch.zeros(2, device=self.device, dtype=dtype),
                cov=torch.eye(2, device=self.device, dtype=dtype),
                dtype=dtype,
                target_std=1,
                target_cov01=0,
            )
            # the correlated, non-zero mean case
            check_draws(
                mean=torch.tensor([1.0, 2.0], device=self.device, dtype=dtype),
                cov=torch.tensor(
                    [[1.5, 0.5], [0.5, 1.5]], device=self.device, dtype=dtype
                ),
                dtype=dtype,
                target_std=math.sqrt(1.5),
                target_cov01=0.5,
            )
Example #7
Source File: test_qmc.py    From botorch with MIT License 5 votes vote down vote up
def test_NormalQMCEngineShapiroInvTransform(self):
        """Check 2-d draws from NormalQMCEngine built with ``inv_transform=True``.

        Verifies dtype, near-zero means, near-unit standard deviations,
        Shapiro-Wilk normality per column and near-zero cross covariance.
        """
        samples = NormalQMCEngine(d=2, seed=12345, inv_transform=True).draw(n=250)
        self.assertEqual(samples.dtype, torch.float)
        column_means = samples.mean(dim=0)
        self.assertTrue(torch.all(torch.abs(column_means) < 1e-2))
        column_stds = samples.std(dim=0)
        self.assertTrue(torch.all(torch.abs(column_stds - 1) < 1e-2))
        # Shapiro-Wilk normality check on each column
        for column in (0, 1):
            p_value = shapiro(samples[:, column])[1]
            self.assertGreater(p_value, 0.9)
        # the two columns should be uncorrelated
        sample_cov = np.cov(samples.numpy().transpose())
        self.assertLess(np.abs(sample_cov[0, 1]), 1e-2)
Example #8
Source File: test_qmc.py    From botorch with MIT License 5 votes vote down vote up
def test_NormalQMCEngineShapiro(self):
        """Check 2-d draws from NormalQMCEngine with its default settings.

        Verifies dtype, near-zero means, near-unit standard deviations,
        Shapiro-Wilk normality per column and near-zero cross covariance.
        """
        samples = NormalQMCEngine(d=2, seed=12345).draw(n=250)
        self.assertEqual(samples.dtype, torch.float)
        column_means = samples.mean(dim=0)
        self.assertTrue(torch.all(torch.abs(column_means) < 1e-2))
        column_stds = samples.std(dim=0)
        self.assertTrue(torch.all(torch.abs(column_stds - 1) < 1e-2))
        # Shapiro-Wilk normality check on each column
        for column in (0, 1):
            p_value = shapiro(samples[:, column])[1]
            self.assertGreater(p_value, 0.9)
        # the two columns should be uncorrelated
        sample_cov = np.cov(samples.numpy().transpose())
        self.assertLess(np.abs(sample_cov[0, 1]), 1e-2)
Example #9
Source File: sw.py    From spm1d with GNU General Public License v3.0 5 votes vote down vote up
def sw_single_node(x):
	"""Return the Shapiro-Wilk statistic and p-value for sample x as a plain tuple."""
	return tuple(stats.shapiro(x))
Example #10
Source File: microlstats.py    From pyLIMA with GNU General Public License v3.0 5 votes vote down vote up
def normal_Shapiro_Wilk(sample):
    """Run a Shapiro-Wilk normality test on the sample and grade the outcome.

            :param array_like sample: the sample you want to check the "Gaussianity"
            :returns: the Shapiro-Wilk statistic, its related p_value and a judgement
                      flag: 0 if p_value <= 0.01, 1 if 0.01 < p_value <= 0.05,
                      2 if p_value > 0.05
            :rtype: float, float, int
    """
    statistic, p_value = ss.shapiro(sample)

    # Both the statistic and the p-value approach 1 when normality cannot be
    # rejected. NOTE: the grading below is driven by the p-value only.
    if p_value > 0.05:
        judgement = 2
    elif p_value > 0.01:
        judgement = 1
    else:
        judgement = 0

    return statistic, p_value, judgement


### Statistics fit quality metrics 
Example #11
Source File: test_morestats.py    From GraphicDesignPatternByPython with MIT License 5 votes vote down vote up
def test_bad_arg(self):
        # Length of x is less than 3.
        x = [1]
        assert_raises(ValueError, stats.shapiro, x) 
Example #12
Source File: test_morestats.py    From GraphicDesignPatternByPython with MIT License 5 votes vote down vote up
def test_not_enough_values(self):
        assert_raises(ValueError, stats.shapiro, [1, 2])
        assert_raises(ValueError, stats.shapiro, [[], [2]]) 
Example #13
Source File: test_morestats.py    From GraphicDesignPatternByPython with MIT License 5 votes vote down vote up
def test_empty_input(self):
        assert_raises(ValueError, stats.shapiro, [])
        assert_raises(ValueError, stats.shapiro, [[], [], []]) 
Example #14
Source File: test_morestats.py    From GraphicDesignPatternByPython with MIT License 5 votes vote down vote up
def test_2d(self):
        x1 = [[0.11, 7.87, 4.61, 10.14, 7.95, 3.14, 0.46,
              4.43, 0.21, 4.75], [0.71, 1.52, 3.24,
              0.93, 0.42, 4.97, 9.53, 4.55, 0.47, 6.66]]
        w, pw = stats.shapiro(x1)
        assert_almost_equal(w, 0.90047299861907959, 6)
        assert_almost_equal(pw, 0.042089745402336121, 6)
        x2 = [[1.36, 1.14, 2.92, 2.55, 1.46, 1.06, 5.27, -1.11,
              3.48, 1.10], [0.88, -0.51, 1.46, 0.52, 6.20, 1.69,
              0.08, 3.67, 2.81, 3.49]]
        w, pw = stats.shapiro(x2)
        assert_almost_equal(w, 0.9590270, 6)
        assert_almost_equal(pw, 0.52460, 3) 
Example #15
Source File: test_morestats.py    From GraphicDesignPatternByPython with MIT License 5 votes vote down vote up
def test_basic(self):
        x1 = [0.11, 7.87, 4.61, 10.14, 7.95, 3.14, 0.46,
              4.43, 0.21, 4.75, 0.71, 1.52, 3.24,
              0.93, 0.42, 4.97, 9.53, 4.55, 0.47, 6.66]
        w, pw = stats.shapiro(x1)
        assert_almost_equal(w, 0.90047299861907959, 6)
        assert_almost_equal(pw, 0.042089745402336121, 6)
        x2 = [1.36, 1.14, 2.92, 2.55, 1.46, 1.06, 5.27, -1.11,
              3.48, 1.10, 0.88, -0.51, 1.46, 0.52, 6.20, 1.69,
              0.08, 3.67, 2.81, 3.49]
        w, pw = stats.shapiro(x2)
        assert_almost_equal(w, 0.9590270, 6)
        assert_almost_equal(pw, 0.52460, 3)

        # Verified against R
        np.random.seed(12345678)
        x3 = stats.norm.rvs(loc=5, scale=3, size=100)
        w, pw = stats.shapiro(x3)
        assert_almost_equal(w, 0.9772805571556091, decimal=6)
        assert_almost_equal(pw, 0.08144091814756393, decimal=3)

        # Extracted from original paper
        x4 = [0.139, 0.157, 0.175, 0.256, 0.344, 0.413, 0.503, 0.577, 0.614,
              0.655, 0.954, 1.392, 1.557, 1.648, 1.690, 1.994, 2.174, 2.206,
              3.245, 3.510, 3.571, 4.354, 4.980, 6.084, 8.351]
        W_expected = 0.83467
        p_expected = 0.000914
        w, pw = stats.shapiro(x4)
        assert_almost_equal(w, W_expected, decimal=4)
        assert_almost_equal(pw, p_expected, decimal=5) 
Example #16
Source File: test_morestats.py    From Computable with MIT License 5 votes vote down vote up
def test_bad_arg(self):
        # Length of x is less than 3.
        x = [1]
        assert_raises(ValueError, stats.shapiro, x) 
Example #17
Source File: test_morestats.py    From Computable with MIT License 5 votes vote down vote up
def test_basic(self):
        x1 = [0.11,7.87,4.61,10.14,7.95,3.14,0.46,
              4.43,0.21,4.75,0.71,1.52,3.24,
              0.93,0.42,4.97,9.53,4.55,0.47,6.66]
        w,pw = stats.shapiro(x1)
        assert_almost_equal(w,0.90047299861907959,6)
        assert_almost_equal(pw,0.042089745402336121,6)
        x2 = [1.36,1.14,2.92,2.55,1.46,1.06,5.27,-1.11,
              3.48,1.10,0.88,-0.51,1.46,0.52,6.20,1.69,
              0.08,3.67,2.81,3.49]
        w,pw = stats.shapiro(x2)
        assert_almost_equal(w,0.9590270,6)
        assert_almost_equal(pw,0.52460,3) 
Example #18
Source File: test_qmc.py    From botorch with MIT License 4 votes vote down vote up
def test_MultivariateNormalQMCEngineShapiroInvTransform(self):
        """Check moments, normality and covariance of draws made with ``inv_transform=True``.

        For each dtype two configurations are exercised: a standard bivariate
        normal, and a correlated one with non-zero mean.
        """
        def check_draws(mean, cov, dtype, target_std, target_cov01):
            # Draw 250 points and run the shared battery of assertions.
            engine = MultivariateNormalQMCEngine(
                mean=mean, cov=cov, seed=12345, inv_transform=True
            )
            samples = engine.draw(n=250)
            self.assertEqual(samples.dtype, dtype)
            self.assertEqual(samples.device.type, self.device.type)
            mean_error = torch.abs(samples.mean(dim=0) - mean)
            self.assertTrue(torch.all(mean_error < 1e-2))
            std_error = torch.abs(samples.std(dim=0) - target_std)
            self.assertTrue(torch.all(std_error < 1e-2))
            # perform Shapiro-Wilk test for normality, one column at a time
            samples_np = samples.cpu().numpy()
            for column in (0, 1):
                _, pval = shapiro(samples_np[:, column])
                self.assertGreater(pval, 0.9)
            # off-diagonal sample covariance must match the target
            sample_cov = np.cov(samples_np.transpose())
            self.assertLess(np.abs(sample_cov[0, 1] - target_cov01), 1e-2)

        for dtype in (torch.float, torch.double):
            # the standard case: zero mean, identity covariance
            check_draws(
                mean=torch.zeros(2, device=self.device, dtype=dtype),
                cov=torch.eye(2, device=self.device, dtype=dtype),
                dtype=dtype,
                target_std=1,
                target_cov01=0,
            )
            # the correlated, non-zero mean case
            check_draws(
                mean=torch.tensor([1.0, 2.0], device=self.device, dtype=dtype),
                cov=torch.tensor(
                    [[1.5, 0.5], [0.5, 1.5]], device=self.device, dtype=dtype
                ),
                dtype=dtype,
                target_std=math.sqrt(1.5),
                target_cov01=0.5,
            )
Example #19
Source File: extract_features.py    From upsilon with MIT License 4 votes vote down vote up
def shallow_run(self):
        """Derive not-period-based features."""
        # Number of data points
        self.n_points = len(self.date)

        # Weight calculation.
        # All zero values.
        if not self.err.any():
            self.err = np.ones(len(self.mag)) * np.std(self.mag)
        # Some zero values.
        elif not self.err.all():
            np.putmask(self.err, self.err==0, np.median(self.err))

        self.weight = 1. / self.err
        self.weighted_sum = np.sum(self.weight)

        # Simple statistics, mean, median and std.
        self.mean = np.mean(self.mag)
        self.median = np.median(self.mag)
        self.std = np.std(self.mag)

        # Weighted mean and std.
        self.weighted_mean = np.sum(self.mag * self.weight) / self.weighted_sum
        self.weighted_std = np.sqrt(np.sum((self.mag - self.weighted_mean) ** 2 \
                                           * self.weight) / self.weighted_sum)

        # Skewness and kurtosis.
        self.skewness = ss.skew(self.mag)
        self.kurtosis = ss.kurtosis(self.mag)

        # Normalization-test. Shapiro-Wilk test.
        shapiro = ss.shapiro(self.mag)
        self.shapiro_w = shapiro[0]
        # self.shapiro_log10p = np.log10(shapiro[1])

        # Percentile features.
        self.quartile31 = np.percentile(self.mag, 75) \
                          - np.percentile(self.mag, 25)

        # Stetson K.
        self.stetson_k = self.get_stetson_k(self.mag, self.median, self.err)

        # Ratio between higher and lower amplitude than average.
        self.hl_amp_ratio = self.half_mag_amplitude_ratio(
            self.mag, self.median, self.weight)
        # This second function's value is very similar with the above one.
        # self.hl_amp_ratio2 = self.half_mag_amplitude_ratio2(
        #    self.mag, self.median)

        # Cusum
        self.cusum = self.get_cusum(self.mag)

        # Eta
        self.eta = self.get_eta(self.mag, self.weighted_std)