Python scipy.stats.kstest() Examples

The following are 30 code examples of scipy.stats.kstest(), collected from the open-source project and source file named at the top of each example. You may also want to check out all available functions/classes of the module scipy.stats.
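Before the examples, a minimal self-contained sketch of the one-sample kstest() API that all of the snippets below rely on (the seed and sample size here are arbitrary):

import numpy as np
from scipy import stats

rng = np.random.default_rng(0)
sample = rng.normal(loc=0.0, scale=1.0, size=1000)

# Test against a named distribution (the standard normal here) ...
statistic, pvalue = stats.kstest(sample, 'norm')

# ... or against any callable CDF, such as a frozen distribution's.
result = stats.kstest(sample, stats.norm(loc=0.0, scale=1.0).cdf)

# kstest returns a (statistic, pvalue) pair; recent SciPy versions return a
# KstestResult with .statistic and .pvalue attributes, so both tuple
# unpacking and attribute access work.
print(statistic, pvalue, result.statistic, result.pvalue)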
Example #1
Source File: exponential_test.py    From deep_image_model with Apache License 2.0
def testExponentialSample(self):
    with self.test_session():
      lam = tf.constant([3.0, 4.0])
      lam_v = [3.0, 4.0]
      n = tf.constant(100000)
      exponential = tf.contrib.distributions.Exponential(lam=lam)

      samples = exponential.sample(n, seed=137)
      sample_values = samples.eval()
      self.assertEqual(sample_values.shape, (100000, 2))
      self.assertFalse(np.any(sample_values < 0.0))
      for i in range(2):
        self.assertLess(
            stats.kstest(
                sample_values[:, i], stats.expon(scale=1.0/lam_v[i]).cdf)[0],
            0.01) 
Example #2
Source File: exponential_test.py    From deep_image_model with Apache License 2.0
def testExponentialSampleMultiDimensional(self):
    with self.test_session():
      batch_size = 2
      lam_v = [3.0, 22.0]
      lam = tf.constant([lam_v] * batch_size)

      exponential = tf.contrib.distributions.Exponential(lam=lam)

      n = 100000
      samples = exponential.sample(n, seed=138)
      self.assertEqual(samples.get_shape(), (n, batch_size, 2))

      sample_values = samples.eval()

      self.assertFalse(np.any(sample_values < 0.0))
      for i in range(2):
        self.assertLess(
            stats.kstest(
                sample_values[:, 0, i], stats.expon(scale=1.0/lam_v[i]).cdf)[0],
            0.01)
        self.assertLess(
            stats.kstest(
                sample_values[:, 1, i], stats.expon(scale=1.0/lam_v[i]).cdf)[0],
            0.01) 
Example #3
Source File: test_1d.py    From cpnest with MIT License
def test_evidence(self):
        # 2 sigma tolerance
        tolerance = 2.0*np.sqrt(self.work.NS.state.info/self.work.NS.Nlive)
        print('2-sigma statistical error in logZ: {0:0.3f}'.format(tolerance))
        print('Analytic logZ {0}'.format(self.model.analytic_log_Z))
        print('Estimated logZ {0}'.format(self.work.NS.logZ))
        pos=self.work.posterior_samples['x']
        #t,pval=stats.kstest(pos,self.model.distr.cdf)
        stat,pval = stats.normaltest(pos.T)
        print('Normal test p-value {0}'.format(str(pval)))
        plt.figure()
        plt.hist(pos.ravel(),density=True)
        x=np.linspace(self.model.bounds[0][0],self.model.bounds[0][1],100)
        plt.plot(x,self.model.distr.pdf(x))
        plt.title('NormalTest pval = {0}'.format(pval))
        plt.savefig('posterior.png')
        plt.figure()
        plt.plot(pos.ravel(),',')
        plt.title('chain')
        plt.savefig('chain.png')
        self.assertTrue(np.abs(self.work.NS.logZ - GaussianModel.analytic_log_Z)<tolerance, 'Incorrect evidence for normalised distribution: {0:.3f} instead of {1:.3f}'.format(self.work.NS.logZ,GaussianModel.analytic_log_Z ))
        self.assertTrue(pval>0.01,'Normality test failed: p-value = {0}'.format(pval)) 
Example #4
Source File: test_half_gaussian.py    From cpnest with MIT License
def test_evidence(self):
        # 2 sigma tolerance
        tolerance = 2.0*np.sqrt(self.work.NS.state.info/self.work.NS.Nlive)
        print('2-sigma statistical error in logZ: {0:0.3f}'.format(tolerance))
        print('Analytic logZ {0}'.format(self.model.analytic_log_Z))
        print('Estimated logZ {0}'.format(self.work.NS.logZ))
        pos=self.work.posterior_samples['x']
        #t,pval=stats.kstest(pos,self.model.distr.cdf)
        plt.figure()
        plt.hist(pos.ravel(),density=True)
        x=np.linspace(self.model.bounds[0][0],self.model.bounds[0][1],100)
        plt.plot(x,2*self.model.distr.pdf(x))
        plt.savefig('posterior.png')
        plt.figure()
        plt.plot(pos.ravel(),',')
        plt.title('chain')
        plt.savefig('chain.png')
        self.assertTrue(np.abs(self.work.NS.logZ - self.model.analytic_log_Z)<tolerance, 'Incorrect evidence for normalised distribution: {0:.3f} instead of {1:.3f}'.format(self.work.NS.logZ,self.model.analytic_log_Z )) 
Example #5
Source File: test_multivariate.py    From GraphicDesignPatternByPython with MIT License
def test_haar(self):
        # Test that the eigenvalues, which lie on the unit circle in
        # the complex plane, are uncorrelated.

        # Generate samples
        dim = 5
        samples = 1000  # Not too many, or the test takes too long
        np.random.seed(514)  # Note that the test is sensitive to seed too
        xs = unitary_group.rvs(dim, size=samples)

        # The angles "x" of the eigenvalues should be uniformly distributed
        # Overall this seems to be a necessary but weak test of the distribution.
        # np.vstack on a bare generator is deprecated in numpy; pass a list
        eigs = np.vstack([scipy.linalg.eigvals(x) for x in xs])
        x = np.arctan2(eigs.imag, eigs.real)
        res = kstest(x.ravel(), uniform(-np.pi, 2*np.pi).cdf)
        assert_(res.pvalue > 0.05) 
Example #6
Source File: feature_selection.py    From default-credit-card-prediction with MIT License
def kolmogorov_smirnov_normality_test(X, y):
    """
    Performs the one-sample Kolmogorov-Smirnov test, testing whether the feature values of each class are drawn from a standard normal distribution

    Keyword arguments:
    X -- The feature vectors
    y -- The target vector
    """

    kolmogorov_smirnov = {}
    for feature_col in range(len(X[0])):
        values = []
        kolmogorov_smirnov[feature_col] = values
        for class_index in range(2):
            # kstest against 'norm' compares to N(0, 1), so the features
            # should be standardized beforehand
            values.append(stats.kstest(X[y == class_index, feature_col], 'norm'))

    # debug
    for f in range(23):
        print(kolmogorov_smirnov[f])

    return kolmogorov_smirnov 
Example #7
Source File: random_test.py    From tick with BSD 3-Clause "New" or "Revised" License
def test_gaussian_random_with_bounds(self):
        """...Test gaussian random numbers simulation with mean and scale
        defined
        """
        mu = -10
        sigma = 0.5

        seeded_sample = \
            [-10.58093465, -10.31294449, -9.98125953, -10.34969085, -9.82447348]

        self._test_dist_with_seed(seeded_sample, test_gaussian, mu, sigma)

        # Statistical tests
        sample = test_gaussian(mu, sigma, self.stat_size, self.test_seed)
        # kstest returns (statistic, pvalue); the KS statistic should be
        # small for a sample that matches N(mu, sigma)
        d, _ = stats.kstest(sample, 'norm', (mu, sigma))
        self.assertLess(d, 0.05) 
Example #8
Source File: test_continuous_null.py    From diffxpy with BSD 3-Clause "New" or "Revised" License
def _test_null_distribution_lrt(self):
        """
        Test if de.test.continuous() generates a uniform p-value distribution in lrt
        if it is given data simulated based on the null model. Returns the p-value
        of the two-sided Kolmogorov-Smirnov test for equality of the observed
        p-value distribution and a uniform distribution.

        :param n_cells: Number of cells to simulate (number of observations per test).
        :param n_genes: Number of genes to simulate (number of tests).
        """
        logging.getLogger("tensorflow").setLevel(logging.INFO)
        logging.getLogger("batchglm").setLevel(logging.INFO)
        logging.getLogger("diffxpy").setLevel(logging.WARNING)

        self.noise_model = "nb"
        np.random.seed(1)
        test = self._test_null_model(nobs=2000, ngenes=100, test="lrt", constrained=False)

        # Compare p-value distribution under null model against uniform distribution.
        pval_h0 = stats.kstest(test.pval, 'uniform').pvalue

        logging.getLogger("diffxpy").info('KS-test pvalue for null model match of wald(): %f' % pval_h0)
        return True 
Example #9
Source File: __init__.py    From python-mle with MIT License
def kolsmi(dist, fit_result, data):
    """Perform a Kolmogorow-Smirnow-Test for goodness of fit.

    This tests the H0 hypothesis, if data is a sample of dist

    Args:
        dist:         A mle.Distribution instance
        fit_result:   The solution dict, returned by the Distribution.fit method
        data:         The data used in Distribution.fit
    Returns:
        teststat:     the test statistic, i.e. the maximum distance between the
                      cumulative distribution functions
        p-value:      the p-value of the test
    """
    variables = dist.get_vars()
    if len(variables) > 1:
        raise ValueError("Kolmogorov-Smirnov-Test is only valid for 1d distributions")
    var = variables[0]
    teststat, pvalue = stats.kstest(data[var.name], lambda x: dist.cdf(x, **fit_result["x"]))
    return teststat, pvalue 
Example #10
Source File: initialization.py    From minian with GNU General Public License v3.0
def ks_refine(varr, seeds, sig=0.05):
    print("selecting seeds")
    varr_sub = varr.sel(
        spatial=[tuple(hw) for hw in seeds[['height', 'width']].values])
    print("performing KS test")
    ks = xr.apply_ufunc(
        lambda x: kstest(zscore(x), 'norm')[1],
        varr_sub.chunk(dict(frame=-1, spatial='auto')),
        input_core_dims=[['frame']],
        vectorize=True,
        dask='parallelized',
        output_dtypes=[float])
    mask = ks < sig
    mask_df = mask.to_pandas().rename('mask_ks').reset_index()
    seeds = pd.merge(seeds, mask_df, on=['height', 'width'], how='left')
    return seeds 
Example #11
Source File: utils.py    From AIF360 with Apache License 2.0
def checkNormalFit(x_train, y_train, x_control_train):
	train = []
	for i in range(0, len(y_train)):
		temp1 = np.append(x_train[i], y_train[i])
		temp2 = np.append(temp1, x_control_train[i])
		train.append(temp2)

	mean = np.mean(train, axis=0)
	cov = np.cov(train, rowvar=0)
	l = len(mean) - 2
	for i in range(0, l):
		for j in range(0, l):
			if i != j:
				cov[i][j] = 0

	for i in range(0, len(train[0])):
		data = []
		for elem in train:
			data.append(elem[i])

		def cdf(x):
			return st.norm.cdf(x, mean[i], math.sqrt(cov[i][i]))

		print(st.kstest(data, cdf)) 
Example #12
Source File: sampling_templates.py    From pyclustering with GNU General Public License v3.0
def uniform_distribution(data, n, algorithm, repeat, supremum_cdf=0.06, ccore=True):
        # A supremum CDF distance < 0.06 roughly indicates a uniform distribution (for the R algorithm).
        # A supremum CDF distance < 0.4 is used for the X algorithm.
        min_value = min(data)
        max_value = max(data)
        scale = max_value - min_value

        stream = collections.deque()
        for _ in range(repeat):
            stream += algorithm(data, n)

        D, pvalue = stats.kstest(stream, stats.uniform(loc=min_value, scale=scale).cdf)
        assertion.gt(supremum_cdf, D) 
Example #13
Source File: test_twosample.py    From diffxpy with BSD 3-Clause "New" or "Revised" License
def test_null_distribution_lrt(self, n_cells: int = 2000, n_genes: int = 100, n_groups: int = 2):
        """
        Test if de.test.two_sample() with a Wald test generates a uniform p-value distribution
        if it is given data simulated based on the null model. Returns the p-value
        of the two-sided Kolmogorov-Smirnov test for equality of the observed
        p-value distribution and a uniform distribution.

        :param n_cells: Number of cells to simulate (number of observations per test).
        :param n_genes: Number of genes to simulate (number of tests).
        """
        logging.getLogger("tensorflow").setLevel(logging.ERROR)
        logging.getLogger("batchglm").setLevel(logging.WARNING)
        logging.getLogger("diffxpy").setLevel(logging.WARNING)
        from batchglm.api.models.numpy.glm_nb import Simulator

        sim = Simulator(num_observations=n_cells, num_features=n_genes)
        sim.generate_sample_description(num_batches=0, num_conditions=0)
        sim.generate()

        random_sample_description = pd.DataFrame({
            "condition": np.random.randint(n_groups, size=sim.nobs)
        })

        test = de.test.two_sample(
            data=sim.input_data,
            grouping=random_sample_description["condition"],
            test="wald",
            noise_model="nb",
        )
        summary = test.summary()

        # Compare p-value distribution under null model against uniform distribution.
        pval_h0 = stats.kstest(test.pval.flatten(), 'uniform').pvalue

        logging.getLogger("diffxpy").info('KS-test pvalue for null model match of test_wald_loc(): %f' % pval_h0)
        assert pval_h0 > 0.05, "KS-Test failed: pval_h0=%f is <= 0.05!" % np.round(pval_h0, 5)

        return True 
Example #14
Source File: test_stats.py    From diffxpy with BSD 3-Clause "New" or "Revised" License
def test_lrt(self, df: int = 3, n: int = 1000):
        """
        Test if de.stats.likelihood_ratio_test() generates a uniform p-value distribution
        if it is given test statistics sampled from the null model. Returns the p-value
        of the two-sided Kolmogorov-Smirnov test for equality of the observed
        p-value distribution and a uniform distribution.

        :param n: Number of tests to run.
        :param df: Difference in degrees of freedom between null and alternative model.
        """
        logging.getLogger("tensorflow").setLevel(logging.ERROR)
        logging.getLogger("batchglm").setLevel(logging.WARNING)
        logging.getLogger("diffxpy").setLevel(logging.WARNING)
        
        # Draw chi-square distributed deviance which is the statistic 
        # distributed under the null hypothesis:
        # dev = 2 * (ll_full - ll_reduced)
        dev = np.random.chisquare(df=df, size=n)
        
        # Set ll_full, ll_red and df_full and df_red so that the correct
        # deviance is computed within likelihood_ratio_test().
        ll_full = dev / 2
        ll_red = np.zeros_like(ll_full)
        
        # Compute p-value distribution under null model.
        pvals = de.stats.likelihood_ratio_test(ll_full=ll_full, ll_reduced=ll_red, df_full=df, df_reduced=0)
        
        # Compare p-value distribution under null model against uniform distribution.
        pval_h0 = stats.kstest(pvals, 'uniform').pvalue

        logging.getLogger("diffxpy").info('KS-test pvalue for null model match of likelihood_ratio_test(): %f' % pval_h0)
        assert pval_h0 > 0.05, "KS-Test failed: pval_h0 is <= 0.05!"

        return True 
Example #15
Source File: test_partition.py    From diffxpy with BSD 3-Clause "New" or "Revised" License
def test_null_distribution_rank(self, n_cells: int = 4000, n_genes: int = 200):
        """
        Test if rank_test() generates a uniform p-value distribution
        if it is given data simulated based on the null model. Returns the p-value
        of the two-sided Kolmogorov-Smirnov test for equality of the observed
        p-value distribution and a uniform distribution.

        :param n_cells: Number of cells to simulate (number of observations per test).
        :param n_genes: Number of genes to simulate (number of tests).
        """
        logging.getLogger("tensorflow").setLevel(logging.ERROR)
        logging.getLogger("batchglm").setLevel(logging.WARNING)
        logging.getLogger("diffxpy").setLevel(logging.WARNING)

        sim = Simulator(num_observations=n_cells, num_features=n_genes)
        sim.generate_sample_description(num_batches=0, num_conditions=2)
        sim.generate()

        sample_description = pd.DataFrame({
            "covar1": np.random.randint(2, size=sim.nobs)
        })
        sample_description["cond"] = sim.sample_description["condition"].values

        partition = de.test.partition(
            data=sim.x,
            parts="cond",
            sample_description=sample_description
        )
        det = partition.rank_test(
            grouping="covar1",
            dtype="float64"
        )
        summary = det.summary()

        # Compare p-value distribution under null model against uniform distribution.
        pval_h0 = stats.kstest(det.pval.flatten(), 'uniform').pvalue

        logging.getLogger("diffxpy").info('KS-test pvalue for null model match of rank_test(): %f' % pval_h0)
        assert pval_h0 > 0.05, "KS-Test failed: pval_h0=%f is <= 0.05!" % np.round(pval_h0, 5)

        return True 
Example #16
Source File: test_stats.py    From diffxpy with BSD 3-Clause "New" or "Revised" License
def test_t_test_raw(self, n: int = 1000, n_test: int = 100):
        """
        Test if de.stats.t_test_raw() generates a uniform p-value distribution
        if it is given data sampled from the null model. Returns the p-value
        of the two-sided Kolmogorov-Smirnov test for equality of the observed
        p-value distribution and a uniform distribution.

        :param n: int
            Number of tests to run.
        :param n_test: int
            Sample size of each group in each test.
        """
        logging.getLogger("tensorflow").setLevel(logging.ERROR)
        logging.getLogger("batchglm").setLevel(logging.WARNING)
        logging.getLogger("diffxpy").setLevel(logging.WARNING)
        
        # Draw sample distribution parameters for each test:
        locs = np.random.normal(loc=0, scale=1, size=n)
        scales = np.exp(np.random.normal(loc=0, scale=0.5, size=n))
        
        # Draw two sets of sample estimates for each test:
        x0 = np.vstack([np.random.normal(loc=locs[i], scale=scales[i], size=n_test) for i in range(n)]).T
        x1 = np.vstack([np.random.normal(loc=locs[i], scale=scales[i], size=n_test) for i in range(n)]).T
        
        # Compute p-value distribution under null model.
        pvals = de.stats.t_test_raw(x0=x0, x1=x1)
        
        # Compare p-value distribution under null model against uniform distribution.
        pval_h0 = stats.kstest(pvals, 'uniform').pvalue

        logging.getLogger("diffxpy").info('KS-test pvalue for null model match of t_test_raw(): %f' % pval_h0)
        assert pval_h0 > 0.05, "KS-Test failed: pval_h0 is <= 0.05!"

        return True 
Example #17
Source File: test_stats.py    From diffxpy with BSD 3-Clause "New" or "Revised" License
def test_wilcoxon(self, n: int = 1000, n_test: int = 100):
        """
        Test if de.stats.mann_whitney_u_test() generates a uniform p-value distribution
        if it is given data sampled from the null model. Returns the p-value
        of the two-sided Kolmogorov-Smirnov test for equality of the observed
        p-value distribution and a uniform distribution.

        :param n: Number of tests to run.
        :param n_test: Sample size of each group in each test.
        """
        logging.getLogger("tensorflow").setLevel(logging.ERROR)
        logging.getLogger("batchglm").setLevel(logging.WARNING)
        logging.getLogger("diffxpy").setLevel(logging.WARNING)
        
        # Draw sample distribution parameters for each test:
        locs = np.random.normal(loc=0, scale=1, size=n)
        scales = np.exp(np.random.normal(loc=0, scale=0.5, size=n))
        
        # Draw two sets of sample estimates for each test:
        x0 = np.vstack([np.random.normal(loc=locs[i], scale=scales[i], size=n_test) for i in range(n)]).T
        x1 = np.vstack([np.random.normal(loc=locs[i], scale=scales[i], size=n_test) for i in range(n)]).T
        
        # Compute p-value distribution under null model.
        pvals = de.stats.mann_whitney_u_test(x0=x0, x1=x1)
        
        # Compare p-value distribution under null model against uniform distribution.
        pval_h0 = stats.kstest(pvals, 'uniform').pvalue

        logging.getLogger("diffxpy").info('KS-test pvalue for null model match of wilcoxon(): %f' % pval_h0)
        assert pval_h0 > 0.05, "KS-Test failed: pval_h0 is <= 0.05!"

        return True 
Example #18
Source File: test_stats.py    From diffxpy with BSD 3-Clause "New" or "Revised" License
def test_z_test(self, n: int = 1000):
        """
        Test if de.stats.two_coef_z_test() generates a uniform p-value distribution
        if it is given test statistics sampled from the null model. Returns the p-value
        of the two-sided Kolmogorov-Smirnov test for equality of the observed
        p-value distribution and a uniform distribution.

        :param n: Number of tests to run.
        """
        logging.getLogger("tensorflow").setLevel(logging.ERROR)
        logging.getLogger("batchglm").setLevel(logging.WARNING)
        logging.getLogger("diffxpy").setLevel(logging.WARNING)
        
        # Draw parameter posteriors for each test:
        theta_mles = np.random.normal(loc=0, scale=1, size=n)
        theta_sds = np.exp(np.random.normal(loc=0, scale=0.5, size=n))
        
        # Draw two estimates from each posterior:
        theta_mle0 = np.random.normal(loc=theta_mles, scale=theta_sds)
        theta_mle1 = np.random.normal(loc=theta_mles, scale=theta_sds)
        
        # Compute p-value distribution under null model.
        pvals = de.stats.two_coef_z_test(theta_mle0=theta_mle0, theta_mle1=theta_mle1, theta_sd0=theta_sds,
                                         theta_sd1=theta_sds)
        
        # Compare p-value distribution under null model against uniform distribution.
        pval_h0 = stats.kstest(pvals, 'uniform').pvalue

        logging.getLogger("diffxpy").info('KS-test pvalue for null model match of z_test(): %f' % pval_h0)
        assert pval_h0 > 0.05, "KS-Test failed: pval_h0 is <= 0.05!"

        return True 
Example #19
Source File: test_stats.py    From diffxpy with BSD 3-Clause "New" or "Revised" License
def test_wald(self, n: int = 1000):
        """
        Test if de.stats.wald_test() generates a uniform p-value distribution
        if it is given test statistics sampled from the null model. Returns the p-value
        of the two-sided Kolmogorov-Smirnov test for equality of the observed
        p-value distribution and a uniform distribution.

        :param n: Number of tests to run.
        """
        logging.getLogger("tensorflow").setLevel(logging.ERROR)
        logging.getLogger("batchglm").setLevel(logging.WARNING)
        logging.getLogger("diffxpy").setLevel(logging.WARNING)
        
        # Draw standard normal distributed estimate which is sampled
        # from the parameter posterior under the null model:
        mles = np.random.normal(loc=0, scale=1, size=n)
        sd = np.zeros([n]) + 1
        
        # Compute p-value distribution under null model.
        pvals = de.stats.wald_test(theta_mle=mles, theta_sd=sd, theta0=0)
        
        # Compare p-value distribution under null model against uniform distribution.
        pval_h0 = stats.kstest(pvals, 'uniform').pvalue

        logging.getLogger("diffxpy").info('KS-test pvalue for null model match of wald(): %f' % pval_h0)
        assert pval_h0 > 0.05, "KS-Test failed: pval_h0 is <= 0.05!"

        return True 
Example #20
Source File: test_twosample.py    From diffxpy with BSD 3-Clause "New" or "Revised" License
def test_null_distribution_ttest(self, n_cells: int = 2000, n_genes: int = 100, n_groups: int = 2):
        """
        Test if de.test.two_sample() with test="t_test" generates a uniform p-value distribution
        if it is given data simulated based on the null model. Returns the p-value
        of the two-sided Kolmogorov-Smirnov test for equality of the observed
        p-value distribution and a uniform distribution.

        :param n_cells: Number of cells to simulate (number of observations per test).
        :param n_genes: Number of genes to simulate (number of tests).
        """
        logging.getLogger("tensorflow").setLevel(logging.ERROR)
        logging.getLogger("batchglm").setLevel(logging.WARNING)
        logging.getLogger("diffxpy").setLevel(logging.WARNING)
        from batchglm.api.models.numpy.glm_nb import Simulator

        sim = Simulator(num_observations=n_cells, num_features=n_genes)
        sim.generate_sample_description(num_batches=0, num_conditions=0)
        sim.generate()

        random_sample_description = pd.DataFrame({
            "condition": np.random.randint(n_groups, size=sim.nobs)
        })

        test = de.test.two_sample(
            data=sim.input_data,
            grouping=random_sample_description["condition"],
            test="t_test"
        )
        summary = test.summary()

        # Compare p-value distribution under null model against uniform distribution.
        pval_h0 = stats.kstest(test.pval.flatten(), 'uniform').pvalue

        logging.getLogger("diffxpy").info('KS-test pvalue for null model match of test_wald_loc(): %f' % pval_h0)
        assert pval_h0 > 0.05, "KS-Test failed: pval_h0=%f is <= 0.05!" % np.round(pval_h0, 5)

        return True 
Example #21
Source File: microlstats.py    From pyLIMA with GNU General Public License v3.0
def normal_Kolmogorov_Smirnov(sample):
    """The moon illumination expressed as a percentage.

                :param astropy sun: the sun ephemeris
                :param astropy moon: the moon ephemeris

                :return: a numpy array like indicated the moon illumination.

                :rtype: array_like

    """

    mu, sigma = ss.norm.fit(sample)
    # mu and sigma are fitted for reference only; the test below compares the
    # sample against N(0, 1), so the sample is assumed to be standardized
    KS_stat, KS_pvalue = ss.kstest(sample, 'norm', args=(0, 1))

    # The sample is likely Gaussian-like if KS_stat (~ the maximum distance
    # between the empirical and theoretical distributions) is close to 0; the
    # null hypothesis cannot be rejected (i.e. the sample may come from a
    # Gaussian) if KS_pvalue is close to 1

    KS_judgement = 0

    if KS_pvalue > 0.01:

        KS_judgement = 1

    if KS_pvalue > 0.05:

        KS_judgement = 2

    return KS_stat, KS_pvalue, KS_judgement 
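A caveat that applies to Example #21 and to normality checks in general: when the location and scale are estimated from the same sample being tested, the standard KS p-value is no longer exact (the Lilliefors test corrects for this). A minimal sketch of the standardization approach also used in Example #10, assuming an approximate p-value is acceptable (the seed and parameters here are illustrative):

import numpy as np
from scipy import stats

rng = np.random.default_rng(1)
sample = rng.normal(loc=2.0, scale=3.0, size=500)

# Standardize with the sample's own mean and standard deviation, then test
# against N(0, 1); the p-value is only approximate because the parameters
# were estimated from the same data.
z = (sample - sample.mean()) / sample.std(ddof=1)
ks_stat, ks_pvalue = stats.kstest(z, 'norm')
print(ks_stat, ks_pvalue)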
Example #22
Source File: gof_new.py    From Splunking-Crime with GNU Affero General Public License v3.0
def bootstrap2(value, distr, args=(), nobs=200, nrep=100):
    '''Monte Carlo (parametric bootstrap) p-values for goodness-of-fit tests

    currently hardcoded for the Anderson-Darling A^2 statistic only

    not vectorized: loops over all parametric bootstrap replications,
    computes the statistic for each, and returns the resulting p-value

    TODO: rename function to something less generic

    '''
    #signature similar to kstest ?
    #delegate to fn ?

    #rvs_kwds = {'size':(nobs, nrep)}
    #rvs_kwds.update(kwds)


    count = 0
    for irep in range(nrep):
        #rvs = distr.rvs(args, **kwds)  #extension to distribution kwds ?
        rvs = distr.rvs(args, **{'size':nobs})
        params = distr.fit_vec(rvs)
        cdfvals = np.sort(distr.cdf(rvs, params))
        stat = asquare(cdfvals, axis=0)
        count += (stat >= value)
    return count * 1. / nrep 
Example #23
Source File: test_sample.py    From pyPESTO with BSD 3-Clause "New" or "Revised" License
def test_ground_truth():
    """Test whether we actually retrieve correct distributions."""
    # use the best self-implemented sampler, which has a chance of correctly
    # sampling from the distribution
    sampler = sample.AdaptiveParallelTemperingSampler(
        internal_sampler=sample.AdaptiveMetropolisSampler(), n_chains=5)

    problem = gaussian_problem()

    result = optimize.minimize(problem)

    result = sample.sample(problem, n_samples=10000,
                           result=result, sampler=sampler)

    # get samples of first chain
    samples = result.sample_result.trace_x[0].flatten()

    # test against different distributions

    statistic, pval = kstest(samples, 'norm')
    print(statistic, pval)
    assert statistic < 0.1

    statistic, pval = kstest(samples, 'uniform')
    print(statistic, pval)
    assert statistic > 0.1 
Example #24
Source File: unittests_utils.py    From Conditional_Density_Estimation with MIT License
def test_batched_student_t_rvs(self):
    np.random.seed(123)
    n = 5000
    locs = np.ones(n) * 5
    scales = np.ones(n) * 2
    dofs = np.ones(n) * 4

    rvs = batched_univ_t_rvs(locs, scales, dofs)

    cdf_callable = lambda y: stats.t.cdf(y, df=4, loc=5, scale=2)
    _, p_val = stats.kstest(rvs, cdf_callable)
    print("P-Val Kolmogorov:", p_val)

    self.assertGreaterEqual(p_val, 0.1) 
Example #25
Source File: tests.py    From nninit with MIT License
def _is_uniform(self, tensor, a, b):
        if isinstance(tensor, Variable):
            tensor = tensor.data
        p_value = stats.kstest(tensor.numpy().flatten(), 'uniform', args=(a, (b - a))).pvalue
        return p_value > 0.0001 
Example #26
Source File: tests.py    From nninit with MIT License
def _is_normal(self, tensor, mean, std):
        if isinstance(tensor, Variable):
            tensor = tensor.data
        p_value = stats.kstest(tensor.numpy().flatten(), 'norm', args=(mean, std)).pvalue
        return p_value > 0.0001 
Example #27
Source File: test_init_methods.py    From neupy with MIT License
def assertUniformlyDistributed(self, value):
        # kstest returns a (statistic, pvalue) tuple, which is always truthy,
        # so assert on the p-value for the test to be meaningful
        pvalue = stats.kstest(value.ravel(), 'uniform').pvalue
        self.assertGreater(pvalue, 0.05,
                           msg="Sampled distribution is not uniform") 
Example #28
Source File: test_random.py    From chainerrl with MIT License
def subtest_normal_distrib(self, xs, mean, std):
        _, pvalue = stats.kstest(xs, 'norm', (mean, std))
        self.assertGreater(pvalue, 3e-3) 
Example #29
Source File: trigger_fits.py    From pycbc with GNU General Public License v3.0
def KS_test(distr, vals, alpha, thresh=None):
    """
    Perform Kolmogorov-Smirnov test for fitted distribution

    Compare the given set of discrete values above a given threshold to the
    fitted distribution function.
    If no threshold is specified, the minimum sample value will be used.
    Returns the KS test statistic and its p-value: lower p means less
    probable under the hypothesis of a perfect fit

    Parameters
    ----------
    distr : {'exponential', 'rayleigh', 'power'}
        Name of distribution
    vals : sequence of floats
        Values to compare to fit
    alpha : float
        Fitted distribution parameter
    thresh : float
        Threshold to apply before fitting; if None, use min(vals)

    Returns
    -------
    D : float
        KS test statistic
    p-value : float
        p-value, assumed to be two-tailed
    """
    vals = numpy.array(vals)
    if thresh is None:
        thresh = min(vals)
    else:
        vals = vals[vals >= thresh]
    def cdf_fn(x):
        return 1 - cum_fndict[distr](x, alpha, thresh)
    return kstest(vals, cdf_fn) 
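The same callable-CDF pattern can be reproduced with scipy alone. A self-contained sketch in the spirit of Example #29, testing threshold-exceeding values against an exponential tail (thresh and alpha here are illustrative values, not pycbc's fitted parameters):

import numpy as np
from scipy import stats

rng = np.random.default_rng(2)
thresh = 1.0
alpha = 2.0  # illustrative exponential rate parameter

# Simulate values above the threshold with an exponential tail.
vals = thresh + rng.exponential(scale=1.0 / alpha, size=2000)

# CDF of the exponential tail above thresh, mirroring cdf_fn above.
def cdf_fn(x):
    return 1.0 - np.exp(-alpha * (np.asarray(x) - thresh))

d, p = stats.kstest(vals, cdf_fn)
print(d, p)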
Example #30
Source File: test_twosample.py    From diffxpy with BSD 3-Clause "New" or "Revised" License
def test_null_distribution_wald(self, n_cells: int = 2000, n_genes: int = 100, n_groups: int = 2):
        """
        Test if de.test.two_sample() with a Wald test generates a uniform p-value distribution
        if it is given data simulated based on the null model. Returns the p-value
        of the two-sided Kolmogorov-Smirnov test for equality of the observed
        p-value distribution and a uniform distribution.

        :param n_cells: Number of cells to simulate (number of observations per test).
        :param n_genes: Number of genes to simulate (number of tests).
        """
        logging.getLogger("tensorflow").setLevel(logging.ERROR)
        logging.getLogger("batchglm").setLevel(logging.WARNING)
        logging.getLogger("diffxpy").setLevel(logging.WARNING)
        from batchglm.api.models.numpy.glm_nb import Simulator

        sim = Simulator(num_observations=n_cells, num_features=n_genes)
        sim.generate_sample_description(num_batches=0, num_conditions=0)
        sim.generate()

        random_sample_description = pd.DataFrame({
            "condition": np.random.randint(n_groups, size=sim.nobs)
        })

        test = de.test.two_sample(
            data=sim.input_data,
            grouping=random_sample_description["condition"].values,
            test="wald",
            noise_model="nb",
        )
        summary = test.summary()

        # Compare p-value distribution under null model against uniform distribution.
        pval_h0 = stats.kstest(test.pval.flatten(), 'uniform').pvalue

        logging.getLogger("diffxpy").info('KS-test pvalue for null model match of test_wald_loc(): %f' % pval_h0)
        assert pval_h0 > 0.05, "KS-Test failed: pval_h0=%f is <= 0.05!" % np.round(pval_h0, 5)

        return True