Python scipy.stats.ks_2samp() Examples

The following code examples show how to use scipy.stats.ks_2samp(). They are taken from open-source Python projects. You can vote up the examples you like or vote down the ones you don't.

Example 1
 Project: GeoPy   Author: aerler   File: stats.py    GNU General Public License v3.0 6 votes
def ks_2samp_wrapper(data, size1=None, ignoreNaN=True):
    ''' Apply the Kolmogorov-Smirnov Test, to test whether two samples are drawn from the same
        underlying (continuous) distribution. This is a wrapper for the SciPy function that
        removes NaN's, allows application over a field, and only returns the p-value.
        data is the concatenation of both samples; size1 is the length of the first sample. '''
    if ignoreNaN:
        data1 = data[:size1]; data2 = data[size1:]
        nonans1 = np.invert(np.isnan(data1)) # test for NaN's
        nonans2 = np.invert(np.isnan(data2))
        # require at least 3 valid values per sample for a meaningful test
        # (np.nan instead of np.NaN: the np.NaN alias was removed in NumPy 2.0)
        if np.sum(nonans1) < 3 or np.sum(nonans2) < 3: return np.nan
        data1 = data1[nonans1]; data2 = data2[nonans2] # remove NaN's
    else:
        data1 = data[:size1]; data2 = data[size1:]
    # apply test; only the p-value is returned
    D, pval = ss.ks_2samp(data1, data2); del D
    return pval

# Student's T-test for two independent samples
Example 2
 Project: ble5-nrf52-mac   Author: tomasero   File: test_multivariate.py    MIT License 6 votes
```def test_pairwise_distances(self):
# Test that the distribution of pairwise distances is close to correct.
np.random.seed(514)

def random_ortho(dim):
u, _s, v = np.linalg.svd(np.random.normal(size=(dim, dim)))
return np.dot(u, v)

for dim in range(2, 6):
def generate_test_statistics(rvs, N=1000, eps=1e-10):
stats = np.array([
np.sum((rvs(dim=dim) - rvs(dim=dim))**2)
for _ in range(N)
])
# Add a bit of noise to account for numeric accuracy.
stats += np.random.uniform(-eps, eps, size=stats.shape)
return stats

expected = generate_test_statistics(random_ortho)
actual = generate_test_statistics(scipy.stats.ortho_group.rvs)

_D, p = scipy.stats.ks_2samp(expected, actual)

assert_array_less(.05, p) ```
Example 3
 Project: P3_image_processing   Author: latedude2   File: test_stats.py    MIT License 6 votes
def testMiddlingBoth(self):
    # Middling sample sizes: n1=500, n2=600.
    n1, n2 = 500, 600
    delta = 1.0/n1/n2/2/2
    x = np.linspace(1, 200, n1) - delta
    y = np.linspace(2, 200, n2)
    d_two = 2000.0 / n1 / n2   # expected two-sided / 'greater' statistic
    d_less = 500.0 / n1 / n2   # expected 'less' statistic
    self._testOne(x, y, 'two-sided', d_two, 1.0, mode='auto')
    self._testOne(x, y, 'two-sided', d_two, 1.0, mode='asymp')
    self._testOne(x, y, 'greater', d_two, 0.9697596024683929, mode='asymp')
    self._testOne(x, y, 'less', d_less, 0.9968735843165021, mode='asymp')
    # mode='exact' overflows at these sizes and falls back to asymp with a warning
    with suppress_warnings() as sup:
        sup.filter(RuntimeWarning, "ks_2samp: Exact calculation overflowed. Switching to mode=asymp")
        self._testOne(x, y, 'greater', d_two, 0.9697596024683929, mode='exact')
        self._testOne(x, y, 'less', d_less, 0.9968735843165021, mode='exact')
    # the fallback should emit exactly one RuntimeWarning
    with warnings.catch_warnings(record=True) as w:
        warnings.simplefilter("always")
        self._testOne(x, y, 'less', d_less, 0.9968735843165021, mode='exact')
        _check_warnings(w, RuntimeWarning, 1)
Example 4
 Project: P3_image_processing   Author: latedude2   File: test_stats.py    MIT License 6 votes
def testMediumBoth(self):
    # Medium sample sizes: n1=1000, n2=1100.
    n1, n2 = 1000, 1100
    delta = 1.0/n1/n2/2/2
    x = np.linspace(1, 200, n1) - delta
    y = np.linspace(2, 200, n2)
    d_two = 6600.0 / n1 / n2    # expected two-sided / 'greater' statistic
    d_less = 1000.0 / n1 / n2   # expected 'less' statistic
    self._testOne(x, y, 'two-sided', d_two, 1.0, mode='asymp')
    self._testOne(x, y, 'two-sided', d_two, 1.0, mode='auto')
    self._testOne(x, y, 'greater', d_two, 0.9573185808092622, mode='asymp')
    self._testOne(x, y, 'less', d_less, 0.9982410869433984, mode='asymp')

    # mode='exact' overflows at these sizes and falls back to asymp with a warning
    with suppress_warnings() as sup:
        sup.filter(RuntimeWarning, "ks_2samp: Exact calculation overflowed. Switching to mode=asymp")
        self._testOne(x, y, 'greater', d_two, 0.9573185808092622, mode='exact')
        self._testOne(x, y, 'less', d_less, 0.9982410869433984, mode='exact')
    # the fallback should emit exactly one RuntimeWarning
    with warnings.catch_warnings(record=True) as w:
        warnings.simplefilter("always")
        self._testOne(x, y, 'less', d_less, 0.9982410869433984, mode='exact')
        _check_warnings(w, RuntimeWarning, 1)
Example 5
 Project: P3_image_processing   Author: latedude2   File: test_stats.py    MIT License 6 votes
def testLargeBoth(self):
    # Large sample sizes: n1=10000, n2=11000.
    n1, n2 = 10000, 11000
    lcm = n1*11.0
    delta = 1.0/n1/n2/2/2
    x = np.linspace(1, 200, n1) - delta
    y = np.linspace(2, 200, n2)
    d_two = 563.0 / lcm   # expected two-sided / 'greater' statistic
    d_less = 10.0 / lcm   # expected 'less' statistic
    self._testOne(x, y, 'two-sided', d_two, 0.99915729949018561, mode='asymp')
    self._testOne(x, y, 'two-sided', d_two, 1.0, mode='exact')
    self._testOne(x, y, 'two-sided', d_two, 0.99915729949018561, mode='auto')
    self._testOne(x, y, 'greater', d_two, 0.7561851877420673)
    self._testOne(x, y, 'less', d_less, 0.9998239693191724)
    # one-sided mode='exact' overflows here and falls back to asymp with a warning
    with suppress_warnings() as sup:
        sup.filter(RuntimeWarning, "ks_2samp: Exact calculation overflowed. Switching to mode=asymp")
        self._testOne(x, y, 'greater', d_two, 0.7561851877420673, mode='exact')
        self._testOne(x, y, 'less', d_less, 0.9998239693191724, mode='exact')
Example 6
 Project: P3_image_processing   Author: latedude2   File: test_multivariate.py    MIT License 6 votes
```def test_pairwise_distances(self):
# Test that the distribution of pairwise distances is close to correct.
np.random.seed(514)

def random_ortho(dim):
u, _s, v = np.linalg.svd(np.random.normal(size=(dim, dim)))
return np.dot(u, v)

for dim in range(2, 6):
def generate_test_statistics(rvs, N=1000, eps=1e-10):
stats = np.array([
np.sum((rvs(dim=dim) - rvs(dim=dim))**2)
for _ in range(N)
])
# Add a bit of noise to account for numeric accuracy.
stats += np.random.uniform(-eps, eps, size=stats.shape)
return stats

expected = generate_test_statistics(random_ortho)
actual = generate_test_statistics(scipy.stats.ortho_group.rvs)

_D, p = scipy.stats.ks_2samp(expected, actual)

assert_array_less(.05, p) ```
Example 7
 Project: ssbio   Author: SBRG   File: atlas3.py    MIT License 6 votes
def get_pca_ks_stats(self, maxrange=5):
    """Get a dictionary of {PC number: {(phenotype1, phenotype2): K-S test result}}.

    For each of the first ``maxrange`` principal components, run a two-sample
    Kolmogorov-Smirnov test between every pair of phenotypes on that component.

    Args:
        maxrange (int): Maximum number of principal components to examine
            (capped at the number of available columns).

    Returns:
        dict: Maps the 1-based PC number to a dict keyed by phenotype pair,
            with ``ks_2samp`` results as values.
    """
    pc_to_phenotype_pairs = {}
    num_components = self.principal_observations_df.shape[1]
    if num_components < maxrange:
        maxrange = num_components

    phenotypes = self.principal_observations_df.phenotype.unique().tolist()
    for i in range(0, maxrange):
        phenotype_pair_to_ks = {}
        for p1, p2 in combinations(phenotypes, 2):
            # .values replaces DataFrame.as_matrix(), which was removed in pandas 1.0
            p1_pc = self.principal_observations_df[self.principal_observations_df.phenotype == p1].iloc[:, i].values
            p2_pc = self.principal_observations_df[self.principal_observations_df.phenotype == p2].iloc[:, i].values
            phenotype_pair_to_ks[(p1, p2)] = ks_2samp(p1_pc, p2_pc)
        pc_to_phenotype_pairs[i + 1] = phenotype_pair_to_ks

    return pc_to_phenotype_pairs
Example 8
 Project: GraphicDesignPatternByPython   Author: Relph1119   File: test_multivariate.py    MIT License 6 votes
```def test_pairwise_distances(self):
# Test that the distribution of pairwise distances is close to correct.
np.random.seed(514)

def random_ortho(dim):
u, _s, v = np.linalg.svd(np.random.normal(size=(dim, dim)))
return np.dot(u, v)

for dim in range(2, 6):
def generate_test_statistics(rvs, N=1000, eps=1e-10):
stats = np.array([
np.sum((rvs(dim=dim) - rvs(dim=dim))**2)
for _ in range(N)
])
# Add a bit of noise to account for numeric accuracy.
stats += np.random.uniform(-eps, eps, size=stats.shape)
return stats

expected = generate_test_statistics(random_ortho)
actual = generate_test_statistics(scipy.stats.ortho_group.rvs)

_D, p = scipy.stats.ks_2samp(expected, actual)

assert_array_less(.05, p) ```
Example 9
 Project: failing-loudly   Author: steverab   File: shift_tester.py    MIT License 6 votes
def one_dimensional_test(self, X_tr, X_te):
    """Run a univariate two-sample test (KS or Anderson-Darling, chosen by
    self.ot) on every feature column and aggregate via the minimum p-value."""
    p_vals = []

    # Each feature dimension gets its own independent test.
    for dim_idx in range(X_tr.shape[1]):
        column_tr = X_tr[:, dim_idx]
        column_te = X_te[:, dim_idx]

        t_val, p_val = None, None

        if self.ot == OnedimensionalTest.KS:
            # KS statistic and p-value for this feature
            t_val, p_val = ks_2samp(column_tr, column_te)
        elif self.ot == OnedimensionalTest.AD:
            # Anderson-Darling returns (statistic, critical values, p-value)
            t_val, _, p_val = anderson_ksamp([column_tr.tolist(), column_te.tolist()])

        p_vals.append(p_val)

    # Family-wise aggregation (Bonferroni-style bound): keep the smallest
    # per-feature p-value, clipped at 1.0.
    p_vals = np.array(p_vals)
    p_val = min(np.min(p_vals), 1.0)

    return p_val, p_vals
Example 10
 Project: default-credit-card-prediction   Author: alexpnt   File: feature_selection.py    MIT License 6 votes
def kolmogorov_smirnov_two_sample_test(X, y):
    """
    Performs the two-sample Kolmogorov-Smirnov test, testing whether feature values
    of each class are drawn from identical distributions.

    Keyword arguments:
    X -- The feature vectors (n_samples x n_features array)
    y -- The target vector (binary labels 0/1)

    Returns a list with one (ks_statistic, p_value) tuple per feature column.
    """
    n_features = len(X[0])
    # flat list of placeholder tuples (the original [[(0,0)]]*n built aliased inner lists)
    kolmogorov_smirnov = [(0, 0)] * n_features
    # range()/print() work on both Python 2 and 3 (the original used xrange/print stmt)
    for feature_col in range(n_features):
        ks_test_statistic, p_value = stats.ks_2samp(X[y == 0, feature_col],
                                                    X[y == 1, feature_col])
        kolmogorov_smirnov[feature_col] = (ks_test_statistic, p_value)

    # debug: iterate the actual feature count (was a hard-coded xrange(23),
    # which raised IndexError for datasets with fewer than 23 features)
    for f in range(n_features):
        print(kolmogorov_smirnov[f])

    return kolmogorov_smirnov
Example 11
 Project: causallib   Author: IBM   File: stat_utils.py    Apache License 2.0 6 votes
def calc_weighted_ks2samp(x, y, wx, wy):
    """
    Weighted two-sample Kolmogorov-Smirnov statistic.

    Args:
        x, y (np.ndarray): Sample values of the two groups.
        wx, wy (np.ndarray): Corresponding non-negative weights.

    Returns:
        float: Maximal absolute difference between the two weighted ECDFs.

    References:
        [1] https://stackoverflow.com/a/40059727
    """
    x_ix = np.argsort(x)
    y_ix = np.argsort(y)
    x, wx = x[x_ix], wx[x_ix]
    y, wy = y[y_ix], wy[y_ix]
    data = np.concatenate((x, y))
    # Normalized cumulative weights, with a leading 0 so searchsorted can map
    # "before the first observation" to cumulative mass 0.
    wx_cum = np.hstack([0, wx.cumsum() / wx.sum()])
    wy_cum = np.hstack([0, wy.cumsum() / wy.sum()])
    # Align the "steps" between the two distributions so the differences will be
    # well defined.  Plain array indexing here: the original double-bracket form
    # wx_cum[[...]] indexed with a list containing an array, which is deprecated
    # (and later an error) in modern NumPy and added a spurious dimension.
    x_align = wx_cum[np.searchsorted(x, data, side="right")]
    y_align = wy_cum[np.searchsorted(y, data, side="right")]
    stat = np.max(np.abs(x_align - y_align))
    return stat
Example 12
 Project: GeoPy   Author: aerler   File: stats.py    GNU General Public License v3.0 5 votes
def ks_2samp(sample1, sample2, lstatistic=False, ignoreNaN=True, **kwargs):
    ''' Kolmogorov-Smirnov two-sample test: checks whether two samples were drawn
        from the same underlying (continuous) distribution; a high p-value means
        the two samples likely share one distribution.
        Being non-parametric, the K-S test works for all kinds of distributions,
        normal and non-normal alike. '''
    if lstatistic:
        raise NotImplementedError("Return of test statistic is not yet implemented; only p-values are returned.")
    # bind the NaN-handling option, then delegate to the generic 2-sample driver
    ks_fct = functools.partial(ks_2samp_wrapper, ignoreNaN=ignoreNaN)
    return apply_stat_test_2samp(sample1, sample2, fct=ks_fct, laax=True,
                                 lpval=True, lrho=False, **kwargs)
Example 13
 Project: LaserTOF   Author: kyleuckert   File: test_stats.py    MIT License 5 votes
def test_ks_2samp():
    # exact small sample solution
    data1 = np.array([1.0, 2.0])
    data2 = np.array([1.0, 2.0, 3.0])
    # (sample pair, expected (statistic, pvalue)); the first two have exact
    # small-sample solutions, the 100-vs-100 pairs can be verified graphically,
    # and the 100-vs-110 shifted pairs are pure regression values.
    cases = [
        ((data1 + 0.01, data2),
         (0.33333333333333337, 0.99062316386915694)),
        ((data1 - 0.01, data2),
         (0.66666666666666674, 0.42490954988801982)),
        ((np.linspace(1, 100, 100), np.linspace(1, 100, 100) + 2 + 0.1),
         (0.030000000000000027, 0.99999999996005062)),
        ((np.linspace(1, 100, 100), np.linspace(1, 100, 100) + 2 - 0.1),
         (0.020000000000000018, 0.99999999999999933)),
        ((np.linspace(1, 100, 100), np.linspace(1, 100, 110) + 20.1),
         (0.21090909090909091, 0.015880386730710221)),
        ((np.linspace(1, 100, 100), np.linspace(1, 100, 110) + 20 - 0.1),
         (0.20818181818181825, 0.017981441789762638)),
    ]
    for (a, b), expected in cases:
        assert_almost_equal(np.array(stats.ks_2samp(a, b)), np.array(expected))

    # the result must be a namedtuple exposing 'statistic' and 'pvalue'
    attributes = ('statistic', 'pvalue')
    res = stats.ks_2samp(data1 - 0.01, data2)
    check_named_results(res, attributes)
Example 14
 Project: LaserTOF   Author: kyleuckert   File: test_multivariate.py    MIT License 5 votes
def test_haar(self):
    # A Haar-distributed sampler is rotation invariant, so every matrix entry
    # should share one common marginal distribution, and that distribution
    # should also survive a further rotation.

    # Draw the samples.
    dim = 5
    samples = 1000  # Not too many, or the test takes too long
    ks_prob = 0.39  # ...so don't expect much precision
    np.random.seed(514)
    xs = special_ortho_group.rvs(dim, size=samples)

    # Dotting rows (0, 1, 2) with unit vectors (0, 2, 4, 3) just picks off
    # individual matrix entries; under rotation invariance these projections
    # are all identically distributed, which the two-sided KS test verifies.
    # (Uniformity of angles between random vectors would be an alternative
    # check; comparing every pair is infeasible, so a handful suffices.)
    els = ((0, 0), (0, 2), (1, 4), (2, 3))
    proj = {(r, c): sorted(x[r][c] for x in xs) for r, c in els}
    pairs = [(a, b) for a in els for b in els if a > b]
    pvalues = []
    for first, second in pairs:
        pvalues.append(ks_2samp(proj[first], proj[second])[1])
    assert_array_less([ks_prob] * len(pairs), pvalues)
Example 15
 Project: LaserTOF   Author: kyleuckert   File: test_multivariate.py    MIT License 5 votes
def test_haar(self):
    # A Haar-distributed sampler is rotation invariant, so every matrix entry
    # should share one common marginal distribution, and that distribution
    # should also survive a further rotation.

    # Draw the samples.
    dim = 5
    samples = 1000  # Not too many, or the test takes too long
    ks_prob = 0.39  # ...so don't expect much precision
    np.random.seed(518)  # Note that the test is sensitive to seed too
    xs = ortho_group.rvs(dim, size=samples)

    # Dotting rows (0, 1, 2) with unit vectors (0, 2, 4, 3) just picks off
    # individual matrix entries; under rotation invariance these projections
    # are all identically distributed, which the two-sided KS test verifies.
    # (Uniformity of angles between random vectors would be an alternative
    # check; comparing every pair is infeasible, so a handful suffices.)
    els = ((0, 0), (0, 2), (1, 4), (2, 3))
    proj = {(r, c): sorted(x[r][c] for x in xs) for r, c in els}
    pairs = [(a, b) for a in els for b in els if a > b]
    pvalues = []
    for first, second in pairs:
        pvalues.append(ks_2samp(proj[first], proj[second])[1])
    assert_array_less([ks_prob] * len(pairs), pvalues)
Example 16
 Project: att   Author: Centre-Alt-Rendiment-Esportiu   File: test_stats.py    GNU General Public License v3.0 5 votes
def test_ks_2samp():
    # exact small sample solution
    data1 = np.array([1.0, 2.0])
    data2 = np.array([1.0, 2.0, 3.0])
    # (sample pair, expected (statistic, pvalue)); the first two have exact
    # small-sample solutions, the 100-vs-100 pairs can be verified graphically,
    # and the 100-vs-110 shifted pairs are pure regression values.
    cases = [
        ((data1 + 0.01, data2),
         (0.33333333333333337, 0.99062316386915694)),
        ((data1 - 0.01, data2),
         (0.66666666666666674, 0.42490954988801982)),
        ((np.linspace(1, 100, 100), np.linspace(1, 100, 100) + 2 + 0.1),
         (0.030000000000000027, 0.99999999996005062)),
        ((np.linspace(1, 100, 100), np.linspace(1, 100, 100) + 2 - 0.1),
         (0.020000000000000018, 0.99999999999999933)),
        ((np.linspace(1, 100, 100), np.linspace(1, 100, 110) + 20.1),
         (0.21090909090909091, 0.015880386730710221)),
        ((np.linspace(1, 100, 100), np.linspace(1, 100, 110) + 20 - 0.1),
         (0.20818181818181825, 0.017981441789762638)),
    ]
    for (a, b), expected in cases:
        assert_almost_equal(np.array(stats.ks_2samp(a, b)), np.array(expected))
Example 17
 Project: 3DCT   Author: coraxx   File: beadPos.py    GNU General Public License v3.0 5 votes
def gaussfit(data,parent=None,hold=False):
## Fit a 1-D gaussian to data and optionally plot the fit in the GUI.
## data appears to be a pair [x values, y values]: data[0] is passed as x and
## data[1] as y to curve_fit below — TODO confirm with callers.
## Returns (popt, pcov) from curve_fit for the project's 'gauss' model;
## the debug prints below read popt as [amplitude, location, sigma].
## NOTE: Python 2 syntax (print statements); relies on module-level
## 'gauss', 'clrmsg' and 'debug' defined elsewhere in the file.
## Fitting gaussian to data
data[1] = data[1]-data[1].min()
p0 = [data[1].max(), data[1].argmax(), 1]
## NOTE(review): the location guess is the argmax *index*, which is only a
## good initial value if data[0] is index-like — verify against callers.
popt, pcov = curve_fit(gauss, data[0], data[1], p0=p0)

if parent is not None:
## Draw graphs in GUI
x = []
y = []
## evaluate the fitted curve at integer sample positions
for i in np.arange(len(data[0])):
x.append(i)
y.append(gauss(i,*popt))
if hold is False:
parent.widget_matplotlib.setupScatterCanvas(width=4,height=4,dpi=52,toolbar=False)
parent.widget_matplotlib.xyPlot(data[0], data[1], label='z data',clear=True)
parent.widget_matplotlib.xyPlot(x, y, label='gaussian fit',clear=False)

## DEBUG
if clrmsg and debug is True:
from scipy.stats import ks_2samp
## Get std from the diagonal of the covariance matrix
std_height, std_mean, std_sigma = np.sqrt(np.diag(pcov))
print clrmsg.DEBUG + '='*15, 'GAUSS FIT', '='*25
print clrmsg.DEBUG + 'Amplitude		:', popt[0]
print clrmsg.DEBUG + 'Location		:', popt[1]
## http://mathworld.wolfram.com/GaussianFunction.html -> sigma * 2 * sqrt(2 * ln(2))
print clrmsg.DEBUG + 'FWHM			:', popt[2] * 2 * math.sqrt(2 * math.log(2,math.e))
print clrmsg.DEBUG + 'Std. Amplitude	:', std_height
print clrmsg.DEBUG + 'Std. Location	:', std_mean
print clrmsg.DEBUG + 'Std. FWHM		:', std_sigma * 2 * math.sqrt(2 * math.log(2,math.e))
print clrmsg.DEBUG + 'Mean dy		:', np.absolute(y-data[1]).mean()
## KS test compares the fitted curve against the measured y values
print clrmsg.DEBUG + str(ks_2samp(y, data[1]))
return popt, pcov

## Gaussian 2D fit from http://scipy.github.io/old-wiki/pages/Cookbook/FittingData
Example 18
 Project: ble5-nrf52-mac   Author: tomasero   File: test_stats.py    MIT License 5 votes
def test_ks_2samp():
    # exact small sample solution
    data1 = np.array([1.0, 2.0])
    data2 = np.array([1.0, 2.0, 3.0])
    # (sample pair, expected (statistic, pvalue)); the first two have exact
    # small-sample solutions, the 100-vs-100 pairs can be verified graphically,
    # and the 100-vs-110 shifted pairs are pure regression values.
    cases = [
        ((data1 + 0.01, data2),
         (0.33333333333333337, 0.99062316386915694)),
        ((data1 - 0.01, data2),
         (0.66666666666666674, 0.42490954988801982)),
        ((np.linspace(1, 100, 100), np.linspace(1, 100, 100) + 2 + 0.1),
         (0.030000000000000027, 0.99999999996005062)),
        ((np.linspace(1, 100, 100), np.linspace(1, 100, 100) + 2 - 0.1),
         (0.020000000000000018, 0.99999999999999933)),
        ((np.linspace(1, 100, 100), np.linspace(1, 100, 110) + 20.1),
         (0.21090909090909091, 0.015880386730710221)),
        ((np.linspace(1, 100, 100), np.linspace(1, 100, 110) + 20 - 0.1),
         (0.20818181818181825, 0.017981441789762638)),
    ]
    for (a, b), expected in cases:
        assert_almost_equal(np.array(stats.ks_2samp(a, b)), np.array(expected))

    # the result must be a namedtuple exposing 'statistic' and 'pvalue'
    attributes = ('statistic', 'pvalue')
    res = stats.ks_2samp(data1 - 0.01, data2)
    check_named_results(res, attributes)
Example 19
 Project: ble5-nrf52-mac   Author: tomasero   File: test_multivariate.py    MIT License 5 votes
```def test_haar(self):
# Test that the distribution is constant under rotation
# Every column should have the same distribution
# Additionally, the distribution should be invariant under another rotation

# Generate samples
dim = 5
samples = 1000  # Not too many, or the test takes too long
ks_prob = .05
np.random.seed(514)
xs = special_ortho_group.rvs(dim, size=samples)

# Dot a few rows (0, 1, 2) with unit vectors (0, 2, 4, 3),
#   effectively picking off entries in the matrices of xs.
#   These projections should all have the same disribution,
#     establishing rotational invariance. We use the two-sided
#     KS test to confirm this.
#   We could instead test that angles between random vectors
#     are uniformly distributed, but the below is sufficient.
#   It is not feasible to consider all pairs, so pick a few.
els = ((0,0), (0,2), (1,4), (2,3))
#proj = {(er, ec): [x[er][ec] for x in xs] for er, ec in els}
proj = dict(((er, ec), sorted([x[er][ec] for x in xs])) for er, ec in els)
pairs = [(e0, e1) for e0 in els for e1 in els if e0 > e1]
ks_tests = [ks_2samp(proj[p0], proj[p1])[1] for (p0, p1) in pairs]
assert_array_less([ks_prob]*len(pairs), ks_tests) ```
Example 20
 Project: ble5-nrf52-mac   Author: tomasero   File: test_multivariate.py    MIT License 5 votes
```def test_haar(self):
# Test that the distribution is constant under rotation
# Every column should have the same distribution
# Additionally, the distribution should be invariant under another rotation

# Generate samples
dim = 5
samples = 1000  # Not too many, or the test takes too long
ks_prob = .05
np.random.seed(518)  # Note that the test is sensitive to seed too
xs = ortho_group.rvs(dim, size=samples)

# Dot a few rows (0, 1, 2) with unit vectors (0, 2, 4, 3),
#   effectively picking off entries in the matrices of xs.
#   These projections should all have the same disribution,
#     establishing rotational invariance. We use the two-sided
#     KS test to confirm this.
#   We could instead test that angles between random vectors
#     are uniformly distributed, but the below is sufficient.
#   It is not feasible to consider all pairs, so pick a few.
els = ((0,0), (0,2), (1,4), (2,3))
#proj = {(er, ec): [x[er][ec] for x in xs] for er, ec in els}
proj = dict(((er, ec), sorted([x[er][ec] for x in xs])) for er, ec in els)
pairs = [(e0, e1) for e0 in els for e1 in els if e0 > e1]
ks_tests = [ks_2samp(proj[p0], proj[p1])[1] for (p0, p1) in pairs]
assert_array_less([ks_prob]*len(pairs), ks_tests) ```
Example 21
 Project: Computable   Author: ktraunmueller   File: test_stats.py    MIT License 5 votes
def test_ks_2samp():
    # exact small sample solution
    data1 = np.array([1.0, 2.0])
    data2 = np.array([1.0, 2.0, 3.0])
    # (sample pair, expected (statistic, pvalue)); the first two have exact
    # small-sample solutions, the 100-vs-100 pairs can be verified graphically,
    # and the 100-vs-110 shifted pairs are pure regression values.
    cases = [
        ((data1 + 0.01, data2),
         (0.33333333333333337, 0.99062316386915694)),
        ((data1 - 0.01, data2),
         (0.66666666666666674, 0.42490954988801982)),
        ((np.linspace(1, 100, 100), np.linspace(1, 100, 100) + 2 + 0.1),
         (0.030000000000000027, 0.99999999996005062)),
        ((np.linspace(1, 100, 100), np.linspace(1, 100, 100) + 2 - 0.1),
         (0.020000000000000018, 0.99999999999999933)),
        ((np.linspace(1, 100, 100), np.linspace(1, 100, 110) + 20.1),
         (0.21090909090909091, 0.015880386730710221)),
        ((np.linspace(1, 100, 100), np.linspace(1, 100, 110) + 20 - 0.1),
         (0.20818181818181825, 0.017981441789762638)),
    ]
    for (a, b), expected in cases:
        assert_almost_equal(np.array(stats.ks_2samp(a, b)), np.array(expected))
Example 22
 Project: poker   Author: surgebiswas   File: test_stats.py    MIT License 5 votes
def test_ks_2samp():
    # exact small sample solution
    data1 = np.array([1.0, 2.0])
    data2 = np.array([1.0, 2.0, 3.0])
    # (sample pair, expected (statistic, pvalue)); the first two have exact
    # small-sample solutions, the 100-vs-100 pairs can be verified graphically,
    # and the 100-vs-110 shifted pairs are pure regression values.
    cases = [
        ((data1 + 0.01, data2),
         (0.33333333333333337, 0.99062316386915694)),
        ((data1 - 0.01, data2),
         (0.66666666666666674, 0.42490954988801982)),
        ((np.linspace(1, 100, 100), np.linspace(1, 100, 100) + 2 + 0.1),
         (0.030000000000000027, 0.99999999996005062)),
        ((np.linspace(1, 100, 100), np.linspace(1, 100, 100) + 2 - 0.1),
         (0.020000000000000018, 0.99999999999999933)),
        ((np.linspace(1, 100, 100), np.linspace(1, 100, 110) + 20.1),
         (0.21090909090909091, 0.015880386730710221)),
        ((np.linspace(1, 100, 100), np.linspace(1, 100, 110) + 20 - 0.1),
         (0.20818181818181825, 0.017981441789762638)),
    ]
    for (a, b), expected in cases:
        assert_almost_equal(np.array(stats.ks_2samp(a, b)), np.array(expected))

    # the result must be a namedtuple exposing 'statistic' and 'pvalue'
    attributes = ('statistic', 'pvalue')
    res = stats.ks_2samp(data1 - 0.01, data2)
    check_named_results(res, attributes)
Example 23
 Project: poker   Author: surgebiswas   File: test_multivariate.py    MIT License 5 votes
def test_haar(self):
    # A Haar-distributed sampler is rotation invariant, so every matrix entry
    # should share one common marginal distribution, and that distribution
    # should also survive a further rotation.

    # Draw the samples.
    dim = 5
    samples = 1000  # Not too many, or the test takes too long
    ks_prob = 0.39  # ...so don't expect much precision
    np.random.seed(514)
    xs = special_ortho_group.rvs(dim, size=samples)

    # Dotting rows (0, 1, 2) with unit vectors (0, 2, 4, 3) just picks off
    # individual matrix entries; under rotation invariance these projections
    # are all identically distributed, which the two-sided KS test verifies.
    # (Uniformity of angles between random vectors would be an alternative
    # check; comparing every pair is infeasible, so a handful suffices.)
    els = ((0, 0), (0, 2), (1, 4), (2, 3))
    proj = {(r, c): sorted(x[r][c] for x in xs) for r, c in els}
    pairs = [(a, b) for a in els for b in els if a > b]
    pvalues = []
    for first, second in pairs:
        pvalues.append(ks_2samp(proj[first], proj[second])[1])
    assert_array_less([ks_prob] * len(pairs), pvalues)
Example 24
 Project: poker   Author: surgebiswas   File: test_multivariate.py    MIT License 5 votes
def test_haar(self):
    # A Haar-distributed sampler is rotation invariant, so every matrix entry
    # should share one common marginal distribution, and that distribution
    # should also survive a further rotation.

    # Draw the samples.
    dim = 5
    samples = 1000  # Not too many, or the test takes too long
    ks_prob = 0.39  # ...so don't expect much precision
    np.random.seed(518)  # Note that the test is sensitive to seed too
    xs = ortho_group.rvs(dim, size=samples)

    # Dotting rows (0, 1, 2) with unit vectors (0, 2, 4, 3) just picks off
    # individual matrix entries; under rotation invariance these projections
    # are all identically distributed, which the two-sided KS test verifies.
    # (Uniformity of angles between random vectors would be an alternative
    # check; comparing every pair is infeasible, so a handful suffices.)
    els = ((0, 0), (0, 2), (1, 4), (2, 3))
    proj = {(r, c): sorted(x[r][c] for x in xs) for r, c in els}
    pairs = [(a, b) for a in els for b in els if a > b]
    pvalues = []
    for first, second in pairs:
        pvalues.append(ks_2samp(proj[first], proj[second])[1])
    assert_array_less([ks_prob] * len(pairs), pvalues)
Example 25
 Project: vm-reliability-tester   Author: icclab   File: model_validator.py    MIT License 5 votes
def fit_model(data, model_name, ratings):
    """Draw a reference sample from a rated, pre-fitted model and K-S test it
    against ``data``.

    Arguments:
    data -- observed sample
    model_name -- one of 'expon', 'dweibull', 'norm', 'lognorm'
    ratings -- mapping from model name to its stored (stringified) fit parameters

    Returns a list [ks_statistic, p_value, params, sorted_reference_sample].
    Raises ValueError for an unknown model_name (previously an obscure NameError).
    """
    n = len(data)
    if model_name == 'expon':
        params = convert_string_to_tuple(ratings['expon'][0][0])
        std_func = stats.expon.rvs(size=n,
                                   loc=params[0],
                                   scale=params[1])
    elif model_name == 'dweibull':
        params = convert_string_to_tuple(ratings['dweibull'][0][0])
        std_func = stats.dweibull.rvs(params[0],
                                      loc=params[1],
                                      scale=params[2],
                                      size=n)
    elif model_name == 'norm':
        params = convert_string_to_tuple(ratings['norm'][0][0])
        std_func = stats.norm.rvs(loc=params[0],
                                  scale=params[1],
                                  size=n)
    elif model_name == 'lognorm':
        params = convert_string_to_tuple(ratings['lognorm'][0][0])
        std_func = stats.lognorm.rvs(params[0],
                                     loc=params[1],
                                     scale=params[2],
                                     size=n)
    else:
        # previously fell through and crashed with NameError on std_func
        raise ValueError("unknown model_name: %r" % (model_name,))
    print(params)  # debug output; print() form works on Py2 and Py3
    std_func.sort()
    test_vals = stats.ks_2samp(data, std_func)
    result = list(test_vals)
    result.append(params)
    result.append(std_func)
    return result
Example 26
 Project: vm-reliability-tester   Author: icclab   File: model_fitter.py    MIT License 5 votes
def fit_model(data, model_name):
    """Fit the named distribution to ``data``, draw a same-size reference sample
    from the fit, and K-S test the data against that sample.

    Arguments:
    data -- observed sample
    model_name -- one of 'expon', 'dweibull', 'norm', 'lognorm'

    Returns a list [ks_statistic, p_value, fit_params, sorted_reference_sample].
    Raises ValueError for an unknown model_name (previously an obscure NameError).
    """
    n = len(data)
    if model_name == 'expon':
        params = stats.expon.fit(data)
        std_model = stats.expon.rvs(size=n,
                                    loc=params[0],
                                    scale=params[1])
    elif model_name == 'dweibull':
        params = stats.dweibull.fit(data)
        std_model = stats.dweibull.rvs(params[0],
                                       loc=params[1],
                                       scale=params[2],
                                       size=n)
    elif model_name == 'norm':
        params = stats.norm.fit(data)
        std_model = stats.norm.rvs(loc=params[0],
                                   scale=params[1],
                                   size=n)
    elif model_name == 'lognorm':
        params = stats.lognorm.fit(data)
        std_model = stats.lognorm.rvs(params[0],
                                      loc=params[1],
                                      scale=params[2],
                                      size=n)
    else:
        # previously fell through and crashed with NameError on std_model
        raise ValueError("unknown model_name: %r" % (model_name,))
    std_model.sort()
    test_vals = stats.ks_2samp(data, std_model)
    result = list(test_vals)
    result.append(params)
    result.append(std_model)
    return result
Example 27
 Project: P3_image_processing   Author: latedude2   File: test_stats.py    MIT License 5 votes
def _testOne(self, x1, x2, alternative, expected_statistic, expected_prob, mode='auto'):
    # Run ks_2samp on (x1, x2) and compare the (statistic, pvalue) pair
    # against the expected values.
    observed = stats.ks_2samp(x1, x2, alternative, mode=mode)
    assert_array_almost_equal(np.array(observed),
                              np.array([expected_statistic, expected_prob]))
Example 28
 Project: P3_image_processing   Author: latedude2   File: test_stats.py    MIT License 5 votes
def testNamedAtributes(self):
    # ks_2samp must return a namedtuple exposing 'statistic' and 'pvalue'
    res = stats.ks_2samp([1, 2], [3])
    check_named_results(res, ('statistic', 'pvalue'))
Example 29
 Project: P3_image_processing   Author: latedude2   File: test_multivariate.py    MIT License 5 votes
```def test_haar(self):
# Test that the distribution is constant under rotation
# Every column should have the same distribution
# Additionally, the distribution should be invariant under another rotation

# Generate samples
dim = 5
samples = 1000  # Not too many, or the test takes too long
ks_prob = .05
np.random.seed(514)
xs = special_ortho_group.rvs(dim, size=samples)

# Dot a few rows (0, 1, 2) with unit vectors (0, 2, 4, 3),
#   effectively picking off entries in the matrices of xs.
#   These projections should all have the same disribution,
#     establishing rotational invariance. We use the two-sided
#     KS test to confirm this.
#   We could instead test that angles between random vectors
#     are uniformly distributed, but the below is sufficient.
#   It is not feasible to consider all pairs, so pick a few.
els = ((0,0), (0,2), (1,4), (2,3))
#proj = {(er, ec): [x[er][ec] for x in xs] for er, ec in els}
proj = dict(((er, ec), sorted([x[er][ec] for x in xs])) for er, ec in els)
pairs = [(e0, e1) for e0 in els for e1 in els if e0 > e1]
ks_tests = [ks_2samp(proj[p0], proj[p1])[1] for (p0, p1) in pairs]
assert_array_less([ks_prob]*len(pairs), ks_tests) ```
Example 30
 Project: P3_image_processing   Author: latedude2   File: test_multivariate.py    MIT License 5 votes
```def test_haar(self):
# Test that the distribution is constant under rotation
# Every column should have the same distribution
# Additionally, the distribution should be invariant under another rotation

# Generate samples
dim = 5
samples = 1000  # Not too many, or the test takes too long
ks_prob = .05
np.random.seed(518)  # Note that the test is sensitive to seed too
xs = ortho_group.rvs(dim, size=samples)

# Dot a few rows (0, 1, 2) with unit vectors (0, 2, 4, 3),
#   effectively picking off entries in the matrices of xs.
#   These projections should all have the same disribution,
#     establishing rotational invariance. We use the two-sided
#     KS test to confirm this.
#   We could instead test that angles between random vectors
#     are uniformly distributed, but the below is sufficient.
#   It is not feasible to consider all pairs, so pick a few.
els = ((0,0), (0,2), (1,4), (2,3))
#proj = {(er, ec): [x[er][ec] for x in xs] for er, ec in els}
proj = dict(((er, ec), sorted([x[er][ec] for x in xs])) for er, ec in els)
pairs = [(e0, e1) for e0 in els for e1 in els if e0 > e1]
ks_tests = [ks_2samp(proj[p0], proj[p1])[1] for (p0, p1) in pairs]
assert_array_less([ks_prob]*len(pairs), ks_tests) ```
Example 31
 Project: GraphicDesignPatternByPython   Author: Relph1119   File: test_stats.py    MIT License 5 votes
def test_ks_2samp():
    """Pin scipy.stats.ks_2samp output (statistic, p-value) on fixed inputs."""
    data1 = np.array([1.0, 2.0])
    data2 = np.array([1.0, 2.0, 3.0])
    # (left sample, right sample, expected (D, p)): the first two pairs have
    # an exact small-sample solution, the shifted-linspace pairs can be
    # verified graphically, and the 100-vs-110 pairs are pure regression
    # values.
    cases = [
        (data1 + 0.01, data2, (0.33333333333333337, 0.99062316386915694)),
        (data1 - 0.01, data2, (0.66666666666666674, 0.42490954988801982)),
        (np.linspace(1, 100, 100), np.linspace(1, 100, 100) + 2 + 0.1,
         (0.030000000000000027, 0.99999999996005062)),
        (np.linspace(1, 100, 100), np.linspace(1, 100, 100) + 2 - 0.1,
         (0.020000000000000018, 0.99999999999999933)),
        (np.linspace(1, 100, 100), np.linspace(1, 100, 110) + 20.1,
         (0.21090909090909091, 0.015880386730710221)),
        (np.linspace(1, 100, 100), np.linspace(1, 100, 110) + 20 - 0.1,
         (0.20818181818181825, 0.017981441789762638)),
    ]
    for lhs, rhs, expected in cases:
        assert_almost_equal(np.array(stats.ks_2samp(lhs, rhs)),
                            np.array(expected))

    # the result object should expose namedtuple-style attributes
    attributes = ('statistic', 'pvalue')
    res = stats.ks_2samp(data1 - 0.01, data2)
    check_named_results(res, attributes)
Example 32
 Project: GraphicDesignPatternByPython   Author: Relph1119   File: test_multivariate.py    MIT License 5 votes
```def test_haar(self):
# Test that the distribution is constant under rotation
# Every column should have the same distribution
# Additionally, the distribution should be invariant under another rotation

# Generate samples
dim = 5
samples = 1000  # Not too many, or the test takes too long
ks_prob = .05
np.random.seed(514)
xs = special_ortho_group.rvs(dim, size=samples)

# Dot a few rows (0, 1, 2) with unit vectors (0, 2, 4, 3),
#   effectively picking off entries in the matrices of xs.
#   These projections should all have the same disribution,
#     establishing rotational invariance. We use the two-sided
#     KS test to confirm this.
#   We could instead test that angles between random vectors
#     are uniformly distributed, but the below is sufficient.
#   It is not feasible to consider all pairs, so pick a few.
els = ((0,0), (0,2), (1,4), (2,3))
#proj = {(er, ec): [x[er][ec] for x in xs] for er, ec in els}
proj = dict(((er, ec), sorted([x[er][ec] for x in xs])) for er, ec in els)
pairs = [(e0, e1) for e0 in els for e1 in els if e0 > e1]
ks_tests = [ks_2samp(proj[p0], proj[p1])[1] for (p0, p1) in pairs]
assert_array_less([ks_prob]*len(pairs), ks_tests) ```
Example 33
 Project: GraphicDesignPatternByPython   Author: Relph1119   File: test_multivariate.py    MIT License 5 votes
```def test_haar(self):
# Test that the distribution is constant under rotation
# Every column should have the same distribution
# Additionally, the distribution should be invariant under another rotation

# Generate samples
dim = 5
samples = 1000  # Not too many, or the test takes too long
ks_prob = .05
np.random.seed(518)  # Note that the test is sensitive to seed too
xs = ortho_group.rvs(dim, size=samples)

# Dot a few rows (0, 1, 2) with unit vectors (0, 2, 4, 3),
#   effectively picking off entries in the matrices of xs.
#   These projections should all have the same disribution,
#     establishing rotational invariance. We use the two-sided
#     KS test to confirm this.
#   We could instead test that angles between random vectors
#     are uniformly distributed, but the below is sufficient.
#   It is not feasible to consider all pairs, so pick a few.
els = ((0,0), (0,2), (1,4), (2,3))
#proj = {(er, ec): [x[er][ec] for x in xs] for er, ec in els}
proj = dict(((er, ec), sorted([x[er][ec] for x in xs])) for er, ec in els)
pairs = [(e0, e1) for e0 in els for e1 in els if e0 > e1]
ks_tests = [ks_2samp(proj[p0], proj[p1])[1] for (p0, p1) in pairs]
assert_array_less([ks_prob]*len(pairs), ks_tests) ```
Example 34
 Project: drifter_ml   Author: EricSchles   File: columnar_tests.py    MIT License 5 votes
def ks_2samp_similar_distribution(self, column,
                                  pvalue_threshold=0.05,
                                  num_rounds=3):
    """Approximate permutation test (KS statistic) that *column* is
    distributed the same in self.new_data and self.historical_data.

    Returns True when the permutation p-value is at or above the threshold
    (no detectable drift), False otherwise.
    """
    observed_p = permutation_test(
        self.new_data[column],
        self.historical_data[column],
        method="approximate",
        num_rounds=num_rounds,
        func=lambda a, b: stats.ks_2samp(a, b).statistic,
        seed=0)
    return observed_p >= pvalue_threshold
Example 35
 Project: drifter_ml   Author: EricSchles   File: prototype_test_framework.py    MIT License 5 votes
def similiar_distribution(new_data, historical_data, column_names, pvalue_threshold=0.05):
    """Return True when every named column of new_data is statistically
    indistinguishable (two-sample KS test) from the same column of
    historical_data at the given p-value threshold, False otherwise.
    """
    for column_name in column_names:
        distribution_info = stats.ks_2samp(new_data[column_name], historical_data[column_name])
        # BUG FIX: was `correlation_info.pvalue` — an undefined name that
        # raised NameError as soon as any column was tested; the intended
        # variable is the KS result computed just above.
        if distribution_info.pvalue < pvalue_threshold:
            return False
    return True

# does the preprocessing break?
# does the model build?
# does the model meet some threshold?
# add memoryful tests for measures over time (like over several days)
Example 36
 Project: aurum-datadiscovery   Author: mitdbg   File: dataanalysis.py    MIT License 5 votes
def compare_num_columns_dist_ks(columnA, columnB):
    '''
    Kolmogorov-Smirnov test: returns the (statistic, p-value) result of
    comparing the empirical distributions of two numeric columns.
    '''
    result = ks_2samp(columnA, columnB)
    return result
Example 37
 Project: anesthetic   Author: williamjameshandley   File: test_samples.py    MIT License 5 votes
def test_ns_output():
    """Validate NestedSamples.ns_output against direct estimates from the
    bundled PolyChord example data."""
    numpy.random.seed(3)
    pc = NestedSamples(root='./tests/example_data/pc')
    PC = pc.ns_output(1000)
    # point estimates should sit within one standard deviation of the
    # sampled outputs, and dimensionality/covariance sanity checks hold
    assert abs(pc.logZ() - PC['logZ'].mean()) < PC['logZ'].std()
    assert PC['d'].mean() < 5
    assert PC.cov()['D']['logZ'] < 0
    assert abs(PC.logZ.mean() - pc.logZ()) < PC.logZ.std()
    assert abs(PC.D.mean() - pc.D()) < PC.D.std()
    assert abs(PC.d.mean() - pc.d()) < PC.d.std()
    # sampled outputs should be statistically indistinguishable from the
    # direct estimators (two-sample KS p-value above 0.05)
    assert ks_2samp(pc.logZ(100), PC.logZ).pvalue > 0.05
    assert ks_2samp(pc.D(100), PC.D).pvalue > 0.05
    assert ks_2samp(pc.d(100), PC.d).pvalue > 0.05
Example 38
 Project: cINNamon   Author: hagabbar   File: Radynversion_copy.py    GNU General Public License v3.0 5 votes
def result_stat_tests(inn_samps, mcmc_samps, cnt, parnames):
    """
    Record and print ks and AD test statistics

    Per parameter, compares (a) the two halves of the MCMC chain against
    each other with a two-sample KS test and a k-sample Anderson-Darling
    test (the "ideal" baseline), and (b) the INN samples against the full
    MCMC chain with a two-sample KS test.

    NOTE(review): this excerpt appears truncated — `ad_mcmc_result` is
    computed but never stored, `cnt`/`parnames` are used only in the
    commented-out prints, and no return statement is visible here.
    """

    ks_mcmc_arr = []
    ks_inn_arr = []

    # iterate through each parameter
    for i in range(inn_samps.shape[1]):
        # get ideal bayesian number. We want the 2 tailed p value from the KS test FYI
        # (first half of the chain vs the second half)
        ks_mcmc_result = ks_2samp(mcmc_samps[:int(mcmc_samps.shape[0]/2.0), i], mcmc_samps[int(mcmc_samps.shape[0]/2.0):, i])
        ad_mcmc_result = anderson_ksamp([mcmc_samps[:int(mcmc_samps.shape[0]/2.0), i], mcmc_samps[int(mcmc_samps.shape[0]/2.0):, i]])

        # get predicted vs. true number
        ks_inn_result = ks_2samp(inn_samps[:,i],mcmc_samps[:,i])
        #print('Test Case %d, Parameter(%s) k-s result: [Ideal(%.6f), Predicted(%.6f)]' % (int(cnt),parnames[i],np.array(ks_mcmc_result[1]),np.array(ks_inn_result[1])))
        #print('Test Case %d, Parameter(%s) A-D result: [Ideal(%.6f), Predicted(%.6f)]' % (int(cnt),parnames[i],np.array(ad_mcmc_result[0]),np.array(ad_inn_result[0])))

        # store result stats (KS p-values only)
        ks_mcmc_arr.append(ks_mcmc_result[1])
        ks_inn_arr.append(ks_inn_result[1])

Example 39
 Project: kepler   Author: jaidevd   File: utils.py    MIT License 5 votes
def is_uniform(x):
    """Heuristic check that *x* looks uniform on [x.min(), x.max()].

    Draws 100 reference uniforms (consumes global NumPy RNG state) and
    returns True when the two-sample KS p-value exceeds 1e-3.
    """
    reference = np.random.uniform(x.min(), x.max(), size=(100,))
    pval = ks_2samp(x.ravel(), reference)[1]
    return pval > 1e-3
Example 40
 Project: wine-ml-on-aws-lambda   Author: pierreant   File: test_stats.py    Apache License 2.0 5 votes
def test_ks_2samp():
    """Regression checks for scipy.stats.ks_2samp on fixed sample pairs."""
    data1 = np.array([1.0, 2.0])
    data2 = np.array([1.0, 2.0, 3.0])
    # each entry: (sample1, sample2, expected (statistic, pvalue)); the
    # small-sample cases have an exact solution, the shifted-linspace cases
    # can be verified graphically, the rest are regression values
    pairs_expected = [
        (data1 + 0.01, data2, (0.33333333333333337, 0.99062316386915694)),
        (data1 - 0.01, data2, (0.66666666666666674, 0.42490954988801982)),
        (np.linspace(1, 100, 100), np.linspace(1, 100, 100) + 2 + 0.1,
         (0.030000000000000027, 0.99999999996005062)),
        (np.linspace(1, 100, 100), np.linspace(1, 100, 100) + 2 - 0.1,
         (0.020000000000000018, 0.99999999999999933)),
        (np.linspace(1, 100, 100), np.linspace(1, 100, 110) + 20.1,
         (0.21090909090909091, 0.015880386730710221)),
        (np.linspace(1, 100, 100), np.linspace(1, 100, 110) + 20 - 0.1,
         (0.20818181818181825, 0.017981441789762638)),
    ]
    for sample1, sample2, expected in pairs_expected:
        assert_almost_equal(np.array(stats.ks_2samp(sample1, sample2)),
                            np.array(expected))

    # test for namedtuple attribute results
    attributes = ('statistic', 'pvalue')
    res = stats.ks_2samp(data1 - 0.01, data2)
    check_named_results(res, attributes)
Example 41
 Project: wine-ml-on-aws-lambda   Author: pierreant   File: test_multivariate.py    Apache License 2.0 5 votes
```def test_haar(self):
# Test that the distribution is constant under rotation
# Every column should have the same distribution
# Additionally, the distribution should be invariant under another rotation

# Generate samples
dim = 5
samples = 1000  # Not too many, or the test takes too long
ks_prob = 0.39  # ...so don't expect much precision
np.random.seed(514)
xs = special_ortho_group.rvs(dim, size=samples)

# Dot a few rows (0, 1, 2) with unit vectors (0, 2, 4, 3),
#   effectively picking off entries in the matrices of xs.
#   These projections should all have the same disribution,
#     establishing rotational invariance. We use the two-sided
#     KS test to confirm this.
#   We could instead test that angles between random vectors
#     are uniformly distributed, but the below is sufficient.
#   It is not feasible to consider all pairs, so pick a few.
els = ((0,0), (0,2), (1,4), (2,3))
#proj = {(er, ec): [x[er][ec] for x in xs] for er, ec in els}
proj = dict(((er, ec), sorted([x[er][ec] for x in xs])) for er, ec in els)
pairs = [(e0, e1) for e0 in els for e1 in els if e0 > e1]
ks_tests = [ks_2samp(proj[p0], proj[p1])[1] for (p0, p1) in pairs]
assert_array_less([ks_prob]*len(pairs), ks_tests) ```
Example 42
 Project: wine-ml-on-aws-lambda   Author: pierreant   File: test_multivariate.py    Apache License 2.0 5 votes
```def test_haar(self):
# Test that the distribution is constant under rotation
# Every column should have the same distribution
# Additionally, the distribution should be invariant under another rotation

# Generate samples
dim = 5
samples = 1000  # Not too many, or the test takes too long
ks_prob = 0.39  # ...so don't expect much precision
np.random.seed(518)  # Note that the test is sensitive to seed too
xs = ortho_group.rvs(dim, size=samples)

# Dot a few rows (0, 1, 2) with unit vectors (0, 2, 4, 3),
#   effectively picking off entries in the matrices of xs.
#   These projections should all have the same disribution,
#     establishing rotational invariance. We use the two-sided
#     KS test to confirm this.
#   We could instead test that angles between random vectors
#     are uniformly distributed, but the below is sufficient.
#   It is not feasible to consider all pairs, so pick a few.
els = ((0,0), (0,2), (1,4), (2,3))
#proj = {(er, ec): [x[er][ec] for x in xs] for er, ec in els}
proj = dict(((er, ec), sorted([x[er][ec] for x in xs])) for er, ec in els)
pairs = [(e0, e1) for e0 in els for e1 in els if e0 > e1]
ks_tests = [ks_2samp(proj[p0], proj[p1])[1] for (p0, p1) in pairs]
assert_array_less([ks_prob]*len(pairs), ks_tests) ```
Example 43
 Project: phenotypeXpression   Author: NCBI-Hackathons   File: batcheffect.py    MIT License 5 votes
def _generate_ks_test(self, meta_value: int, total_dist: List, clust_stats=None) -> Dict:
    """
    Kolmogrov-Smirnov test for cluster vals and total vals from same dist
    :param meta_value: integer index for gds metric
    :param total_dist: list distribution of gds metric for ONE gds set
    :param clust_stats: pass mutable dict for recursive additions
        key=cluster_ids, value=ordered list of stats for all gds metrics
        (KS/chi-stat, p-value, mean/mode, s.d./category_n)
    :return: clust_stats, the (possibly newly created) accumulator dict
    """
    # Create the accumulator on first call
    if clust_stats is None:
        clust_stats = defaultdict(list)

    # Overall stats: no KS statistic / p-value for the reference itself
    total_array = np.array(total_dist)
    clust_stats['Overall'].extend([None, None, np.mean(total_array), np.std(total_array, ddof=1)])

    # Per-cluster: KS test of the cluster's metric values vs the overall set
    for cluster_id, cluster_list in self.clusters.items():
        cluster_dist = [self.meta_dict[gds][meta_value] for gds in cluster_list]
        clust_array = np.array(cluster_dist)
        KS_stat, p_val = stats.ks_2samp(clust_array, total_array)
        clust_stats[cluster_id].extend([KS_stat, p_val, np.mean(clust_array), np.std(clust_array, ddof=1)])
        if p_val <= 0.01:
            print('{}\'s {} sample is drawn from a significantly different distribution than the overall sample'.format(cluster_id, self.meta_list[meta_value]))
    # BUG FIX: the function is annotated `-> Dict` but ended with a bare
    # `return`, so callers always received None; return the accumulator.
    return clust_stats

# For chi-squared test of platform types different from overall
Example 44
 Project: FAVITES   Author: niemasd   File: distribution_distance.py    GNU General Public License v3.0 5 votes
def d_ks(p_vec, q_vec):
    """Two-sample KS comparison of two samples, as a (statistic, pvalue) tuple."""
    stat, pval = ks_2samp(p_vec, q_vec)
    return (stat, pval)

# Jensen-Shannon Divergence
Example 45
 Project: iswc-2016-semantic-labeling   Author: minhptx   File: numeric.py    Apache License 2.0 5 votes
def kolmogorov_smirnov_test(train_examples, test_examples, num1, num2):
    """Two-sample KS p-value between train and test examples, rebalanced by
    balance_result; returns 0 when either sample is too small to test."""
    # guard: KS needs at least two observations on each side
    if len(train_examples) <= 1 or len(test_examples) <= 1:
        return 0
    pvalue = ks_2samp(train_examples, test_examples)[1]
    return balance_result(num1, num2, True, pvalue)
Example 46
 Project: edgePy   Author: r-bioinformatics   File: edgepy.py    MIT License 5 votes
def ks_2_samples(self):
    """Run a 2-tailed Kolmogorov-Smirnov test on the DGEList object.

    Args:
        None.

    Returns:
        gene_details: dict of dicts (key: gene) holding mean1/mean2 for the two groups
        gene_likelihood: dict (key: gene) holding the KS p-value separating the groups
        group_types: list of the groups in order.

    """
    gene_likelihood1: Dict[Hashable, float] = {}
    gene_details: Dict[Hashable, Dict[Hashable, Any]] = {}
    # NOTE(review): set() makes the group order arbitrary between runs
    group_types = list(set(self.dge_list.groups_list))
    # one boolean mask per group, aligned with the sample axis of counts
    group_filters: Dict[Hashable, Any] = {
        group: [g == group for g in self.dge_list.groups_list]
        for group in group_types
    }
    for gene_idx, gene in enumerate(self.dge_list.genes):
        gene_row = self.dge_list.counts[gene_idx]
        if len(group_types) != 2:
            # only the two-group comparison is implemented
            continue
        group_data1 = gene_row.compress(group_filters[group_types[0]])
        group_data2 = gene_row.compress(group_filters[group_types[1]])
        gene_likelihood1[gene] = ks_2samp(group_data1, group_data2)[1]
        gene_details[gene] = {'mean1': np.mean(group_data1),
                              'mean2': np.mean(group_data2)}
    return gene_details, gene_likelihood1, group_types
Example 47
 Project: numerai   Author: gansanay   File: univariateselection.py    MIT License 5 votes
def kolmogorov_smirnov(x_train, x_test):
    """Column-wise two-sample KS test between two DataFrames.

    Returns a DataFrame (1-based index, one row per column of x_train) with
    'KS' (statistic) and 'KS_p' (p-value) columns.
    """
    statistics = []
    pvalues = []
    for col in x_train.columns:
        stat, pval = ks_2samp(x_train[col], x_test[col])
        statistics.append(stat)
        pvalues.append(pval)
    dfks = pd.DataFrame(index=range(1, 1 + len(x_train.columns)))
    dfks['KS'] = statistics
    dfks['KS_p'] = pvalues
    return dfks
Example 48
 Project: bnn-analysis   Author: myshkov   File: metrics.py    MIT License 5 votes
def ks_distance(p_samples, q_samples):
    """KS statistic between two sample sets.

    p_samples may also arrive as an (index, samples) tuple, in which case
    the index part is discarded.
    """
    if isinstance(p_samples, tuple):
        _, p_samples = p_samples

    return sc.ks_2samp(p_samples, q_samples)[0]
Example 49
 Project: pbase   Author: Impavidity   File: visualizer.py    MIT License 5 votes
def ks_significance(self):
    """Two-sample KS test between the object's two stored distributions
    (self.dis1 vs self.dis2); returns the scipy result object."""
    first = np.array(self.dis1)
    second = np.array(self.dis2)
    return stats.ks_2samp(first, second)
Example 50
 Project: GCIT   Author: alexisbellot   File: utils.py    MIT License 5 votes
def kolmogorov(X, Y):
    """KS statistic between two sample arrays, flattened to 1-D first."""
    X = X.reshape((len(X)))
    Y = Y.reshape((len(Y)))
    stat = ks_2samp(X, Y)[0]
    return stat
Example 51
 Project: TeaML   Author: TianjinMouth   File: tea_utils.py    Apache License 2.0 5 votes
def compute_ks(prob, target):
    """
    target: numpy array of shape (1,)
    prob: numpy array of shape (1,), predicted probability of the sample being positive
    returns:
        ks: float, KS separation between the score distributions of the
        positive (target == 1) and non-positive samples
    """
    positives = prob[target == 1]
    negatives = prob[target != 1]
    return ks_2samp(positives, negatives).statistic
Example 52
 Project: senior-design   Author: james-tate   File: test_stats.py    GNU General Public License v2.0 5 votes
```def test_ks_2samp():
#exact small sample solution
data1 = np.array([1.0,2.0])
data2 = np.array([1.0,2.0,3.0])
assert_almost_equal(np.array(stats.ks_2samp(data1+0.01,data2)),
np.array((0.33333333333333337, 0.99062316386915694)))
assert_almost_equal(np.array(stats.ks_2samp(data1-0.01,data2)),
np.array((0.66666666666666674, 0.42490954988801982)))
#these can also be verified graphically
assert_almost_equal(
np.array(stats.ks_2samp(np.linspace(1,100,100),
np.linspace(1,100,100)+2+0.1)),
np.array((0.030000000000000027, 0.99999999996005062)))
assert_almost_equal(
np.array(stats.ks_2samp(np.linspace(1,100,100),
np.linspace(1,100,100)+2-0.1)),
np.array((0.020000000000000018, 0.99999999999999933)))
#these are just regression tests
assert_almost_equal(
np.array(stats.ks_2samp(np.linspace(1,100,100),
np.linspace(1,100,110)+20.1)),
np.array((0.21090909090909091, 0.015880386730710221)))
assert_almost_equal(
np.array(stats.ks_2samp(np.linspace(1,100,100),
np.linspace(1,100,110)+20-0.1)),
np.array((0.20818181818181825, 0.017981441789762638))) ```
Example 53
 Project: default-credit-card-prediction   Author: alexpnt   File: feature_selection.py    MIT License 5 votes
def kolmogorov_smirnov_two_sample_test(sample_a, sample_b):
    """
    Performs the two-sample Kolmogorov-Smirnov test, testing whether two
    samples are drawn from identical distributions.

    Keyword arguments:
    sample_a -- The first sample
    sample_b -- The second sample
    """
    result = stats.ks_2samp(sample_a, sample_b)
    return result
Example 54
 Project: causallib   Author: IBM   File: filters.py    Apache License 2.0 5 votes
def compute_pvals(self, X, y):
    """Per-feature p-values of association between the columns of X and y.

    Binary features are tested with chi2 / f_regression, continuous features
    with a t-test / KS test (binary y) or f_regression (continuous y).

    NOTE(review): assumptions not visible in this excerpt —
    * X appears to be a pandas DataFrame (``X.loc`` indexing) whose column
      labels are the positional integers produced by ``np.where``; verify.
    * ``areColumnsBinary`` and ``feature_selection`` (scikit-learn) are
      provided elsewhere in the module.
    """
    # TODO: export to stats_utils?
    # y is treated as binary iff it takes exactly two distinct values
    is_y_binary = (len(np.unique(y)) == 2)
    # is_binary_feature = np.sum(((X != np.nanmin(X, axis=0)[np.newaxis, :]) &
    #                             (X != np.nanmax(X, axis=0)[np.newaxis, :])), axis=0) == 0
    is_binary_feature = areColumnsBinary(X)
    p_vals = np.zeros(X.shape[1])
    if is_y_binary:
        # Process non-binary columns:
        for i in np.where(~is_binary_feature)[0]:
            # split the continuous feature by outcome group
            x0 = X.loc[y == 0, i]
            x1 = X.loc[y == 1, i]
            if self.is_linear:
                _, p_vals[i] = stats.ttest_ind(x0, x1)
            else:
                # non-parametric alternative when linearity is not assumed
                _, p_vals[i] = stats.ks_2samp(x0, x1)

        # Process binary features:
        _, p_vals[is_binary_feature] = feature_selection.chi2(X.loc[:, is_binary_feature], y)

    else:
        # Process non-binary features:
        _, p_vals[~is_binary_feature] = feature_selection.f_regression(X.loc[:, ~is_binary_feature], y)

        # Process binary features:
        # NOTE(review): arguments look swapped relative to the call above
        # (f_regression(X, y) elsewhere, f_regression(y_mat, X_col) here) —
        # confirm this is intentional.
        y_mat = np.row_stack(y)
        for i in np.where(is_binary_feature)[0]:
            _, p_vals[i] = feature_selection.f_regression(y_mat, X.loc[:, i])
    return p_vals
Example 55
 Project: numerox   Author: numerai   File: metrics.py    GNU General Public License v3.0 5 votes
def concordance(data, prediction, split_pairs=True):
    "Concordance; less than 0.12 is passing; data should be the full dataset."
    # NOTE(review): relies on MiniBatchKMeans (scikit-learn), pd, np and the
    # numerox data/prediction objects imported elsewhere in this module.

    pairs = prediction.pairs(as_str=False)
    concords = pd.DataFrame(columns=['concord'], index=pairs)

    # fit clusters
    kmeans = MiniBatchKMeans(n_clusters=5, random_state=1337)
    kmeans.fit(data.x)

    # yhats and clusters for each region
    yhats = []
    clusters = []
    for region in ['validation', 'test', 'live']:
        d = data[region]
        cluster = kmeans.predict(d.x)
        clusters.append(cluster)
        yh = prediction.df.loc[d.df.index].values  # align
        yhats.append(yh)

    # cross cluster distance (KS distance)
    for i in range(len(pairs)):
        ks = []
        for j in set(clusters[0]):
            # restrict each region's predictions to cluster j
            yhat0 = yhats[0][:, i][clusters[0] == j]
            yhat1 = yhats[1][:, i][clusters[1] == j]
            yhat2 = yhats[2][:, i][clusters[2] == j]
            # worst pairwise KS statistic across the three regions
            d = [ks_2samp(yhat0, yhat1)[0],
                 ks_2samp(yhat0, yhat2)[0],
                 ks_2samp(yhat2, yhat1)[0]]
            ks.append(max(d))
        concord = np.mean(ks)
        concords.iloc[i] = concord

    concords = concords.sort_values('concord')

    if split_pairs:
        # NOTE(review): the excerpt ends here — no alternative return for
        # split_pairs=False is visible; the snippet may be truncated.
        return concords
Example 56
 Project: dc_stat_think   Author: justinbois   File: test_dc_stat_think.py    MIT License 5 votes
def test_ks_stat(x):
    """dcst.ks_stat must agree with scipy's two-sample KS statistic against
    normal, exponential and logistic reference draws."""
    for theor_data in (np.random.normal(0, 1, size=100),
                       np.random.exponential(1, size=100),
                       np.random.logistic(0, 1, size=100)):
        expected, _ = st.ks_2samp(x, theor_data)
        assert np.isclose(dcst.ks_stat(x, theor_data), expected)
Example 57
 Project: dc_stat_think   Author: justinbois   File: test_dc_stat_think.py    MIT License 5 votes
def test_pandas_conversion(seed):
    """dc_stat_think helpers should accept pandas Series (incl. NaNs) and
    agree with their numpy-array equivalents."""
    df = pd.DataFrame({'a': [3, 2, 1, 4],
                       'b': [8, 6, 7, 5],
                       'c': [9.1, 10.1, 11.1, np.nan]})

    # ECDF of an integer column
    x, y = dcst.ecdf(df.loc[:, 'a'])
    assert (x == np.array([1, 2, 3, 4])).all()
    assert (y == np.array([0.25, 0.5, 0.75, 1.0])).all()

    # ECDF of a float column: the NaN must be dropped
    x, y = dcst.ecdf(df.loc[:, 'c'])
    assert np.allclose(x, np.array([9.1, 10.1, 11.1]))
    assert np.allclose(y, np.array([1/3, 2/3, 1.0]))

    # ks_stat should match scipy applied to NaN-stripped data
    df = pd.DataFrame({
        'a': np.concatenate((np.random.normal(0, 1, size=10), [np.nan]*990)),
        'b': np.random.normal(0, 1, size=1000)})
    correct, _ = st.ks_2samp(df['a'].dropna(), df['b'])
    assert np.isclose(dcst.ks_stat(df['a'], df['b']), correct)

    # bootstrap replicates: Series input must reproduce ndarray results
    # (the numba RNG is re-seeded before each draw so runs are comparable)
    df = pd.DataFrame({
        'a': np.concatenate((np.random.normal(0, 1, size=80), [np.nan]*20)),
        'b': np.random.normal(0, 1, size=100)})
    dcst_private._seed_numba(seed)
    correct = dcst.draw_bs_reps(df['a'].values, np.mean, size=100)
    dcst_private._seed_numba(seed)
    assert np.allclose(dcst.draw_bs_reps(df['a'], np.mean, size=100), correct,
                       atol=atol)

    dcst_private._seed_numba(seed)
    correct = dcst.draw_bs_reps(df['b'].values, np.mean, size=100)
    dcst_private._seed_numba(seed)
    assert np.allclose(dcst.draw_bs_reps(df['b'], np.mean, size=100), correct,
                       atol=atol)

    # permutation replicates: same Series-vs-ndarray equivalence
    dcst_private._seed_numba(seed)
    correct = dcst.draw_perm_reps(df['a'].values, df['b'].values,
                                  dcst.diff_of_means, size=100)
    dcst_private._seed_numba(seed)
    assert np.allclose(dcst.draw_perm_reps(df['a'], df['b'],
                                           dcst.diff_of_means, size=100), correct, atol=atol)
Example 58
 Project: DIVE-backend   Author: MacroConnections   File: numerical_comparison.py    GNU General Public License v3.0 4 votes
def get_valid_tests(equal_var, independent, normal, num_samples):
    '''
    Get valid tests given number of samples and statistical characterization of
    samples:

        Equal variance
        Independence
        Normality

    Returns a dict mapping test name -> scipy.stats test function.
    '''
    # Default: no applicable tests.  BUG FIX: previously `valid_tests` was
    # only assigned inside the branches, so num_samples == 0 (or any value
    # not matched below) raised NameError at the return.
    valid_tests = {}
    if num_samples == 1:
        valid_tests = {
            'chisquare': stats.chisquare,
            'power_divergence': stats.power_divergence,
            'kstest': stats.kstest
        }
        if normal:
            # BUG FIX: was `valid_tests['input']['one_sample_ttest'] = ...`,
            # which raised KeyError ('input' never exists); store at top level
            # like every other entry.
            valid_tests['one_sample_ttest'] = stats.ttest_1samp

    elif num_samples == 2:
        if independent:
            valid_tests = {
                'mannwhitneyu': stats.mannwhitneyu,
                'kruskal': stats.kruskal,
                'ks_2samp': stats.ks_2samp
            }
            if normal:
                valid_tests['two_sample_ttest'] = stats.ttest_ind
                if equal_var:
                    valid_tests['f_oneway'] = stats.f_oneway
        else:
            valid_tests = {
                'two_sample_ks': stats.ks_2samp,
                'wilcoxon': stats.wilcoxon
            }
            if normal:
                valid_tests['two_sample_related_ttest'] = stats.ttest_rel

    elif num_samples >= 3:
        if independent:
            valid_tests = {
                'kruskal': stats.kruskal
            }
            if normal and equal_var:
                valid_tests['f_oneway'] = stats.f_oneway
        else:
            # BUG FIX: this branch mutated `valid_tests` before it was ever
            # assigned on the dependent-samples path (NameError); initialize
            # the dict here.
            valid_tests = {'friedmanchisquare': stats.friedmanchisquare}

    return valid_tests
Example 59
 Project: frbpoppy   Author: davidgardenier   File: frbpoppy_frbcat.py    MIT License 4 votes
def plot_dists(surv_pop, telescope):
    """Plot the fluence and DM distribution of a surveyed population.

    Args:
        surv_pop (Population): Population from which to plot
        telescope (str): Name of the telescope with which to compare the
            distribution. Necessary for Frbcat.

    """
    plot_aa_style(cols=2)

    # Plot dm distribution
    f, (ax1, ax2) = plt.subplots(1, 2, sharey=True)

    dm_frbpoppy = surv_pop.frbs.dm
    pprint(f'Number of detected FRBs: {len(dm_frbpoppy)}')
    ax1.step(*hist(dm_frbpoppy), where='mid', linestyle='dashed')

    df = Frbcat().df
    dm_frbcat = df[df.telescope == telescope].dm
    ax1.step(*hist(dm_frbcat), where='mid')

    # Compare distributions (two-sample KS; annotate with the p-value)
    ks = ks_2samp(dm_frbpoppy, dm_frbcat)
    # NOTE(review): the '\$' escapes below look like scraping artifacts —
    # presumably plain TeX '$' delimiters originally; confirm against the
    # upstream frbpoppy source before relying on these strings.
    text = fr'\$p={round(ks[1], 2)}\$'
    anchored_text = AnchoredText(text, loc=1, borderpad=1., frameon=False)

    ax1.set_xlabel(r'DM (\$\textrm{pc}\ \textrm{cm}^{-3}\$)')
    ax1.set_ylabel('Fraction')
    ax1.set_ylim([0, 1.1])
    ax1.set_xlim([0, 3500])

    # Plot fluence distributions
    fluence_frbpoppy = surv_pop.frbs.fluence
    ax2.step(*hist(fluence_frbpoppy, bin_type='log'), where='mid',
             label='frbpoppy', linestyle='dashed')

    fluence_frbcat = df[df.telescope == telescope].fluence
    ax2.step(*hist(fluence_frbcat, bin_type='log'), where='mid',
             label='frbcat')

    # Compare distributions
    ks = ks_2samp(fluence_frbpoppy, fluence_frbcat)
    text = fr'\$p={round(ks[1], 2)}\$'
    anchored_text = AnchoredText(text, loc=1, borderpad=1., frameon=False)
    # NOTE(review): anchored_text is built (twice) but never added to an
    # axes in this excerpt — no ax.add_artist call is visible; possibly
    # truncated by the scrape.

    ax2.set_xlabel(r'Fluence (Jy ms)')
    ax2.set_ylim([0, 1.1])
    ax2.set_xlim([5e-1, 1e4])
    plt.xscale('log')

    plt.figlegend(loc='upper center', ncol=2, framealpha=1)

    plt.tight_layout()
    plt.savefig(rel_path(f'plots/frbpoppy_{telescope}.pdf'))
    plt.clf()
Example 60
 Project: cINNamon   Author: hagabbar   File: gw_INN.py    GNU General Public License v3.0 4 votes
def overlap_tests(pred_samp, lalinf_samp, true_vals, kernel_cnn, kernel_lalinf):
    """Perform K-S, Anderson-Darling and KDE-overlap comparisons between two
    posterior sample sets.

    Parameters
    ----------
    pred_samp: numpy array
        predicted PE samples from CNN; assumes shape (N, 2) — TODO confirm
    lalinf_samp: numpy array
        predicted PE samples from lalinference; assumes shape (2, M)
    true_vals:
        true scalar point values (unused here; kept for interface compatibility)
    kernel_cnn: scipy kde instance
        gaussian kde of CNN results
    kernel_lalinf: scipy kde instance
        gaussian kde of lalinference results

    Returns
    -------
    ks_score:
        numpy array of the per-parameter two-sample K-S results
    ad_score:
        list of the per-parameter k-sample Anderson-Darling results
    beta_score:
        normalized KDE overlap on a common grid (1 = identical densities)
    """
    # do k-s test, one per parameter column
    ks_mc_score = ks_2samp(pred_samp[:, 0].reshape(pred_samp[:, 0].shape[0], ), lalinf_samp[0][:])
    ks_q_score = ks_2samp(pred_samp[:, 1].reshape(pred_samp[:, 1].shape[0], ), lalinf_samp[1][:])
    ks_score = np.array([ks_mc_score, ks_q_score])

    # do anderson-darling test
    # BUG FIX: `ad_score` was returned below but never assigned, so this
    # function always raised NameError.  Compute the k-sample A-D test per
    # parameter, mirroring the K-S structure above.
    from scipy.stats import anderson_ksamp
    ad_mc_score = anderson_ksamp([pred_samp[:, 0].reshape(pred_samp[:, 0].shape[0], ), lalinf_samp[0][:]])
    ad_q_score = anderson_ksamp([pred_samp[:, 1].reshape(pred_samp[:, 1].shape[0], ), lalinf_samp[1][:]])
    ad_score = [ad_mc_score, ad_q_score]

    # compute overlap statistic: evaluate both KDEs on one grid spanning the
    # union of the two sample sets
    comb_mc = np.concatenate((pred_samp[:, 0].reshape(pred_samp[:, 0].shape[0], 1), lalinf_samp[0][:].reshape(lalinf_samp[0][:].shape[0], 1)))
    comb_q = np.concatenate((pred_samp[:, 1].reshape(pred_samp[:, 1].shape[0], 1), lalinf_samp[1][:].reshape(lalinf_samp[1][:].shape[0], 1)))
    X, Y = np.mgrid[np.min(comb_mc):np.max(comb_mc):100j, np.min(comb_q):np.max(comb_q):100j]
    positions = np.vstack([X.ravel(), Y.ravel()])
    cnn_pdf = kernel_cnn.pdf(positions)
    lalinf_pdf = kernel_lalinf.pdf(positions)

    # Cauchy-Schwarz style normalized inner product: 1 when densities match
    beta_score = np.divide(np.sum(cnn_pdf * lalinf_pdf),
                           np.sqrt(np.sum(cnn_pdf**2) *
                                   np.sum(lalinf_pdf**2)))

    return ks_score, ad_score, beta_score
Example 61
 Project: cINNamon   Author: hagabbar   File: gaussian_INN.py    GNU General Public License v3.0 4 votes
def overlap_tests(pred_samp, lalinf_samp, true_vals, kernel_cnn, kernel_lalinf):
    """Perform K-S, Anderson-Darling and KDE-overlap comparisons between two
    posterior sample sets.

    Parameters
    ----------
    pred_samp: numpy array
        predicted PE samples from CNN; assumes shape (N, 2) — TODO confirm
    lalinf_samp: numpy array
        predicted PE samples from lalinference; assumes shape (2, M)
    true_vals:
        true scalar point values (unused here; kept for interface compatibility)
    kernel_cnn: scipy kde instance
        gaussian kde of CNN results
    kernel_lalinf: scipy kde instance
        gaussian kde of lalinference results

    Returns
    -------
    ks_score:
        numpy array of the per-parameter two-sample K-S results
    ad_score:
        list of the per-parameter k-sample Anderson-Darling results
    beta_score:
        normalized KDE overlap on a common grid (1 = identical densities)
    """
    # do k-s test, one per parameter column
    ks_mc_score = ks_2samp(pred_samp[:, 0].reshape(pred_samp[:, 0].shape[0], ), lalinf_samp[0][:])
    ks_q_score = ks_2samp(pred_samp[:, 1].reshape(pred_samp[:, 1].shape[0], ), lalinf_samp[1][:])
    ks_score = np.array([ks_mc_score, ks_q_score])

    # do anderson-darling test
    # BUG FIX: `ad_score` was returned below but never assigned, so this
    # function always raised NameError.  Compute the k-sample A-D test per
    # parameter, mirroring the K-S structure above.
    from scipy.stats import anderson_ksamp
    ad_mc_score = anderson_ksamp([pred_samp[:, 0].reshape(pred_samp[:, 0].shape[0], ), lalinf_samp[0][:]])
    ad_q_score = anderson_ksamp([pred_samp[:, 1].reshape(pred_samp[:, 1].shape[0], ), lalinf_samp[1][:]])
    ad_score = [ad_mc_score, ad_q_score]

    # compute overlap statistic: evaluate both KDEs on one grid spanning the
    # union of the two sample sets
    comb_mc = np.concatenate((pred_samp[:, 0].reshape(pred_samp[:, 0].shape[0], 1), lalinf_samp[0][:].reshape(lalinf_samp[0][:].shape[0], 1)))
    comb_q = np.concatenate((pred_samp[:, 1].reshape(pred_samp[:, 1].shape[0], 1), lalinf_samp[1][:].reshape(lalinf_samp[1][:].shape[0], 1)))
    X, Y = np.mgrid[np.min(comb_mc):np.max(comb_mc):100j, np.min(comb_q):np.max(comb_q):100j]
    positions = np.vstack([X.ravel(), Y.ravel()])
    cnn_pdf = kernel_cnn.pdf(positions)
    lalinf_pdf = kernel_lalinf.pdf(positions)

    # Cauchy-Schwarz style normalized inner product: 1 when densities match
    beta_score = np.divide(np.sum(cnn_pdf * lalinf_pdf),
                           np.sqrt(np.sum(cnn_pdf**2) *
                                   np.sum(lalinf_pdf**2)))

    return ks_score, ad_score, beta_score
Example 62
 Project: cINNamon   Author: hagabbar   File: Radynversion.py    GNU General Public License v3.0 4 votes
def overlap_tests(pred_samp, lalinf_samp, true_vals, kernel_cnn, kernel_lalinf):
    """Perform K-S, Anderson-Darling and KDE-overlap comparisons between two
    posterior sample sets.

    Parameters
    ----------
    pred_samp: numpy array
        predicted PE samples from CNN; assumes shape (N, 2) — TODO confirm
    lalinf_samp: numpy array
        predicted PE samples from lalinference; assumes shape (2, M)
    true_vals:
        true scalar point values (unused here; kept for interface compatibility)
    kernel_cnn: scipy kde instance
        gaussian kde of CNN results
    kernel_lalinf: scipy kde instance
        gaussian kde of lalinference results

    Returns
    -------
    ks_score:
        numpy array of the per-parameter two-sample K-S results
    ad_score:
        list of the per-parameter k-sample Anderson-Darling results
    beta_score:
        normalized KDE overlap on a common grid (1 = identical densities)
    """
    # do k-s test, one per parameter column
    ks_mc_score = ks_2samp(pred_samp[:, 0].reshape(pred_samp[:, 0].shape[0], ), lalinf_samp[0][:])
    ks_q_score = ks_2samp(pred_samp[:, 1].reshape(pred_samp[:, 1].shape[0], ), lalinf_samp[1][:])
    ks_score = np.array([ks_mc_score, ks_q_score])

    # do anderson-darling test
    # BUG FIX: `ad_score` was returned below but never assigned, so this
    # function always raised NameError.  Compute the k-sample A-D test per
    # parameter, mirroring the K-S structure above.
    from scipy.stats import anderson_ksamp
    ad_mc_score = anderson_ksamp([pred_samp[:, 0].reshape(pred_samp[:, 0].shape[0], ), lalinf_samp[0][:]])
    ad_q_score = anderson_ksamp([pred_samp[:, 1].reshape(pred_samp[:, 1].shape[0], ), lalinf_samp[1][:]])
    ad_score = [ad_mc_score, ad_q_score]

    # compute overlap statistic: evaluate both KDEs on one grid spanning the
    # union of the two sample sets
    comb_mc = np.concatenate((pred_samp[:, 0].reshape(pred_samp[:, 0].shape[0], 1), lalinf_samp[0][:].reshape(lalinf_samp[0][:].shape[0], 1)))
    comb_q = np.concatenate((pred_samp[:, 1].reshape(pred_samp[:, 1].shape[0], 1), lalinf_samp[1][:].reshape(lalinf_samp[1][:].shape[0], 1)))
    X, Y = np.mgrid[np.min(comb_mc):np.max(comb_mc):100j, np.min(comb_q):np.max(comb_q):100j]
    positions = np.vstack([X.ravel(), Y.ravel()])
    cnn_pdf = kernel_cnn.pdf(positions)
    lalinf_pdf = kernel_lalinf.pdf(positions)

    # Cauchy-Schwarz style normalized inner product: 1 when densities match
    beta_score = np.divide(np.sum(cnn_pdf * lalinf_pdf),
                           np.sqrt(np.sum(cnn_pdf**2) *
                                   np.sum(lalinf_pdf**2)))

    return ks_score, ad_score, beta_score

# Load the training data -- you will need to modify this path.

# In[4]:
Example 63
 Project: cINNamon   Author: hagabbar   File: gw_INN.py    GNU General Public License v3.0 4 votes
def overlap_tests(pred_samp, lalinf_samp, true_vals, kernel_cnn, kernel_lalinf):
    """Perform Anderson-Darling, K-S, and overlap tests to get quantifiable
    values for the accuracy of the GAN PE method.

    Parameters
    ----------
    pred_samp: numpy array
        predicted PE samples from CNN; shape (N, 2) -- column 0 and column 1
        are the two estimated parameters (presumably chirp mass and mass
        ratio -- confirm against caller)
    lalinf_samp: numpy array
        predicted PE samples from lalinference; shape (2, M)
    true_vals:
        true scalar point values for parameters to be estimated (taken from
        GW event paper); not used by this function
    kernel_cnn: scipy kde instance
        gaussian kde of CNN results
    kernel_lalinf: scipy kde instance
        gaussian kde of lalinference results

    Returns
    -------
    ks_score: numpy array
        k-s test (statistic, p-value) pairs, one row per parameter
    ad_score: numpy array
        anderson-darling k-sample statistic, one entry per parameter
    beta_score: float
        overlap score. used to determine goodness of CNN PE estimates
    """
    # Local import so the fix below does not depend on the module's imports.
    from scipy.stats import anderson_ksamp

    # do k-s test on each parameter's 1-D marginal
    ks_mc_score = ks_2samp(pred_samp[:, 0].reshape(pred_samp[:, 0].shape[0],),
                           lalinf_samp[0][:])
    ks_q_score = ks_2samp(pred_samp[:, 1].reshape(pred_samp[:, 1].shape[0],),
                          lalinf_samp[1][:])
    ks_score = np.array([ks_mc_score, ks_q_score])

    # do anderson-darling test
    # BUG FIX: the original left this section empty but still returned
    # `ad_score`, raising NameError on every call. Compute the two-sample
    # Anderson-Darling statistic per parameter instead.
    ad_mc = anderson_ksamp([pred_samp[:, 0], lalinf_samp[0][:]])
    ad_q = anderson_ksamp([pred_samp[:, 1], lalinf_samp[1][:]])
    ad_score = np.array([ad_mc.statistic, ad_q.statistic])

    # compute overlap statistic: evaluate both KDEs on a common 100x100 grid
    # spanning the combined range of both sample sets
    comb_mc = np.concatenate((pred_samp[:, 0].reshape(pred_samp[:, 0].shape[0], 1),
                              lalinf_samp[0][:].reshape(lalinf_samp[0][:].shape[0], 1)))
    comb_q = np.concatenate((pred_samp[:, 1].reshape(pred_samp[:, 1].shape[0], 1),
                             lalinf_samp[1][:].reshape(lalinf_samp[1][:].shape[0], 1)))
    X, Y = np.mgrid[np.min(comb_mc):np.max(comb_mc):100j,
                    np.min(comb_q):np.max(comb_q):100j]
    positions = np.vstack([X.ravel(), Y.ravel()])
    cnn_pdf = kernel_cnn.pdf(positions)
    lalinf_pdf = kernel_lalinf.pdf(positions)

    # normalised inner product of the two densities (cosine similarity):
    # 1.0 for identical distributions, 0.0 for disjoint support
    beta_score = np.divide(np.sum(cnn_pdf * lalinf_pdf),
                           np.sqrt(np.sum(cnn_pdf**2) *
                                   np.sum(lalinf_pdf**2)))

    return ks_score, ad_score, beta_score
Example 64
 Project: gwin   Author: gwastro   File: burn_in.py    GNU General Public License v3.0 4 votes
def ks_test(sampler, fp, threshold=0.9):
    """Burn in based on whether the p-value of the KS test between the samples
    at the last iteration and the samples midway along the chain for each
    parameter is > ``threshold``.

    Parameters
    ----------
    sampler : gwin.sampler
        Sampler to determine burn in for. May be either an instance of a
        `gwin.sampler`, or the class itself.
    fp : InferenceFile
        Open inference hdf file containing the samples to load for determing
        burn in.
    threshold : float
        The threshold to use for the p-value. Default is 0.9.

    Returns
    -------
    burn_in_idx : array
        Array of indices giving the burn-in index for each chain.
    is_burned_in : array
        Array of booleans indicating whether each chain is burned in.
    """
    nwalkers = fp.nwalkers
    niterations = fp.niterations
    # Map each variable arg to a boolean indicating whether the p-value for
    # that parameter satisfies the KS test.
    is_burned_in_param = {}
    # iterate over the parameters
    for param in fp.variable_params:
        # read samples for the parameter from the last iteration of the chain
        samples_last_iter = sampler.read_samples(fp, param, iteration=-1,
                                                 flatten=True)[param]
        # read samples for the parameter from the iteration midway
        # along the chain
        # BUG FIX: the assignment head was missing here, leaving a dangling
        # expression and an undefined `samples_chain_midpt` below.
        samples_chain_midpt = sampler.read_samples(
            fp, param, iteration=int(niterations / 2), flatten=True)[param]
        _, p_value = ks_2samp(samples_last_iter, samples_chain_midpt)
        # check if p_value exceeds the desired threshold
        is_burned_in_param[param] = p_value > threshold

    # The chains are considered burned in only if the KS p-value exceeds
    # ``threshold`` for every parameter. If the KS test is passed, the
    # chains have burned in at their mid-way point.
    if all(is_burned_in_param.values()):
        is_burned_in = numpy.ones(nwalkers, dtype=bool)
        burn_in_idx = numpy.repeat(niterations / 2, nwalkers).astype(int)
    else:
        is_burned_in = numpy.zeros(nwalkers, dtype=bool)
        burn_in_idx = numpy.repeat(niterations, nwalkers).astype(int)
    return burn_in_idx, is_burned_in
Example 65
 Project: SamPy   Author: sniemi   File: AnalyseFieldEs.py    BSD 2-Clause "Simplified" License 4 votes
def main(saved):
    """Analyse field ellipticals: compare distributions of the field-E sample
    (``fEsall``) against the bright-magnitude-selected ``ET4`` sample with
    histograms, two-sample K-S tests, and summary statistics.

    Relies on module-level data arrays ``ET4`` and ``fEsall`` and the helpers
    ``histograms()`` and ``SMNhistogram()`` defined elsewhere in this file.

    NOTE(review): the original was Python-2 only (print statements) and
    shadowed the builtins ``type``, ``file``, ``min`` and ``max``; the prints
    were converted to function calls (identical output) and the shadowing
    locals renamed. All printed strings are unchanged.

    Parameters
    ----------
    saved :
        forwarded to ``SMNhistogram`` as its save flag
    """
    import scipy.stats as S
    import numpy as N

    # Constants
    save = saved
    bmag = 45                 # column index of the B-band magnitude
    volume = 5. * 194.**3.    # survey volume
    gal_type = 9              # column index of the galaxy-type flag

    # marks only the ones which fulfil criterion
    indices = N.where(ET4[:, bmag] <= -19.)
    ET4magb = ET4[indices]

    # Takes away three that are actually subhaloes...
    ind = N.where(fEsall[:, gal_type] == 0)
    fEs = fEsall[ind]

    hists = histograms()

    print('\nNumber of fEs and Es:\n %i %i' % (len(fEs), len(ET4magb)))

    for plot in hists:
        column = hists[plot]['column']
        fname = hists[plot]['file']
        bins = hists[plot]['bins']
        lo = hists[plot]['min']
        hi = hists[plot]['max']

        if fname == 'colour':
            temp = fEs[:, 45] - fEs[:, 47]
            temp1 = ET4magb[:, 45] - ET4magb[:, 47]
        elif fname == 'companions':
            temp = fEs[:, 63] + fEs[:, 64]
            # NOTE(review): unlike the other branches this reads from fEs,
            # not ET4magb -- looks deliberate but worth confirming.
            temp1 = fEs[:, 64]
        else:
            temp = fEs[:, column]
            temp1 = ET4magb[:, column]

        SMNhistogram(fname, temp, temp1, bins, lo, hi, save, volume)

        # two-sample Kolmogorov-Smirnov: are fEs and Es drawn from the
        # same distribution for this quantity?
        D, p = S.ks_2samp(temp, temp1)
        print('\nKolmogorov-Smirnov statistics for %s:' % fname)
        print('D-value = %e \np-value = %e' % (D, p))

        print('\nStatistics 1st fEs and then Es:')
        print(("%20s" + "%15s" * 7) % ("Name", "1st Quart", "Median",
                                       "3rd Quart", "Max", "Min", "Mean",
                                       "Stdev"))
        frmt = "%14s & %12.2f & %12.2f & %12.2f & %12.2f & %12.2f & %12.2f & %12.2f"
        frmt1 = "IfEs &" + frmt
        frmt2 = "Es   &" + frmt
        print(frmt1 % (fname, S.scoreatpercentile(temp, 25),
                       N.median(temp), S.scoreatpercentile(temp, 75),
                       N.max(temp), N.min(temp), N.mean(temp), N.std(temp)))
        print(frmt2 % (fname, S.scoreatpercentile(temp1, 25),
                       N.median(temp1), S.scoreatpercentile(temp1, 75),
                       N.max(temp1), N.min(temp1), N.mean(temp1), N.std(temp1)))