# Python scipy.stats.sem() Examples

The following code examples show how to use scipy.stats.sem(). They are drawn from open-source Python projects; you can vote up the examples you find useful or vote down those you don't.

Example 1
def conf_interval(data_series, confidence=0.95):
    """
    Calculate the confidence interval for the data distribution under the
    assumption that it can be modelled with a Student-t distribution.

    :param data_series: Sequence of numeric observations.
    :param confidence: Confidence level in (0, 1); defaults to 0.95.
    :return start: Starting value of the interval
    :return end: Ending value of the interval
    """
    mean = np.mean(data_series)

    # Half-width of the interval: standard error of the mean times the
    # two-sided t critical value with len(data_series) - 1 degrees of freedom.
    half_width = sem(data_series) * t.ppf((1 + confidence) / 2,
                                          len(data_series) - 1)

    start = mean - half_width
    end = mean + half_width

    return start, end
Example 2
def optimally_reblocked(data):
    """
    Find optimal reblocking of input data. Takes in pandas
    DataFrame of raw data to reblock, returns DataFrame
    of reblocked data.
    """
    opt = opt_block(data)
    # Use the largest optimal block size across all columns.
    n_reblock = int(np.amax(opt))
    rb_data = reblock_by2(data, n_reblock)
    serr = rb_data.sem(axis=0)
    summary = {
        "mean": rb_data.mean(axis=0),
        "standard error": serr,
        # Error on the s.e.m. estimate itself: serr / sqrt(2 * (nblocks - 1)).
        "standard error error": serr / np.sqrt(2 * (len(rb_data) - 1)),
        "reblocks": n_reblock,
    }
    return pd.DataFrame(summary)
Example 3
def test_sem(self):
    """stats.sem and stats.mstats.sem must agree (doc example + random samples)."""
    # Example from the stats.sem docstring.
    a = np.arange(20).reshape(5, 4)
    am = np.ma.array(a)
    r = stats.sem(a, ddof=1)
    rm = stats.mstats.sem(am, ddof=1)

    assert_allclose(r, 2.82842712, atol=1e-5)
    assert_allclose(rm, 2.82842712, atol=1e-5)

    for n in self.get_n():
        x, y, xm, ym = self.generate_xy_sample(n)
        # Masked and unmasked implementations must agree for both the
        # population (ddof=0) and sample (ddof=1) standard errors.
        for ddof in (0, 1):
            assert_almost_equal(stats.mstats.sem(xm, axis=None, ddof=ddof),
                                stats.sem(x, axis=None, ddof=ddof),
                                decimal=13)
            assert_almost_equal(stats.mstats.sem(ym, axis=None, ddof=ddof),
                                stats.sem(y, axis=None, ddof=ddof),
                                decimal=13)
Example 4
def test_sem(self):
    """Check stats.sem values, ddof rescaling, and nan_policy handling."""
    # This is not in R, so used:
    #     sqrt(var(testcase)*3/4)/sqrt(3)

    # y = stats.sem(self.shoes[0])
    # assert_approx_equal(y,0.775177399)
    with warnings.catch_warnings():
        warnings.filterwarnings('ignore', category=RuntimeWarning)
        # sem of a scalar has zero degrees of freedom -> NaN (plus a warning).
        y = stats.sem(self.scalar_testcase)
    assert_(np.isnan(y))

    y = stats.sem(self.testcase)
    assert_approx_equal(y, 0.6454972244)
    # Changing ddof rescales the estimate by sqrt(n / (n - ddof)).
    n = len(self.testcase)
    assert_allclose(stats.sem(self.testcase, ddof=0) * np.sqrt(n/(n-2)),
                    stats.sem(self.testcase, ddof=2))

    # NaN handling: propagate by default, drop with 'omit', error otherwise.
    x = np.arange(10.)
    x[9] = np.nan
    assert_equal(stats.sem(x), np.nan)
    assert_equal(stats.sem(x, nan_policy='omit'), 0.9128709291752769)
    assert_raises(ValueError, stats.sem, x, nan_policy='raise')
    assert_raises(ValueError, stats.sem, x, nan_policy='foobar')
Example 5
 Project: att   Author: Centre-Alt-Rendiment-Esportiu   File: test_mstats_basic.py    GNU General Public License v3.0 6 votes
def test_sem(self):
    """stats.sem and stats.mstats.sem must agree (doc example + random samples)."""
    # Example from the stats.sem docstring.
    a = np.arange(20).reshape(5, 4)
    am = np.ma.array(a)
    r = stats.sem(a, ddof=1)
    rm = stats.mstats.sem(am, ddof=1)

    assert_allclose(r, 2.82842712, atol=1e-5)
    assert_allclose(rm, 2.82842712, atol=1e-5)

    for n in self.get_n():
        x, y, xm, ym = self.generate_xy_sample(n)
        # Masked and unmasked implementations must agree for both the
        # population (ddof=0) and sample (ddof=1) standard errors.
        for ddof in (0, 1):
            assert_almost_equal(stats.mstats.sem(xm, axis=None, ddof=ddof),
                                stats.sem(x, axis=None, ddof=ddof),
                                decimal=13)
            assert_almost_equal(stats.mstats.sem(ym, axis=None, ddof=ddof),
                                stats.sem(y, axis=None, ddof=ddof),
                                decimal=13)
Example 6
def test_sem(self):
    """stats.sem and stats.mstats.sem must agree (doc example + random samples)."""
    # Example from the stats.sem docstring.
    a = np.arange(20).reshape(5, 4)
    am = np.ma.array(a)
    r = stats.sem(a, ddof=1)
    rm = stats.mstats.sem(am, ddof=1)

    assert_allclose(r, 2.82842712, atol=1e-5)
    assert_allclose(rm, 2.82842712, atol=1e-5)

    for n in self.get_n():
        x, y, xm, ym = self.generate_xy_sample(n)
        # Masked and unmasked implementations must agree for both the
        # population (ddof=0) and sample (ddof=1) standard errors.
        for ddof in (0, 1):
            assert_almost_equal(stats.mstats.sem(xm, axis=None, ddof=ddof),
                                stats.sem(x, axis=None, ddof=ddof),
                                decimal=13)
            assert_almost_equal(stats.mstats.sem(ym, axis=None, ddof=ddof),
                                stats.sem(y, axis=None, ddof=ddof),
                                decimal=13)
Example 7
def test_sem(self):
    """Check stats.sem values, ddof rescaling, and nan_policy handling."""
    # This is not in R, so used:
    #     sqrt(var(testcase)*3/4)/sqrt(3)

    # y = stats.sem(self.shoes[0])
    # assert_approx_equal(y,0.775177399)
    with suppress_warnings() as sup, np.errstate(invalid="ignore"):
        sup.filter(RuntimeWarning, "Degrees of freedom <= 0 for slice")
        # sem of a scalar has zero degrees of freedom -> NaN (plus a warning).
        y = stats.sem(self.scalar_testcase)
    assert_(np.isnan(y))

    y = stats.sem(self.testcase)
    assert_approx_equal(y, 0.6454972244)
    # Changing ddof rescales the estimate by sqrt(n / (n - ddof)).
    n = len(self.testcase)
    assert_allclose(stats.sem(self.testcase, ddof=0) * np.sqrt(n/(n-2)),
                    stats.sem(self.testcase, ddof=2))

    # NaN handling: propagate by default, drop with 'omit', error otherwise.
    x = np.arange(10.)
    x[9] = np.nan
    assert_equal(stats.sem(x), np.nan)
    assert_equal(stats.sem(x, nan_policy='omit'), 0.9128709291752769)
    assert_raises(ValueError, stats.sem, x, nan_policy='raise')
    assert_raises(ValueError, stats.sem, x, nan_policy='foobar')
Example 8
def test_sem(self):
    """stats.sem and stats.mstats.sem must agree (doc example + random samples)."""
    # Example from the stats.sem docstring.
    a = np.arange(20).reshape(5, 4)
    am = np.ma.array(a)
    r = stats.sem(a, ddof=1)
    rm = stats.mstats.sem(am, ddof=1)

    assert_allclose(r, 2.82842712, atol=1e-5)
    assert_allclose(rm, 2.82842712, atol=1e-5)

    for n in self.get_n():
        x, y, xm, ym = self.generate_xy_sample(n)
        # Masked and unmasked implementations must agree for both the
        # population (ddof=0) and sample (ddof=1) standard errors.
        for ddof in (0, 1):
            assert_almost_equal(stats.mstats.sem(xm, axis=None, ddof=ddof),
                                stats.sem(x, axis=None, ddof=ddof),
                                decimal=13)
            assert_almost_equal(stats.mstats.sem(ym, axis=None, ddof=ddof),
                                stats.sem(y, axis=None, ddof=ddof),
                                decimal=13)
Example 9
def test_sem(self):
    """Check stats.sem values, ddof rescaling, and nan_policy handling."""
    # This is not in R, so used:
    #     sqrt(var(testcase)*3/4)/sqrt(3)

    # y = stats.sem(self.shoes[0])
    # assert_approx_equal(y,0.775177399)
    with warnings.catch_warnings():
        warnings.filterwarnings('ignore', category=RuntimeWarning)
        # sem of a scalar has zero degrees of freedom -> NaN (plus a warning).
        y = stats.sem(self.scalar_testcase)
    assert_(np.isnan(y))

    y = stats.sem(self.testcase)
    assert_approx_equal(y, 0.6454972244)
    # Changing ddof rescales the estimate by sqrt(n / (n - ddof)).
    n = len(self.testcase)
    assert_allclose(stats.sem(self.testcase, ddof=0) * np.sqrt(n/(n-2)),
                    stats.sem(self.testcase, ddof=2))

    # NaN handling: propagate by default, drop with 'omit', error otherwise.
    x = np.arange(10.)
    x[9] = np.nan
    assert_equal(stats.sem(x), np.nan)
    assert_equal(stats.sem(x, nan_policy='omit'), 0.9128709291752769)
    assert_raises(ValueError, stats.sem, x, nan_policy='raise')
    assert_raises(ValueError, stats.sem, x, nan_policy='foobar')
Example 10
def test_sem(self):
    """stats.sem and stats.mstats.sem must agree (doc example + random samples)."""
    # Example from the stats.sem docstring.
    a = np.arange(20).reshape(5, 4)
    am = np.ma.array(a)
    r = stats.sem(a, ddof=1)
    rm = stats.mstats.sem(am, ddof=1)

    assert_allclose(r, 2.82842712, atol=1e-5)
    assert_allclose(rm, 2.82842712, atol=1e-5)

    for n in self.get_n():
        x, y, xm, ym = self.generate_xy_sample(n)
        # Masked and unmasked implementations must agree for both the
        # population (ddof=0) and sample (ddof=1) standard errors.
        for ddof in (0, 1):
            assert_almost_equal(stats.mstats.sem(xm, axis=None, ddof=ddof),
                                stats.sem(x, axis=None, ddof=ddof),
                                decimal=13)
            assert_almost_equal(stats.mstats.sem(ym, axis=None, ddof=ddof),
                                stats.sem(y, axis=None, ddof=ddof),
                                decimal=13)
Example 11
def test_sem(self):
    """Check stats.sem values, ddof rescaling, and nan_policy handling."""
    # This is not in R, so used:
    #     sqrt(var(testcase)*3/4)/sqrt(3)

    # y = stats.sem(self.shoes[0])
    # assert_approx_equal(y,0.775177399)
    with suppress_warnings() as sup, np.errstate(invalid="ignore"):
        sup.filter(RuntimeWarning, "Degrees of freedom <= 0 for slice")
        # sem of a scalar has zero degrees of freedom -> NaN (plus a warning).
        y = stats.sem(self.scalar_testcase)
    assert_(np.isnan(y))

    y = stats.sem(self.testcase)
    assert_approx_equal(y, 0.6454972244)
    # Changing ddof rescales the estimate by sqrt(n / (n - ddof)).
    n = len(self.testcase)
    assert_allclose(stats.sem(self.testcase, ddof=0) * np.sqrt(n/(n-2)),
                    stats.sem(self.testcase, ddof=2))

    # NaN handling: propagate by default, drop with 'omit', error otherwise.
    x = np.arange(10.)
    x[9] = np.nan
    assert_equal(stats.sem(x), np.nan)
    assert_equal(stats.sem(x, nan_policy='omit'), 0.9128709291752769)
    assert_raises(ValueError, stats.sem, x, nan_policy='raise')
    assert_raises(ValueError, stats.sem, x, nan_policy='foobar')
Example 12
def test_sem(self):
    """stats.sem and stats.mstats.sem must agree (doc example + random samples)."""
    # Example from the stats.sem docstring.
    a = np.arange(20).reshape(5, 4)
    am = np.ma.array(a)
    r = stats.sem(a, ddof=1)
    rm = stats.mstats.sem(am, ddof=1)

    assert_allclose(r, 2.82842712, atol=1e-5)
    assert_allclose(rm, 2.82842712, atol=1e-5)

    for n in self.get_n():
        x, y, xm, ym = self.generate_xy_sample(n)
        # Masked and unmasked implementations must agree for both the
        # population (ddof=0) and sample (ddof=1) standard errors.
        for ddof in (0, 1):
            assert_almost_equal(stats.mstats.sem(xm, axis=None, ddof=ddof),
                                stats.sem(x, axis=None, ddof=ddof),
                                decimal=13)
            assert_almost_equal(stats.mstats.sem(ym, axis=None, ddof=ddof),
                                stats.sem(y, axis=None, ddof=ddof),
                                decimal=13)
Example 13
def test_sem(self):
    """Check stats.sem values, ddof rescaling, and nan_policy handling."""
    # This is not in R, so used:
    #     sqrt(var(testcase)*3/4)/sqrt(3)

    # y = stats.sem(self.shoes[0])
    # assert_approx_equal(y,0.775177399)
    with suppress_warnings() as sup, np.errstate(invalid="ignore"):
        sup.filter(RuntimeWarning, "Degrees of freedom <= 0 for slice")
        # sem of a scalar has zero degrees of freedom -> NaN (plus a warning).
        y = stats.sem(self.scalar_testcase)
    assert_(np.isnan(y))

    y = stats.sem(self.testcase)
    assert_approx_equal(y, 0.6454972244)
    # Changing ddof rescales the estimate by sqrt(n / (n - ddof)).
    n = len(self.testcase)
    assert_allclose(stats.sem(self.testcase, ddof=0) * np.sqrt(n/(n-2)),
                    stats.sem(self.testcase, ddof=2))

    # NaN handling: propagate by default, drop with 'omit', error otherwise.
    x = np.arange(10.)
    x[9] = np.nan
    assert_equal(stats.sem(x), np.nan)
    assert_equal(stats.sem(x, nan_policy='omit'), 0.9128709291752769)
    assert_raises(ValueError, stats.sem, x, nan_policy='raise')
    assert_raises(ValueError, stats.sem, x, nan_policy='foobar')
Example 14
# NOTE(review): this snippet is incomplete — the web scrape that produced it
# dropped both the indentation and several lines of the body. As shown, `fam`
# is undefined, `L` is never populated, and the return sits inside the loop.
# The original source presumably appended per-position depths to L before
# closing the file — TODO recover the full function from upstream.
def ExtractCoverage(ConsFile):
'''
(file) -> float, float

:param ConsFile: Consensus file with raw depth at each position within a given region (ie. not merged)

Return a tuple with the mean read depth within that interval and the standard error of the mean
'''

L = []
infile = open(ConsFile)
for line in infile:
if 'chr' in line:
line = line.rstrip().split('\t')
if fam == '0':
infile.close()
M = np.mean(L)
sem = stats.sem(L)
return M, sem 
Example 15
def GetSampleCoverage(L):
    '''
    (list) -> dict

    :param L: A list of full paths to consensus files with umi count per interval (ie. files not merged and not empty)

    Returns a dictionary of interval coordinates with a list with mean and s.e.m. of coverage within the interval
    '''
    D = {}
    for filename in L:
        # extract region from filename
        region = FormatRegion(filename)
        M, sem = ExtractCoverage(filename)
        D[region] = [M, sem]
    return D
Example 16
def test_single_layer_stats(self):
    """Bootstrap CIs from the stat plotter should match normal-theory CIs."""
    p = cat._CategoricalStatPlotter()

    g = pd.Series(np.repeat(list("abc"), 100))
    y = pd.Series(np.random.RandomState(0).randn(300))

    p.establish_variables(g, y)
    p.estimate_statistic(np.mean, 95, 10000)

    nt.assert_equal(p.statistic.shape, (3,))
    nt.assert_equal(p.confint.shape, (3, 2))

    npt.assert_array_almost_equal(p.statistic,
                                  y.groupby(g).mean())

    for ci, (_, grp_y) in zip(p.confint, y.groupby(g)):
        # Normal-theory 95% CI: mean +/- z_{.975} * sem; the bootstrap CI
        # should agree to ~2 decimal places.
        sem = stats.sem(grp_y)
        mean = grp_y.mean()
        half_ci = stats.norm.ppf(.975) * sem
        ci_want = mean - half_ci, mean + half_ci
        npt.assert_array_almost_equal(ci_want, ci, 2)
Example 17
def test_single_layer_stats_with_missing_data(self):
    """Groups in `order` that have no data should yield NaN stats and CIs."""
    p = cat._CategoricalStatPlotter()

    g = pd.Series(np.repeat(list("abc"), 100))
    y = pd.Series(np.random.RandomState(0).randn(300))

    # "d" is requested in the order but absent from the data.
    p.establish_variables(g, y, order=list("abdc"))
    p.estimate_statistic(np.mean, 95, 10000)

    nt.assert_equal(p.statistic.shape, (4,))
    nt.assert_equal(p.confint.shape, (4, 2))

    # Present group "b": bootstrap CI matches the normal-theory CI.
    mean = y[g == "b"].mean()
    sem = stats.sem(y[g == "b"])
    half_ci = stats.norm.ppf(.975) * sem
    ci = mean - half_ci, mean + half_ci
    npt.assert_almost_equal(p.statistic[1], mean)
    npt.assert_array_almost_equal(p.confint[1], ci, 2)

    # Missing group "d": NaN statistic and NaN interval.
    npt.assert_equal(p.statistic[2], np.nan)
    npt.assert_array_equal(p.confint[2], (np.nan, np.nan))
Example 18
def test_nested_stats(self):
    """Hue-nested bootstrap CIs should match normal-theory CIs per subgroup."""
    p = cat._CategoricalStatPlotter()

    g = pd.Series(np.repeat(list("abc"), 100))
    h = pd.Series(np.tile(list("xy"), 150))
    y = pd.Series(np.random.RandomState(0).randn(300))

    p.establish_variables(g, y, h)
    p.estimate_statistic(np.mean, 95, 50000)

    nt.assert_equal(p.statistic.shape, (3, 2))
    nt.assert_equal(p.confint.shape, (3, 2, 2))

    npt.assert_array_almost_equal(p.statistic,
                                  y.groupby([g, h]).mean().unstack())

    for ci_g, (_, grp_y) in zip(p.confint, y.groupby(g)):
        # h tiles "xy", so even/odd positions within each group split the hues.
        for ci, hue_y in zip(ci_g, [grp_y[::2], grp_y[1::2]]):
            sem = stats.sem(hue_y)
            mean = hue_y.mean()
            half_ci = stats.norm.ppf(.975) * sem
            ci_want = mean - half_ci, mean + half_ci
            npt.assert_array_almost_equal(ci_want, ci, 2)
Example 19
def test_sem(self):
    """stats.sem and stats.mstats.sem must agree (doc example + random samples)."""
    # Example from the stats.sem docstring.
    a = np.arange(20).reshape(5, 4)
    am = np.ma.array(a)
    r = stats.sem(a, ddof=1)
    rm = stats.mstats.sem(am, ddof=1)

    assert_allclose(r, 2.82842712, atol=1e-5)
    assert_allclose(rm, 2.82842712, atol=1e-5)

    for n in self.get_n():
        x, y, xm, ym = self.generate_xy_sample(n)
        # Masked and unmasked implementations must agree for both the
        # population (ddof=0) and sample (ddof=1) standard errors.
        for ddof in (0, 1):
            assert_almost_equal(stats.mstats.sem(xm, axis=None, ddof=ddof),
                                stats.sem(x, axis=None, ddof=ddof),
                                decimal=13)
            assert_almost_equal(stats.mstats.sem(ym, axis=None, ddof=ddof),
                                stats.sem(y, axis=None, ddof=ddof),
                                decimal=13)
Example 20
def test_sem(self):
    """Check stats.sem values, ddof rescaling, and nan_policy handling."""
    # This is not in R, so used:
    #     sqrt(var(testcase)*3/4)/sqrt(3)

    # y = stats.sem(self.shoes[0])
    # assert_approx_equal(y,0.775177399)
    with warnings.catch_warnings():
        warnings.filterwarnings('ignore', category=RuntimeWarning)
        # sem of a scalar has zero degrees of freedom -> NaN (plus a warning).
        y = stats.sem(self.scalar_testcase)
    assert_(np.isnan(y))

    y = stats.sem(self.testcase)
    assert_approx_equal(y, 0.6454972244)
    # Changing ddof rescales the estimate by sqrt(n / (n - ddof)).
    n = len(self.testcase)
    assert_allclose(stats.sem(self.testcase, ddof=0) * np.sqrt(n/(n-2)),
                    stats.sem(self.testcase, ddof=2))

    # NaN handling: propagate by default, drop with 'omit', error otherwise.
    x = np.arange(10.)
    x[9] = np.nan
    assert_equal(stats.sem(x), np.nan)
    assert_equal(stats.sem(x, nan_policy='omit'), 0.9128709291752769)
    assert_raises(ValueError, stats.sem, x, nan_policy='raise')
    assert_raises(ValueError, stats.sem, x, nan_policy='foobar')
Example 21
def _evaluate_predictors(X, predictors, y, groups, cross_validator, scorer):
    """Cross-validate each predictor and collect timing and score summaries.

    :param X: feature matrix passed through to the scoring helper.
    :param predictors: iterable of estimators to evaluate.
    :param y: target values.
    :param groups: group labels for the cross-validator.
    :param cross_validator: CV splitter used by ``_get_scores``.
    :param scorer: scoring callable used by ``_get_scores``.
    :return: list of dicts, one per predictor, with run timing, mean test
        score, its standard error, and model metadata.
    """
    run_infos = []
    for predictor in predictors:
        start = datetime.utcnow()
        scores = _get_scores(X, y, groups, cross_validator, predictor, scorer)
        end = datetime.utcnow()

        # Flatten per-fold scores before aggregating.
        scores_array = np.hstack(scores)
        results = {
            'test_score': scores_array.mean(),
            'test_score_sem': sem(scores_array),
        }

        run_infos.append({
            'run_start': str(start),
            'run_end': str(end),
            'run_seconds': (end - start).total_seconds(),
            'results': results,
            'model_info': get_model_info(predictor),
        })
    return run_infos
Example 22
 Project: conan   Author: HITS-MBM   File: conan.py    GNU General Public License v3.0 6 votes
def block_analysis(vectors, nres, dt):
    """Blocking analysis of the standard error of the mean.

    Repeatedly halves the sample series by averaging adjacent pairs, writing
    the per-level s.e.m., its uncertainty, and a correlation-time estimate to
    blocking/block_out.txt (the "blocking" directory must already exist).

    :param vectors: sequence of per-frame vectors (one row per sample).
    :param nres: number of residues, used to normalize the aggregate s.e.m.
    :param dt: time step between samples (ps).
    """
    b = np.array(vectors)
    print("dt=", dt)
    with open("blocking/block_out.txt", "w+") as block_outf:
        block_outf.write("#N_iter N_blocks stderr (nm)  stderr_err (nm) corr.time est. (ps)\n")
        i = 0
        while len(b) >= 2:
            sems = sem(b)
            # Collapse the per-component s.e.m. vector to one number per residue.
            sem_avg = np.linalg.norm(sems) / nres
            sem_avg_err = sem_avg * (1.0 / np.sqrt(2 * (len(b) - 1)))
            if i == 0:
                # Reference value for the correlation-time estimate below.
                sem_avg0 = sem_avg
            block_outf.write(" %4i %9i %12.3e %12.3e   %15.2f\n"
                             % (i, len(b), sem_avg, sem_avg_err,
                                dt * (sem_avg / sem_avg0) ** 2))
            # One blocking step: average adjacent pairs (drops a trailing odd row).
            b = np.array([0.5 * (b[2 * j] + b[2 * j + 1])
                          for j in range(int(len(b) / 2))])
            i = i + 1
Example 23
def reblock_summary(df, nblocks):
    """Reblock df into nblocks and summarize mean, s.e.m., and its error.

    :param df: DataFrame of raw samples (rows) per quantity (columns).
    :param nblocks: number of blocks to reblock into.
    :return: DataFrame with mean, standard error, its uncertainty, and n_blocks.
    """
    df = reblock(df, nblocks)
    serr = df.sem()
    summary = {
        "mean": df.mean(axis=0),
        "standard error": serr,
        # Uncertainty of the s.e.m. estimate: serr / sqrt(2 * (nblocks - 1)).
        "standard error error": serr / np.sqrt(2 * (len(df) - 1)),
        "n_blocks": nblocks,
    }
    return pd.DataFrame(summary)
Example 24
def opt_block(df):
    """
    Finds optimal block size for each variable in a dataset.

    df is a dataframe where each row is a sample and each column is a
    calculated quantity; reblock each column over samples to find the best
    block size.

    Returns optimal_block, a 1D array with the optimal size for each column
    in df (NaN when no level satisfies the criterion).
    """
    newdf = df.copy()
    iblock = 0
    ndata, nvariables = tuple(df.shape[:2])
    optimal_block = np.array([float("NaN")] * nvariables)
    serr0 = df.sem(axis=0).values
    statslist = []
    # Repeatedly halve the data by averaging adjacent rows, recording the
    # standard error at each blocking level.
    while newdf.shape[0] > 1:
        serr = newdf.sem(axis=0).values
        statslist.append((iblock, serr.copy()))

        n = newdf.shape[0]
        lasteven = n - int(n % 2 == 1)  # drop a trailing odd row
        newdf = (newdf[:lasteven:2] + newdf[1::2].values) / 2
        iblock += 1
    # Scan from the coarsest level down; the optimal block for a column is the
    # smallest level satisfying the plateau criterion
    # B^3 >= 2 * ndata * (serr / serr0)^4.
    for iblock, serr in reversed(statslist):
        B3 = 2 ** (3 * iblock)
        inds = np.where(B3 >= 2 * ndata * (serr / serr0) ** 4)[0]
        optimal_block[inds] = iblock

    return optimal_block
Example 25
def test_reblocking():
    """
    Tests reblocking against known distribution.
    """
    from scipy.stats import sem

    def corr_data(N, L):
        """Create serially-correlated data via a moving-average convolution."""
        return np.convolve(np.random.randn(2 ** N), np.ones(2 ** L) / 10, "same")

    n = 11
    cols = ["test_data1", "test_data2"]
    test_data = pd.DataFrame(data={cols[0]: corr_data(n, 4),
                                   cols[1]: corr_data(n, 7)})
    reblocked_data = optimally_reblocked(test_data[cols])
    for c in cols:
        row = reblocked_data.loc[c]
        reblocks = reblocked_data["reblocks"].values[0]
        std_err = sem(reblock_by2(test_data, reblocks, c))
        std_err_err = std_err / np.sqrt(2 * (2 ** (n - reblocks) - 1))

        assert np.isclose(
            row["mean"], np.mean(test_data[c]), 1e-10, 1e-12
        ), "Means are not equal"
        assert np.isclose(
            row["standard error"], std_err, 1e-10, 1e-12
        ), "Standard errors are not equal"
        assert np.isclose(
            row["standard error error"], std_err_err, 1e-10, 1e-12
        ), "Standard error errors are not equal"

    # reblock() with len/4 blocks must agree with two by-2 reblocking steps.
    statlist = ["mean", "sem", lambda x: x.sem() / np.sqrt(2 * (len(x) - 1))]
    rb1 = reblock(test_data, len(test_data) // 4).agg(statlist).T
    rb2 = reblock_by2(test_data, 2).agg(statlist).T
    for c in rb1.columns:
        assert np.isclose(rb1[c], rb2[c], 1e-10, 1e-12).all(), (c, rb1[c], rb2[c])
Example 26
def evaluate_cross_validation(clf, X, y, K):
    """Print per-fold scores and the mean score (+/- s.e.m.) for clf."""
    # create a k-fold cross validation iterator
    cv = KFold(len(y), K, shuffle=True, random_state=0)
    # by default the score used is the one returned by score method of the estimator (accuracy)
    scores = cross_val_score(clf, X, y, cv=cv)
    print(scores)
    print("Mean score: {0:.3f} (+/-{1:.3f})".format(numpy.mean(scores), sem(scores)))
Example 27
def test_sem(self):
    """mstats.sem matches the hand-computed value and rescales with ddof."""
    # This is not in R, so used: sqrt(var(testcase)*3/4) / sqrt(3)
    y = mstats.sem(self.testcase)
    assert_almost_equal(y, 0.6454972244)
    n = self.testcase.count()
    # ddof=2 equals the ddof=0 estimate rescaled by sqrt(n / (n - 2)).
    assert_allclose(mstats.sem(self.testcase, ddof=0) * np.sqrt(n/(n-2)),
                    mstats.sem(self.testcase, ddof=2))
Example 28
def scipy_sem(*args, **kwargs):
    """Standard error of the mean via scipy, pinned to the sample (ddof=1) form.

    Thin wrapper around :func:`scipy.stats.sem` that forces ``ddof=1`` while
    forwarding all other arguments unchanged.
    """
    from scipy.stats import sem

    return sem(*args, ddof=1, **kwargs)
Example 29
def test_nansem(self, ddof):
    """nanops.nansem must match scipy.stats.sem for the given ddof."""
    from scipy.stats import sem
    # All-NaN slices trigger invalid-value warnings; silence them.
    with np.errstate(invalid='ignore'):
        self.check_funs(nanops.nansem, sem, allow_complex=False,
                        allow_str=False, allow_date=False,
                        allow_tdelta=False, allow_obj='convert', ddof=ddof)
Example 30
def test_ops_general():
    """Each groupby shortcut method must agree with .agg of its numpy analogue."""
    ops = [
        ('mean', np.mean),
        ('median', np.median),
        ('std', np.std),
        ('var', np.var),
        ('sum', np.sum),
        ('prod', np.prod),
        ('min', np.min),
        ('max', np.max),
        ('first', lambda x: x.iloc[0]),
        ('last', lambda x: x.iloc[-1]),
        ('count', np.size),
    ]
    # sem is only checked when scipy is available.
    try:
        from scipy.stats import sem
    except ImportError:
        pass
    else:
        ops.append(('sem', sem))

    df = DataFrame(np.random.randn(1000))
    labels = np.random.randint(0, 50, size=1000).astype(float)

    for op, targop in ops:
        result = getattr(df.groupby(labels), op)().astype(float)
        expected = df.groupby(labels).agg(targop)
        try:
            tm.assert_frame_equal(result, expected)
        except BaseException as exc:
            # Tag the failure with the operation name for easier debugging.
            exc.args += ('operation: %s' % op, )
            raise
Example 31
 Project: att   Author: Centre-Alt-Rendiment-Esportiu   File: test_mstats_basic.py    GNU General Public License v3.0 5 votes
def test_sem(self):
    """mstats.sem matches the hand-computed value and rescales with ddof."""
    # This is not in R, so used: sqrt(var(testcase)*3/4) / sqrt(3)
    y = mstats.sem(self.testcase)
    assert_almost_equal(y, 0.6454972244)
    n = self.testcase.count()
    # ddof=2 equals the ddof=0 estimate rescaled by sqrt(n / (n - 2)).
    assert_allclose(mstats.sem(self.testcase, ddof=0) * np.sqrt(n/(n-2)),
                    mstats.sem(self.testcase, ddof=2))
Example 32
 Project: att   Author: Centre-Alt-Rendiment-Esportiu   File: test_stats.py    GNU General Public License v3.0 5 votes
def test_sem(self):
    """stats.sem matches the hand-computed value and rescales with ddof."""
    # This is not in R, so used:
    #     sqrt(var(testcase)*3/4)/sqrt(3)

    # y = stats.sem(self.shoes[0])
    # assert_approx_equal(y,0.775177399)
    y = stats.sem(self.testcase)
    assert_approx_equal(y, 0.6454972244)
    n = len(self.testcase)
    # ddof=2 equals the ddof=0 estimate rescaled by sqrt(n / (n - 2)).
    assert_allclose(stats.sem(self.testcase, ddof=0) * np.sqrt(n/(n-2)),
                    stats.sem(self.testcase, ddof=2))
Example 33
def test_nansem(self, ddof):
    """nanops.nansem must match scipy.stats.sem for the given ddof."""
    from scipy.stats import sem
    # All-NaN slices trigger invalid-value warnings; silence them.
    with np.errstate(invalid='ignore'):
        self.check_funs(nanops.nansem, sem, allow_complex=False,
                        allow_str=False, allow_date=False,
                        allow_tdelta=False, allow_obj='convert', ddof=ddof)
Example 34
def test_ops_general():
    """Each groupby shortcut method must agree with .agg of its numpy analogue."""
    ops = [
        ('mean', np.mean),
        ('median', np.median),
        ('std', np.std),
        ('var', np.var),
        ('sum', np.sum),
        ('prod', np.prod),
        ('min', np.min),
        ('max', np.max),
        ('first', lambda x: x.iloc[0]),
        ('last', lambda x: x.iloc[-1]),
        ('count', np.size),
    ]
    # sem is only checked when scipy is available.
    try:
        from scipy.stats import sem
    except ImportError:
        pass
    else:
        ops.append(('sem', sem))

    df = DataFrame(np.random.randn(1000))
    labels = np.random.randint(0, 50, size=1000).astype(float)

    for op, targop in ops:
        result = getattr(df.groupby(labels), op)().astype(float)
        expected = df.groupby(labels).agg(targop)
        try:
            tm.assert_frame_equal(result, expected)
        except BaseException as exc:
            # Tag the failure with the operation name for easier debugging.
            exc.args += ('operation: %s' % op, )
            raise
Example 35
def test_nansem(self, ddof):
    """nanops.nansem must match scipy.stats.sem for the given ddof."""
    from scipy.stats import sem
    # All-NaN slices trigger invalid-value warnings; silence them.
    with np.errstate(invalid='ignore'):
        self.check_funs(nanops.nansem, sem, allow_complex=False,
                        allow_str=False, allow_date=False,
                        allow_tdelta=False, allow_obj='convert', ddof=ddof)
Example 36
def test_ops_general():
    """Each groupby shortcut method must agree with .agg of its numpy analogue."""
    ops = [
        ('mean', np.mean),
        ('median', np.median),
        ('std', np.std),
        ('var', np.var),
        ('sum', np.sum),
        ('prod', np.prod),
        ('min', np.min),
        ('max', np.max),
        ('first', lambda x: x.iloc[0]),
        ('last', lambda x: x.iloc[-1]),
        ('count', np.size),
    ]
    # sem is only checked when scipy is available.
    try:
        from scipy.stats import sem
    except ImportError:
        pass
    else:
        ops.append(('sem', sem))

    df = DataFrame(np.random.randn(1000))
    labels = np.random.randint(0, 50, size=1000).astype(float)

    for op, targop in ops:
        result = getattr(df.groupby(labels), op)().astype(float)
        expected = df.groupby(labels).agg(targop)
        try:
            tm.assert_frame_equal(result, expected)
        except BaseException as exc:
            # Tag the failure with the operation name for easier debugging.
            exc.args += ('operation: %s' % op, )
            raise
Example 37
def test_sem(self):
    """mstats.sem matches the hand-computed value and rescales with ddof."""
    # This is not in R, so used: sqrt(var(testcase)*3/4) / sqrt(3)
    y = mstats.sem(self.testcase)
    assert_almost_equal(y, 0.6454972244)
    n = self.testcase.count()
    # ddof=2 equals the ddof=0 estimate rescaled by sqrt(n / (n - 2)).
    assert_allclose(mstats.sem(self.testcase, ddof=0) * np.sqrt(n/(n-2)),
                    mstats.sem(self.testcase, ddof=2))
Example 38
def test_sem(self):
    """
    this is not in R, so used
    sqrt(var(testcase)*3/4)/sqrt(3)
    """
    #y = stats.sem(self.shoes[0])
    #assert_approx_equal(y,0.775177399)
    y = mstats.sem(self.testcase)
    assert_almost_equal(y, 0.6454972244)
Example 39
def test_sem(self):
    """stats.sem of testcase equals sqrt(var(testcase)*3/4)/sqrt(3)."""
    # This is not in R, so used:
    #     sqrt(var(testcase)*3/4)/sqrt(3)

    # y = stats.sem(self.shoes[0])
    # assert_approx_equal(y,0.775177399)
    y = stats.sem(self.testcase)
    assert_approx_equal(y, 0.6454972244)
Example 40
def test_sem(self):
    """mstats.sem matches the hand-computed value and rescales with ddof."""
    # This is not in R, so used: sqrt(var(testcase)*3/4) / sqrt(3)
    y = mstats.sem(self.testcase)
    assert_almost_equal(y, 0.6454972244)
    n = self.testcase.count()
    # ddof=2 equals the ddof=0 estimate rescaled by sqrt(n / (n - 2)).
    assert_allclose(mstats.sem(self.testcase, ddof=0) * np.sqrt(n/(n-2)),
                    mstats.sem(self.testcase, ddof=2))
Example 41
def _plot_perf(regrets, ax, cmap, ylab='Simple Regret',
               ordering=None, log_plot=False, ts=None):
    """Plot the total simple regrets.

    Args:
        regrets: dict method -> ndarray, each row is a run's regret.
        ax: pyplot ax object.
        cmap: dict function name -> color.
        ylab: y-axis label.
        ordering: iterable of method names controlling plot order
            (defaults to regrets.keys()).
        log_plot: if True, use a log-scaled y axis (shifting data positive
            if needed).
        ts: x-axis values; defaults to 0..T-1.

    NOTE(review): the scraped copy of this function had its signature
    truncated after ``ylab``; the keyword parameters ``ordering``,
    ``log_plot`` and ``ts`` are reconstructed from their uses in the body —
    confirm defaults against the upstream source.
    """
    if ordering is None:
        ordering = regrets.keys()
    if log_plot:
        # A log axis needs strictly positive data; shift everything if any
        # regret is non-positive.
        if not (np.asarray([(reg > 0).all() for reg in regrets.values()]).all()):
            global_min = min([np.min(reg) for reg in regrets.values()])
            for reg in regrets.values():
                reg += (0.01 - global_min)
        ax.set_yscale('log', nonposy='clip')
    max_val, min_val = float('-inf'), float('inf')
    for method in ordering:
        if method not in regrets:
            continue
        reg = regrets[method]
        if ts is None:
            plot_ts = range(reg.shape[1])
        else:
            plot_ts = ts
        avg = np.mean(reg, axis=0)
        max_avg, min_avg = max(avg), min(avg)
        max_val = max_val if max_val > max_avg else max_avg
        min_val = min_val if min_val < min_avg else min_avg
        # Shade mean +/- standard error across runs.
        err = sem(reg, axis=0)
        ax.plot(plot_ts, avg, c=cmap[method], label=method.upper(),
                linewidth=2)
        ax.fill_between(plot_ts, avg - err, avg + err, color=cmap[method],
                        alpha=0.4)
    ax.set_xlabel('t')
    ax.set_ylabel(ylab)
    leg = ax.legend()
    for legobj in leg.legendHandles:
        legobj.set_linewidth(5.0)
Example 42
# NOTE(review): the scraped copy of this function lost its indentation and at
# least the body of the `else:` branch in the tick-name loop near the end —
# as shown, set_xticklabels/set_ylabel/legend appear inside the loop's else,
# which cannot be the original structure. Recover from upstream before use.
def _plot_proportions(f_names, proportions, ax, cmap, add_legend=True,
ordering=None):
"""Plot histogram of proportions for functions.
Args:
f_names: Name of the functions.
proportions: dict method -> ndarray, each row is a run's proportion.
ax: pyplot ax object.
cmap: dict function name -> color.
"""
if ordering is None:
ordering = proportions.keys()
idx = 0
num_avgs = len(f_names)
width = 1 / (num_avgs + 1)
for method in ordering:
if method not in proportions:
continue
props = proportions[method]
avgs = np.mean(props, axis=0)
# Error bars are the standard error of the mean across runs.
errs = sem(props, axis=0)
xs = 2 * np.arange(num_avgs) + width * idx
ax.bar(xs, avgs, width, yerr=errs, label=method.upper(), align='edge',
color=cmap[method])
idx += 1
ax.set_xticks(2 * np.arange(num_avgs) + width * num_avgs / 2)
tick_names = []
for fn in f_names:
if 'constant' in fn.lower():
tick_names.append('Constant')
else:
ax.set_xticklabels(tick_names)
ax.set_ylabel('Proportion')
ax.legend() 
Example 43
def test_sem(self):
    """mstats.sem matches the hand-computed value and rescales with ddof."""
    # This is not in R, so used: sqrt(var(testcase)*3/4) / sqrt(3)
    y = mstats.sem(self.testcase)
    assert_almost_equal(y, 0.6454972244)
    n = self.testcase.count()
    # ddof=2 equals the ddof=0 estimate rescaled by sqrt(n / (n - 2)).
    assert_allclose(mstats.sem(self.testcase, ddof=0) * np.sqrt(n/(n-2)),
                    mstats.sem(self.testcase, ddof=2))
Example 44
def get_correct_estimates_for_ru(files, ru_length=None, adVNTR=False):
    """Score repeat-unit estimates against the simulated truth per VNTR.

    File names are expected to encode ``..._<vntr_id>_<simulated_count>_...``.
    The last line of each file holds the estimate (or 'None').

    :param files: list of result-file paths.
    :param ru_length: unused here; kept for interface compatibility.
    :param adVNTR: unused here; kept for interface compatibility.
    :return: (mean percent-correct across VNTRs, s.e.m. across VNTRs),
        or (0, 0) when files is empty.
    """
    count = 0
    vntr_results = {}
    if len(files) == 0:  ###################TEMP
        return 0, 0
    for file_name in files:
        count += 1
        vntr_id = int(file_name.split('_')[-3])
        if vntr_id not in vntr_results.keys():
            vntr_results[vntr_id] = []
        sim = int(file_name.split('_')[-2])
        correct = False
        with open(file_name) as input:
            # NOTE(review): this readlines() call was missing from the scraped
            # copy (it referenced `lines` without defining it); restored.
            lines = input.readlines()
        if len(lines) > 1:
            if lines[-1].strip() != 'None' and len(lines[-1]) < 10:
                estimate = int(float(lines[-1].strip()))
                if estimate == sim:
                    correct = True
        if correct:
            vntr_results[vntr_id].append(1)
        else:
            vntr_results[vntr_id].append(0)
    vntr_averages = {}
    for vntr_id in vntr_results.keys():
        vntr_averages[vntr_id] = sum(vntr_results[vntr_id]) / float(len(vntr_results[vntr_id])) * 100
    correct_ratio = sum(vntr_averages.values()) / float(len(vntr_averages.values()))
    from scipy import stats
    error_bar = stats.sem([e for e in vntr_averages.values()])
    return correct_ratio, error_bar
Example 45
def test_sem(self):
    """mstats.sem matches the hand-computed value and rescales with ddof."""
    # This is not in R, so used: sqrt(var(testcase)*3/4) / sqrt(3)
    y = mstats.sem(self.testcase)
    assert_almost_equal(y, 0.6454972244)
    n = self.testcase.count()
    # ddof=2 equals the ddof=0 estimate rescaled by sqrt(n / (n - 2)).
    assert_allclose(mstats.sem(self.testcase, ddof=0) * np.sqrt(n/(n-2)),
                    mstats.sem(self.testcase, ddof=2))
Example 46
 Project: tschdata   Author: tum-lkn   File: toolbox.py    GNU General Public License v3.0 5 votes
def mean_confidence_interval(data, confidence=0.95):
    """Return the half-width of the Student-t confidence interval of the mean.

    :param data: sequence of numeric observations.
    :param confidence: confidence level in (0, 1); defaults to 0.95.
    :return: half-width h such that the interval is mean +/- h.
    """
    a = 1.0 * np.array(data)
    n = len(a)
    m, se = np.mean(a), st.sem(a)
    # Bug fix: use the public st.t.ppf instead of the private st.t._ppf.
    h = se * st.t.ppf((1 + confidence) / 2., n - 1)
    return h
Example 47
def confidenceInterval(values, mean: float, confidence: float = 0.99) -> (float,
                                                                          float):
    """Calculates confidence interval using Student's t-distribution and
    standard error of mean.

    :param values: sequence of numeric observations.
    :param mean: center of the interval (typically np.mean(values)).
    :param confidence: confidence level in (0, 1); defaults to 0.99.
    :return: (low, high) bounds of the interval.
    """
    return stats.t.interval(confidence, len(values) - 1, loc=mean,
                            scale=stats.sem(values))
Example 48
def mean_confidence_interval(data, confidence=0.95):
    """Return (mean, low, high) for a Student-t confidence interval of the mean.

    :param data: sequence of numeric observations.
    :param confidence: confidence level in (0, 1); defaults to 0.95.
    :return: tuple (mean, mean - h, mean + h) with h the interval half-width.
    """
    a = 1.0 * np.array(data)
    n = len(a)
    m, se = np.mean(a), st.sem(a)
    # Bug fix: use the public st.t.ppf instead of the private st.t._ppf.
    h = se * st.t.ppf((1 + confidence) / 2., n - 1)
    return m, m - h, m + h
Example 49
def test_nansem(self, ddof):
    """nanops.nansem must match scipy.stats.sem for the given ddof."""
    from scipy.stats import sem
    # All-NaN slices trigger invalid-value warnings; silence them.
    with np.errstate(invalid='ignore'):
        self.check_funs(nanops.nansem, sem, allow_complex=False,
                        allow_str=False, allow_date=False,
                        allow_tdelta=False, allow_obj='convert', ddof=ddof)
Example 50
def test_ops_general():
    """Each groupby shortcut method must agree with .agg of its numpy analogue."""
    ops = [
        ('mean', np.mean),
        ('median', np.median),
        ('std', np.std),
        ('var', np.var),
        ('sum', np.sum),
        ('prod', np.prod),
        ('min', np.min),
        ('max', np.max),
        ('first', lambda x: x.iloc[0]),
        ('last', lambda x: x.iloc[-1]),
        ('count', np.size),
    ]
    # sem is only checked when scipy is available.
    try:
        from scipy.stats import sem
    except ImportError:
        pass
    else:
        ops.append(('sem', sem))

    df = DataFrame(np.random.randn(1000))
    labels = np.random.randint(0, 50, size=1000).astype(float)

    for op, targop in ops:
        result = getattr(df.groupby(labels), op)().astype(float)
        expected = df.groupby(labels).agg(targop)
        try:
            tm.assert_frame_equal(result, expected)
        except BaseException as exc:
            # Tag the failure with the operation name for easier debugging.
            exc.args += ('operation: %s' % op, )
            raise
Example 51
def test_nansem(self, ddof):
    """nanops.nansem must match scipy.stats.sem for the given ddof."""
    from scipy.stats import sem
    # All-NaN slices trigger invalid-value warnings; silence them.
    with np.errstate(invalid='ignore'):
        self.check_funs(nanops.nansem, sem, allow_complex=False,
                        allow_str=False, allow_date=False,
                        allow_tdelta=False, allow_obj='convert', ddof=ddof)
Example 52
def test_ops_general():
    """Each groupby shortcut method must agree with .agg of its numpy analogue."""
    ops = [
        ('mean', np.mean),
        ('median', np.median),
        ('std', np.std),
        ('var', np.var),
        ('sum', np.sum),
        ('prod', np.prod),
        ('min', np.min),
        ('max', np.max),
        ('first', lambda x: x.iloc[0]),
        ('last', lambda x: x.iloc[-1]),
        ('count', np.size),
    ]
    # sem is only checked when scipy is available.
    try:
        from scipy.stats import sem
    except ImportError:
        pass
    else:
        ops.append(('sem', sem))

    df = DataFrame(np.random.randn(1000))
    labels = np.random.randint(0, 50, size=1000).astype(float)

    for op, targop in ops:
        result = getattr(df.groupby(labels), op)().astype(float)
        expected = df.groupby(labels).agg(targop)
        try:
            tm.assert_frame_equal(result, expected)
        except BaseException as exc:
            # Tag the failure with the operation name for easier debugging.
            exc.args += ('operation: %s' % op, )
            raise
Example 53
 Project: PyRouge   Author: magic282   File: Rouge.py    GNU General Public License v3.0 5 votes
def get_mean_sd_internal(self, x):
    """Return the mean, s.e.m., and 95% Student-t interval of x.

    :param x: sequence of numeric observations.
    :return: tuple (mean, sem, (low, high)).
    """
    mean = np.mean(x)
    sd = st.sem(x)
    res = st.t.interval(0.95, len(x) - 1, loc=mean, scale=sd)
    return (mean, sd, res)
Example 54
def test_nested_stats_with_missing_data(self):
    """Missing group/hue combinations should yield NaN stats and intervals."""
    p = cat._CategoricalStatPlotter()

    g = pd.Series(np.repeat(list("abc"), 100))
    y = pd.Series(np.random.RandomState(0).randn(300))
    h = pd.Series(np.tile(list("xy"), 150))

    # "d" (group) and "z" (hue) are requested but absent from the data.
    p.establish_variables(g, y, h,
                          order=list("abdc"),
                          hue_order=list("zyx"))
    p.estimate_statistic(np.mean, 95, 50000)

    nt.assert_equal(p.statistic.shape, (4, 3))
    nt.assert_equal(p.confint.shape, (4, 3, 2))

    # Present combination ("b", "x"): bootstrap CI matches normal theory.
    mean = y[(g == "b") & (h == "x")].mean()
    sem = stats.sem(y[(g == "b") & (h == "x")])
    half_ci = stats.norm.ppf(.975) * sem
    ci = mean - half_ci, mean + half_ci
    npt.assert_almost_equal(p.statistic[1, 2], mean)
    npt.assert_array_almost_equal(p.confint[1, 2], ci, 2)

    # Missing hue "z" (column 0) and missing group "d" (row 2) are all NaN.
    npt.assert_array_equal(p.statistic[:, 0], [np.nan] * 4)
    npt.assert_array_equal(p.statistic[2], [np.nan] * 3)
    npt.assert_array_equal(p.confint[:, 0],
                           np.zeros((4, 2)) * np.nan)
    npt.assert_array_equal(p.confint[2],
                           np.zeros((3, 2)) * np.nan)
Example 55
def test_nansem(self, ddof):
    """nanops.nansem must match scipy.stats.sem for the given ddof."""
    from scipy.stats import sem
    # All-NaN slices trigger invalid-value warnings; silence them.
    with np.errstate(invalid='ignore'):
        self.check_funs(nanops.nansem, sem, allow_complex=False,
                        allow_str=False, allow_date=False,
                        allow_tdelta=False, allow_obj='convert', ddof=ddof)
Example 56
 Project: HistoGAN   Author: BMIRDS   File: accuracy_tester.py    GNU General Public License v3.0 5 votes
def get_confidence_interval(class_accuracies, confidence):
    """Print and return the Student-t confidence interval for the mean accuracy.

    :param class_accuracies: sequence of per-class accuracy values.
    :param confidence: confidence level in (0, 1), e.g. 0.95.
    :return: (low, high) bounds of the interval.
    """
    # Bug fix: the original referenced a global CONFIDENCE_LEVEL instead of
    # the `confidence` parameter it was given.
    interval = st.t.interval(confidence, len(class_accuracies) - 1,
                             loc=np.mean(class_accuracies),
                             scale=st.sem(class_accuracies))
    print(interval)
    return interval
Example 57
def test_nansem(self):
    """nanops.nansem must match scipy.stats.sem across ddof values."""
    tm.skip_if_no_package('scipy', min_version='0.17.0')
    from scipy.stats import sem
    # All-NaN slices trigger invalid-value warnings; silence them.
    with np.errstate(invalid='ignore'):
        self.check_funs_ddof(nanops.nansem, sem, allow_complex=False,
                             allow_str=False, allow_date=False,
                             allow_tdelta=False, allow_obj='convert')
Example 58
def test_ops_general(self):
    """Each named groupby reduction matches .agg() with the equivalent callable.

    scipy's `sem` is exercised only when scipy is importable, so the test
    degrades gracefully without the optional dependency.  On mismatch the
    failing operation's name is appended to the exception for easier triage.
    """
    ops = [('mean', np.mean),
           ('median', np.median),
           ('std', np.std),
           ('var', np.var),
           ('sum', np.sum),
           ('prod', np.prod),
           ('min', np.min),
           ('max', np.max),
           ('first', lambda x: x.iloc[0]),
           ('last', lambda x: x.iloc[-1]),
           ('count', np.size), ]
    try:
        from scipy.stats import sem
    except ImportError:
        # scipy is optional; skip the sem op when it is unavailable.
        pass
    else:
        ops.append(('sem', sem))
    df = DataFrame(np.random.randn(1000))
    labels = np.random.randint(0, 50, size=1000).astype(float)

    for op, targop in ops:
        result = getattr(df.groupby(labels), op)().astype(float)
        expected = df.groupby(labels).agg(targop)
        try:
            tm.assert_frame_equal(result, expected)
        except BaseException as exc:
            # Tag the failing operation so the error message identifies it.
            exc.args += ('operation: %s' % op, )
            raise
Example 59
def test_sem(self):
    # Reference value is not available in R; computed by hand as
    # sqrt(var(testcase)*3/4) / sqrt(3).
    y = mstats.sem(self.testcase)
    assert_almost_equal(y, 0.6454972244)
    n = self.testcase.count()
    # Changing ddof rescales the estimate: sem(ddof=0) * sqrt(n/(n-2))
    # must equal sem(ddof=2) exactly.
    assert_allclose(mstats.sem(self.testcase, ddof=0) * np.sqrt(n/(n-2)),
                    mstats.sem(self.testcase, ddof=2))
Example 60
def _comp_mean_ci(sample, confidence=0.95):
"""Copmute mean and confidence interval from distribution"""
if np.all(sample == sample[0]):
return (sample[0],) * 3
else:
mean = np.mean(sample)
ci = st.t.interval(confidence, len(sample), loc=mean, scale=st.sem(sample))
return mean, ci[0], ci[1] 
Example 61
def bootstrap(experiment_name, test_name, net, n_roles, output_list=None, alpha=0.99, n_b=100):
    """Bootstrap precision/recall/F1 over n_b resampled confusion matrices.

    Resamples the (gold, predicted) index pairs in output_list with
    replacement n_b times, rebuilds an n_roles x n_roles confusion matrix
    per draw, and prints mean, std and a Student-t interval (level `alpha`)
    for each metric.

    Returns:
        (P_mean, P_std, R_mean, R_std, F1_mean, F1_std)
    """
    print ('Bootstrap method: ')
    if not output_list:
        # NOTE(review): truthiness check assumes output_list is None or a
        # plain list; a numpy array here would raise on bool() -- confirm.
        output_list = np.loadtxt('output_' + experiment_name + '.' + test_name.strip('.dat') + '.csv', delimiter = ',')

    rng = np.random
    P_list, R_list, F1_list = [], [], []
    for i in range(n_b):
        # One bootstrap draw: sample row indices with replacement.
        rand_len = len(output_list)
        indices = rng.randint(0, rand_len, rand_len)
        conMat = np.zeros((n_roles, n_roles))
        for idx in indices:
            x = int(output_list[idx][0])
            y = int(output_list[idx][1])
            conMat[x, y] += 1
        # `stats` is a project-level scorer (returns 6 metrics), not scipy.stats.
        dir_P, dir_R, dir_F1, precision, recall, F1 = stats(net, conMat)
        P_list.append(dir_P)
        R_list.append(dir_R)
        F1_list.append(dir_F1)

    P_array, R_array, F1_array = np.array(P_list), np.array(R_list), np.array(F1_list)
    P_mean, P_std = np.mean(P_array), np.std(P_array)
    R_mean, R_std = np.mean(R_array), np.std(R_array)
    F1_mean, F1_std = np.mean(F1_array), np.std(F1_array)
    # Student-t confidence intervals over the bootstrap replicates.
    P_t_int = st.t.interval(alpha, len(P_array)-1, loc=np.mean(P_array), scale=st.sem(P_array))
    R_t_int = st.t.interval(alpha, len(R_array)-1, loc=np.mean(R_array), scale=st.sem(R_array))
    F1_t_int = st.t.interval(alpha, len(F1_array)-1, loc=np.mean(F1_array), scale=st.sem(F1_array))

    print ('Precision: \t %.2f \t %.2f \t %.2f \t %.2f' % (P_mean, P_std, P_t_int[0], P_t_int[1]))
    print ('Recall:    \t %.2f \t %.2f \t %.2f \t %.2f' % (R_mean, R_std, R_t_int[0], R_t_int[1]))
    print ('F1:        \t %.2f \t %.2f \t %.2f \t %.2f' % (F1_mean, F1_std, F1_t_int[0], F1_t_int[1]))

    return P_mean, P_std, R_mean, R_std, F1_mean, F1_std
Example 62
def t_EB(x, alpha=0.05, axis=-1):
    """Get t-statistic based error bars on mean of x.

    Parameters
    ----------
    x : :class:numpy:numpy.ndarray of shape (n_samples,)
        Data points to estimate mean. Must not be empty or contain NaN.
    alpha : float
        The alpha level (1-confidence) probability (in (0, 1)) to construct
        confidence interval from t-statistic.
    axis : int
        The axis on x where we compute the t-statistics. The function is
        vectorized over all other dimensions.

    Returns
    -------
    EB : float
        Size of error bar on mean (>= 0). The confidence interval is
        [mean(x) - EB, mean(x) + EB]. EB is inf when len(x) <= 1. Will be
        NaN if there are any infinite values in x.
    """
    assert np.ndim(x) >= 1 and (not np.any(np.isnan(x)))
    assert np.ndim(alpha) == 0
    assert 0.0 < alpha and alpha < 1.0

    n_samples = np.shape(x)[axis]
    if n_samples <= 1:
        # Too few points for a t-interval: report infinite error bars with
        # the same shape the reduced mean would have.
        return np.full(np.sum(x, axis=axis).shape, fill_value=np.inf)

    # loc/scale cancel out below; only the interval width matters here.
    lower, upper = sst.t.interval(1 - alpha, n_samples - 1, loc=0.0, scale=1.0)
    assert not (lower > upper)
    # Scaling sem by the half-width behaves better than passing scale=sem
    # when the standard error is exactly zero.
    return 0.5 * sst.sem(x, axis=axis) * (upper - lower)
Example 63
 Project: senior-design   Author: james-tate   File: test_stats.py    GNU General Public License v2.0 5 votes
def test_sem(self):
    """Check stats.sem against a hand-computed reference.

    The value is not available in R; it was derived by hand as
    sqrt(var(testcase)*3/4)/sqrt(3).
    """
    # Historical reference for the shoes fixture, kept for context:
    #y = stats.sem(self.shoes[0])
    #assert_approx_equal(y,0.775177399)
    y = stats.sem(self.testcase)
    assert_approx_equal(y,0.6454972244)
Example 64
def get_depth_for_regions(bam_fields, alt_breakpoints, ref_breakpoints, min_mapping_quality, min_base_quality):
    """
    Return median minus standard error of the mean of read depth across
    variant breakpoints in the given alternate and reference haplotypes.

    NOTE(review): the original summary said "standard deviation", but the
    code below subtracts scipy.stats.sem, not np.std.
    """
    # Convert breakpoints to "chrom:start-end" region strings for the query;
    # each breakpoint tuple appears to be (chrom, start, end) -- TODO confirm.
    alt_regions = ["%s:%s-%s" % alt_breakpoint for alt_breakpoint in alt_breakpoints]
    ref_regions = ["%s:%s-%s" % ref_breakpoint for ref_breakpoint in ref_breakpoints]
    # alt_region_size = alt_breakpoints[2] - alt_breakpoints[1]
    # ref_region_size = ref_breakpoints[2] - ref_breakpoints[1]
    # logger.debug("Min depth for alt region %s (size: %s)", alt_region, alt_region_size)
    # logger.debug("Min depth for ref region %s (size: %s)", ref_region, ref_region_size)

    best_alignments = get_best_alignments(bam_fields["file"], ref_regions + alt_regions, quality=min_mapping_quality)
    depth_by_reference_and_position = get_depth_by_reference_and_position(best_alignments, bam_fields["file"], min_base_quality)

    # Per-position depths over each breakpoint interval (end-inclusive);
    # positions missing from the map count as depth 0.  `xrange` means this
    # module targets Python 2.
    alt_depths = [depth_by_reference_and_position.get(alt_breakpoint[0], {}).get(i, 0)
                  for alt_breakpoint in alt_breakpoints
                  for i in xrange(alt_breakpoint[1], alt_breakpoint[2] + 1)]
    ref_depths = [depth_by_reference_and_position.get(ref_breakpoint[0], {}).get(i, 0)
                  for ref_breakpoint in ref_breakpoints
                  for i in xrange(ref_breakpoint[1], ref_breakpoint[2] + 1)]

    logger.debug("Found %i depths for regions %s: %s", len(alt_depths), alt_regions, alt_depths)
    logger.debug("Found %i depths for regions %s: %s", len(ref_depths), ref_regions, ref_depths)
    logger.debug("Median alt depths: %s", np.median(alt_depths))
    logger.debug("Standard deviation alt depths: %s", np.std(alt_depths))
    logger.debug("Median ref depths: %s", np.median(ref_depths))
    logger.debug("Standard deviation ref depths: %s", np.std(ref_depths))
    # Conservative depth estimate: median minus SEM, floored at zero.
    high_quality_alt_depth = max(int(np.round(np.median(alt_depths) - sem(alt_depths))), 0)
    high_quality_ref_depth = max(int(np.round(np.median(ref_depths) - sem(ref_depths))), 0)

    return high_quality_alt_depth, high_quality_ref_depth
Example 65
def mean_confidence_interval(data, confidence=0.9):
    """Calculation of mean confidence interval.

    Args:
        data: the data for which the confidence interval has to be found
        confidence: the confidence threshold

    Returns:
        mean of data, mean confidence interval
    """
    avg = np.mean(data)
    std_err = st.sem(data)
    # Two-sided critical value of Student's t with n - 1 degrees of freedom.
    t_crit = st.t.ppf((1 + confidence) / 2.,
                      len(data) - 1)
    return avg, t_crit * std_err
Example 66
def confidenceinterval(x: List[float], conf_level=0.95):
    """Return the (lower, upper) t-based confidence interval for the mean of x.

    Args:
        x: non-empty sample values.
        conf_level: two-sided confidence level in (0, 1).

    Returns:
        Tuple of (lower bound, upper bound).
    """
    a = 1.0 * np.array(x)
    n = len(a)
    m, se = np.mean(a), sem(a)
    # BUG FIX: use the public t.ppf API; t._ppf is private and was removed
    # from modern scipy releases (for the standard t they coincide).
    h = se * t.ppf((1 + conf_level) / 2., n - 1)
    return m - h, m + h
Example 67
def fit_evaluate_cv(model, X, y):
    """Fit `model` on (X, y) and print 10-fold cross-validated F1/precision/recall.

    NOTE(review): StratifiedKFold(y, 10, ...) is the pre-0.18 scikit-learn
    signature; modern versions take n_splits and split at scoring time --
    confirm the pinned sklearn version.
    """
    model.fit(X, y)
    cv = StratifiedKFold(y, 10, shuffle=True, random_state=0)
    scores = cross_val_score(model, X, y, cv=cv, scoring='f1')
    print(scores)
    # The +/- value is the standard error of the mean across folds.
    print("Mean F1 score: %0.3f (+/- %0.3f)" % (np.mean(scores), sem(scores)))
    precision = cross_val_score(model, X, y, cv=cv, scoring='precision')
    print("Mean precision: %0.3f" % (np.mean(precision)))
    recall = cross_val_score(model, X, y, cv=cv, scoring='recall')
    print("Mean recall: %0.3f" % (np.mean(recall)))

# Feature selection with 10-fold cross validation 
Example 68
 Project: elasticintel   Author: securityclippy   File: test_nanops.py    GNU General Public License v3.0 5 votes
def test_nansem(self):
    """nanops.nansem matches scipy.stats.sem across ddof values.

    Requires scipy >= 0.17.0 (skips otherwise); invalid-value warnings
    from degenerate slices are suppressed during the comparison.
    """
    tm.skip_if_no_package('scipy', min_version='0.17.0')
    from scipy.stats import sem
    with np.errstate(invalid='ignore'):
        self.check_funs_ddof(nanops.nansem, sem, allow_complex=False,
                             allow_str=False, allow_date=False,
                             allow_tdelta=False, allow_obj='convert')
Example 69
 Project: elasticintel   Author: securityclippy   File: test_groupby.py    GNU General Public License v3.0 5 votes
def test_ops_general(self):
    """Each named groupby reduction matches .agg() with the equivalent callable.

    scipy's `sem` is exercised only when scipy is importable, so the test
    degrades gracefully without the optional dependency.  On mismatch the
    failing operation's name is appended to the exception for easier triage.
    """
    ops = [('mean', np.mean),
           ('median', np.median),
           ('std', np.std),
           ('var', np.var),
           ('sum', np.sum),
           ('prod', np.prod),
           ('min', np.min),
           ('max', np.max),
           ('first', lambda x: x.iloc[0]),
           ('last', lambda x: x.iloc[-1]),
           ('count', np.size), ]
    try:
        from scipy.stats import sem
    except ImportError:
        # scipy is optional; skip the sem op when it is unavailable.
        pass
    else:
        ops.append(('sem', sem))
    df = DataFrame(np.random.randn(1000))
    labels = np.random.randint(0, 50, size=1000).astype(float)

    for op, targop in ops:
        result = getattr(df.groupby(labels), op)().astype(float)
        expected = df.groupby(labels).agg(targop)
        try:
            tm.assert_frame_equal(result, expected)
        except BaseException as exc:
            # Tag the failing operation so the error message identifies it.
            exc.args += ('operation: %s' % op, )
            raise
Example 70
 Project: treecall   Author: nh3   File: treecall.py    GNU General Public License v2.0 5 votes
def partition_main(args):
    """Build a sample partition tree from VCF genotype likelihoods.

    Reads PLs from args.vcf, recursively partitions the samples, refines
    the tree via NNI and rerooting, and writes Newick files under
    args.output ('.pt0.nwk' initial tree, '.pt.nwk' final tree).
    """
    print(args, file=sys.stderr)
    base_prior = make_base_prior(args.het, GTYPE3) # base genotype prior
    mm,mm0,mm1 = make_mut_matrix(args.mu, GTYPE3) # substitution rate matrix, with non-diagonal set to 0, with diagonal set to 0

    vcffile, variants, DPRs, PLs = read_vcf(args.vcf, args.min_ev)
    n_site,n_smpl = PLs.shape[0:2]

    tree = Tree()
    # Heuristic: when genotype column 1 is noisier (higher mean SEM) than
    # column 2, partition using only the first two genotype columns.
    # NOTE(review): PLs looks like (site, sample, genotype) -- confirm.
    if sem(PLs[...,1],axis=1).mean() > sem(PLs[...,2],axis=1).mean():
        partition(PLs[...,0:2], tree, np.arange(n_smpl), args.min_ev)
    else:
        partition(PLs, tree, np.arange(n_smpl), args.min_ev)

    init_tree(tree)
    # longdouble guards against underflow when combining many likelihoods.
    PLs = PLs.astype(np.longdouble)
    populate_tree_PL(tree, PLs, mm, 'PL')
    populate_tree_PL(tree, PLs, mm0, 'PL0')
    calc_mut_likelihoods(tree, mm0, mm1)

    print(tree)
    tree.write(outfile=args.output+'.pt0.nwk', format=5)
    best_tree,best_PL = recursive_NNI(tree, mm0, mm1, base_prior)
    best_tree,best_PL = recursive_reroot(best_tree, mm0, mm1, base_prior)
    print(best_tree)
    print('PL_per_site = %.4f' % (best_PL/n_site))
    best_tree.write(outfile=args.output+'.pt.nwk', format=5)
Example 71
def compute_stats(list_dict, pkey, skey):
    """Return (mean, sem) of list_dict[i][pkey][skey], each rounded to 3 places."""
    values = [entry[pkey][skey] for entry in list_dict]
    avg = round(np.mean(values), 3)
    std_err = round(stats.sem(values), 3)
    return avg, std_err
Example 72
def test_nansem(self, ddof):
    """nanops.nansem matches scipy.stats.sem for the given ddof.

    Delegates to the shared check_funs harness; invalid-value warnings
    (e.g. from all-NaN slices) are silenced during the comparison.
    """
    from scipy.stats import sem
    with np.errstate(invalid='ignore'):
        self.check_funs(nanops.nansem, sem, allow_complex=False,
                        allow_str=False, allow_date=False,
                        allow_tdelta=False, allow_obj='convert', ddof=ddof)
Example 73
def test_ops_general():
    """Each named groupby reduction matches .agg() with the equivalent callable.

    scipy's `sem` is exercised only when scipy is importable, so the test
    degrades gracefully without the optional dependency.  On mismatch the
    failing operation's name is appended to the exception for easier triage.
    """
    ops = [('mean', np.mean),
           ('median', np.median),
           ('std', np.std),
           ('var', np.var),
           ('sum', np.sum),
           ('prod', np.prod),
           ('min', np.min),
           ('max', np.max),
           ('first', lambda x: x.iloc[0]),
           ('last', lambda x: x.iloc[-1]),
           ('count', np.size), ]
    try:
        from scipy.stats import sem
    except ImportError:
        # scipy is optional; skip the sem op when it is unavailable.
        pass
    else:
        ops.append(('sem', sem))
    df = DataFrame(np.random.randn(1000))
    labels = np.random.randint(0, 50, size=1000).astype(float)

    for op, targop in ops:
        result = getattr(df.groupby(labels), op)().astype(float)
        expected = df.groupby(labels).agg(targop)
        try:
            tm.assert_frame_equal(result, expected)
        except BaseException as exc:
            # Tag the failing operation so the error message identifies it.
            exc.args += ('operation: %s' % op, )
            raise
Example 74
 Project: nmsat   Author: rcfduarte   File: visualization.py    GNU General Public License v2.0 4 votes
def plot_acc(t, accs, fit_params, acc_function, title='', ax=None, display=True, save=False):
    """
    Plot autocorrelation decay and exponential fit (can be used for other purposes where an exponential fit to the
    data is suitable
    :param t: lag values (x axis)
    :param accs: 2D array, one autocorrelation trace per row
    :param fit_params: parameters previously fitted for acc_function
    :param acc_function: callable acc_function(t, *fit_params) -> fitted curve
    :param title: figure/axes title
    :param ax: matplotlib Axes to draw on (new figure created when None)
    :param display: show the figure when True
    :param save: path prefix for saving 'acc_fit.pdf', or False
    :return: None
    """
    if (ax is not None) and (not isinstance(ax, mpl.axes.Axes)):
        raise ValueError('ax must be matplotlib.axes.Axes instance.')

    if ax is None:
        fig = pl.figure()
        fig.suptitle(title)
    else:
        ax.set_title(title)

    # Faint individual traces drawn behind the mean.
    # NOTE(review): when ax is None, `ax` is never assigned before use below;
    # this branch looks broken -- confirm callers always pass an Axes.
    for n in range(accs.shape[0]):
        ax.plot(t, accs[n, :], alpha=0.1, lw=0.1, color='k')

    # Sum of squared residuals between the mean trace and the fitted curve
    # (shown in the legend label as "MSE").
    error = np.sum((np.mean(accs, 0) - acc_function(t, *fit_params)) ** 2)
    label = r'$a = {0}, b = {1}, {2}={3}, MSE = {4}$'.format(str(np.round(fit_params[0], 2)), str(np.round(fit_params[
        1], 2)), r'\tau_{int}', str(np.round(fit_params[2], 2)), str(error))
    # Mean trace with SEM error bars, plus the exponential fit in red.
    ax.errorbar(t, np.mean(accs, 0), yerr=st.sem(accs), fmt='', color='k', alpha=0.3)
    ax.plot(t, np.mean(accs, 0), '--')
    ax.plot(t, acc_function(t, *fit_params), 'r', label=label)
    ax.legend()

    ax.set_ylabel(r'Autocorrelation')
    ax.set_xlabel(r'Lag [ms]')
    ax.set_xlim(min(t), max(t))
    #ax.set_ylim(0., 1.)

    if save:
        assert isinstance(save, str), "Please provide filename"
        ax.figure.savefig(save + 'acc_fit.pdf')

    if display:
        # NOTE(review): pl.show(False) is a legacy positional form; modern
        # matplotlib expects pl.show(block=False) -- confirm pinned version.
        pl.show(False)
Example 75
def plot(self, label):
    """Plot the mean of self.yValues at self.xValues, with a Student-t
    confidence-interval error bar when more than one value is present.

    Series whose label contains ' 16]' or '16,' are highlighted (fixed
    colormap colors, high zorder) and connected by dashed lines to the
    previously plotted highlighted point.
    """
    # average accuracy
    yMean = mean(self.yValues)
    color = plt.cm.Greens(0.9) if isinstance(label, str) and ((' 16]' in label) or ('16,' in label)) else self.colors[self.nextColorIdx]
    confidenceIntervalColor = plt.cm.Greens(0.5)
    # NOTE(review): the two assignments above are immediately overwritten by
    # the cool-colormap variants below; the Greens values look like dead code.
    color = plt.cm.cool(0.45) if isinstance(label, str) and ((' 16]' in label) or ('16,' in label)) else self.colors[self.nextColorIdx]
    confidenceIntervalColor = plt.cm.cool(0.15)

    if isinstance(label, str) and ((' 16]' in label) or ('16,' in label)):
        self.ax.plot(self.xValues, [yMean], 'o', label=label, c=color,zorder=900)
    else:
        self.ax.plot(self.xValues, [yMean], 'o', label=label, c=color)

    # update yMax, yMin
    self.yMax = max(self.yMax, yMean)
    self.yMin = min(self.yMin, yMean)

    # # annotate label
    # self.ax.annotate('{}'.format(label[label.find('-') + 1:]), (self.xValues[0], yMean), size=6)

    confidenceHalfInterval = None
    # add error bar if there is more than single value
    if len(self.yValues) > 1:
        # calc standard error of the mean
        sem = st.sem(self.yValues)
        # calc confidence interval
        intervalMin, intervalMax = st.t.interval(self.confidence, len(self.yValues) - 1, loc=yMean, scale=sem)
        confidenceHalfInterval = (intervalMax - intervalMin) / 2
        self.ax.errorbar(self.xValues, [yMean], yerr=confidenceHalfInterval,
                         ms=5, marker='X', capsize=4, markeredgewidth=1, elinewidth=2, c=color)

    if isinstance(label, str) and ((' 16]' in label) or ('16,' in label)):
        # Re-draw the highlighted series' error bar on top of everything.
        self.ax.errorbar(self.xValues, [yMean], yerr=confidenceHalfInterval,
                         ms=5, marker='X', capsize=4, markeredgewidth=1, elinewidth=2, c=color,zorder=900)

    if isinstance(label, str) and ((' 16]' in label) or ('16,' in label)):
        if self.previousPoint is not None:
            # Connect to the previous highlighted point, plus dashed CI bounds.
            xPrev, yPrev, confidenceHalfIntervalPrev = self.previousPoint
            self.ax.plot([xPrev, self.xValues[-1]], [yPrev, yMean], '--', c=color,zorder=900)
            if confidenceHalfInterval and confidenceHalfIntervalPrev:
                self.ax.plot([xPrev, self.xValues[-1]], [yPrev - confidenceHalfIntervalPrev, yMean - confidenceHalfInterval], '--',
                             c=confidenceIntervalColor,zorder=900)
                self.ax.plot([xPrev, self.xValues[-1]], [yPrev + confidenceHalfIntervalPrev, yMean + confidenceHalfInterval], '--',
                             c=confidenceIntervalColor,zorder=900)
        # save last point as previous point
        self.previousPoint = (self.xValues[-1], yMean, confidenceHalfInterval)

    # update variables for next plot
    self.nextColorIdx += 1
    self.resetPlot()
Example 76
def plot_estimates(ru_estimate_plot, files):
    """Plot repeat-count estimates (with SEM error bars) against the
    simulated ground truth.

    Args:
        ru_estimate_plot: matplotlib Axes to draw on.
        files: result-file paths named like '..._<simulated>_<coverage>x.*';
            each file's last line holds the estimate (or 'None'), and a
            sibling '<file>.naive' holds the naive estimate.
    """
    # BUG FIX: use integer division -- these are list lengths, and '/' yields
    # a float under Python 3.
    n_sims = len(files) // 39
    data = [[0 for _ in range(n_sims)] for _ in range(39 - 2)]
    naive_data = [[0 for _ in range(n_sims)] for _ in range(39 - 2)]

    # Collect the distinct simulated repeat counts and index them.
    simulated = set([])
    for file_name in files:
        sim = int(file_name.split('_')[-2])
        # BUG FIX: the original never added sim to the set, leaving
        # sim_to_ind empty and causing a KeyError below.
        simulated.add(sim)
    simulated = sorted(list(simulated))
    sim_to_ind = {sim: i for i, sim in enumerate(simulated)}

    for file_name in files:
        coverage = int(file_name.split('_')[-1].split('.')[0][:-1])
        if coverage < 3:
            # Coverages below 3x are excluded from the grid.
            continue
        sim = int(file_name.split('_')[-2])
        estimate = 0
        naive_estimate = 0
        with open(file_name) as fh:
            # BUG FIX: `lines` was referenced without ever reading the file.
            lines = fh.readlines()
            if len(lines) > 1:
                if lines[-1].strip() != 'None' and len(lines[-1]) < 10:
                    estimate = int(float(lines[-1].strip()))
        with open(file_name + '.naive') as fh:
            lines = fh.readlines()
            if len(lines) > 1:
                if lines[-1].strip() != 'None' and len(lines[-1]) < 10:
                    naive_estimate = int(float(lines[-1].strip()))
        data[coverage - 3][sim_to_ind[sim]] = estimate
        naive_data[coverage - 3][sim_to_ind[sim]] = naive_estimate

    from scipy import stats
    # Mean estimate per simulated count, averaged over coverage levels;
    # report any (coverage, run) cells that stayed at zero.
    averages = []
    for sim_rcout in range(len(data[0])):
        total = 0
        for cov in range(len(data)):
            if data[cov][sim_rcout] == 0:
                print(cov + 9, sim_rcout)
            total += data[cov][sim_rcout]
        averages.append(total / float(len(data)))

    naive_averages = []
    for sim_rcout in range(len(naive_data[0])):
        total = 0
        for cov in range(len(naive_data)):
            total += naive_data[cov][sim_rcout]
        naive_averages.append(total / float(len(naive_data)))

    # Error bars: standard error of the mean across coverage levels.
    ru_estimate_plot.errorbar(simulated, averages, yerr=stats.sem(data))
    ru_estimate_plot.set_xticks(simulated, [sim + 1 for sim in simulated])
    ru_estimate_plot.set_yticks(simulated, [sim + 1 for sim in simulated])
    # ru_estimate_plot.errorbar(simulated, naive_averages, yerr=stats.sem(naive_data))
Example 77
def create_scipy_features(base_features, sentinel):
    r"""Calculate the skew, kurtosis, and other statistical features
    for each row.

    Parameters
    ----------
    base_features : numpy array
        The feature dataframe.
    sentinel : float
        The number to be imputed for NaN values.

    Returns
    -------
    sp_features : numpy array
        The calculated SciPy features.

    """

    logger.info("Creating SciPy Features")

    # Generate scipy features (each is one statistic per row, axis=1)

    logger.info("SciPy Feature: geometric mean")
    row_gmean = sps.gmean(base_features, axis=1)
    logger.info("SciPy Feature: kurtosis")
    row_kurtosis = sps.kurtosis(base_features, axis=1)
    logger.info("SciPy Feature: kurtosis test")
    # Only the test statistics are kept; the p-values are discarded.
    row_ktest, pvalue = sps.kurtosistest(base_features, axis=1)
    logger.info("SciPy Feature: normal test")
    row_normal, pvalue = sps.normaltest(base_features, axis=1)
    logger.info("SciPy Feature: skew")
    row_skew = sps.skew(base_features, axis=1)
    logger.info("SciPy Feature: skew test")
    row_stest, pvalue = sps.skewtest(base_features, axis=1)
    logger.info("SciPy Feature: variation")
    row_var = sps.variation(base_features, axis=1)
    logger.info("SciPy Feature: signal-to-noise ratio")
    # NOTE(review): scipy.stats.signaltonoise was removed in scipy 1.0; this
    # line requires a pre-1.0 scipy pin -- confirm or inline mean/std.
    row_stn = sps.signaltonoise(base_features, axis=1)
    logger.info("SciPy Feature: standard error of mean")
    row_sem = sps.sem(base_features, axis=1)

    # Stack the per-row statistics into columns, impute NaNs with the
    # sentinel, then standardize each feature column.
    sp_features = np.column_stack((row_gmean, row_kurtosis, row_ktest,
                                   row_normal, row_skew, row_stest,
                                   row_var, row_stn, row_sem))
    sp_features = impute_values(sp_features, 'float64', sentinel)
    sp_features = StandardScaler().fit_transform(sp_features)

    # Return new SciPy features

    logger.info("SciPy Feature Count : %d", sp_features.shape[1])
    return sp_features

#
# Function create_clusters
#