# Python scipy.stats.ranksums() Examples

The following code examples show how to use scipy.stats.ranksums(). They are taken from open source Python projects. You can vote up the examples you like or vote down the ones you don't like.

Example 1
 Project: GeoPy   Author: aerler   File: stats.py    GNU General Public License v3.0 6 votes
def ranksums_wrapper(data, size1=None, ignoreNaN=True):
    """Return only the p-value of the Wilcoxon rank-sum test for two samples.

    The two samples are passed concatenated in ``data`` and are separated by
    slicing: the first sample is ``data[:size1]`` and the second one is
    ``data[size1:]``. With the default ``size1=None`` both slices span all of
    ``data``.

    Parameters
    ----------
    data : numpy.ndarray
        Concatenation of the two samples.
    size1 : int, optional
        Number of leading elements of ``data`` that form the first sample.
    ignoreNaN : bool
        If true, NaN entries are dropped from each sample before testing.

    Returns
    -------
    float
        The p-value reported by ``ss.ranksums``; ``np.nan`` if ``ignoreNaN``
        is set and either sample has fewer than 3 non-NaN entries.
    """
    sample1 = data[:size1]
    sample2 = data[size1:]
    if ignoreNaN:
        valid1 = ~np.isnan(sample1)
        valid2 = ~np.isnan(sample2)
        # too few usable values in either sample: no meaningful test possible
        if np.sum(valid1) < 3 or np.sum(valid2) < 3:
            # np.nan (lowercase) exists in every NumPy release; the np.NaN
            # alias was removed in NumPy 2.0
            return np.nan
        sample1 = sample1[valid1]
        sample2 = sample2[valid2]
    # ranksums returns (statistic, pvalue); only the p-value is of interest
    return ss.ranksums(sample1, sample2)[1]

## bivariate statistical functions

# Pearson's Correlation Coefficient between two samples ```
Example 2
 Project: pscore_match   Author: kellieotto   File: match.py    BSD 2-Clause "Simplified" License 6 votes
def rank_test(covariates, groups):
    """
    Wilcoxon rank sum test for the distribution of treatment and control covariates.

    Parameters
    ----------
    covariates : DataFrame
        Dataframe with one covariate per column.
        NOTE(review): the original note on matching with replacement is
        truncated ("... then duplicates should be"); presumably duplicated
        units are expected as repeated rows -- confirm.
    groups : array-like
        treatment assignments, must be 2 groups; entries equal to 1 select the
        first sample and entries equal to 0 the second one

    Returns
    -------
    numpy.ndarray
        p-values, one for each column in covariates
    """
    # A plain list/tuple compares to a scalar as a whole (``[1, 0] == 1`` is
    # False), which would not give an element-wise mask; convert it first.
    if isinstance(groups, (list, tuple)):
        groups = np.asarray(groups)
    treated = groups == 1
    control = groups == 0
    colnames = list(covariates.columns)
    pvalues = np.zeros(len(colnames))
    for j, name in enumerate(colnames):
        var = covariates[name]
        pvalues[j] = ranksums(var[treated], var[control]).pvalue
    return pvalues
Example 3
 Project: GeoPy   Author: aerler   File: stats.py    GNU General Public License v3.0 5 votes
def ranksums(sample1, sample2, lstatistic=False, ignoreNaN=True, **kwargs):
    """Apply the Wilcoxon rank-sum test to two samples and return the p-value(s).

    The test asks whether both samples are drawn from the same underlying
    (continuous) distribution; a high p-value means that they likely are.

    The work is delegated to ``apply_stat_test_2samp`` with
    ``ranksums_wrapper`` (bound to the given ``ignoreNaN``) as test function;
    any extra keyword arguments are forwarded to ``apply_stat_test_2samp``.

    Raises
    ------
    NotImplementedError
        If ``lstatistic`` is true: only p-values can be returned.
    """
    if lstatistic:
        raise NotImplementedError("Return of test statistic is not yet implemented; only p-values are returned.")
    # pre-bind the NaN handling so the helper only has to pass the data
    pvalue_test = functools.partial(ranksums_wrapper, ignoreNaN=ignoreNaN)
    return apply_stat_test_2samp(
        sample1, sample2, fct=pvalue_test, laax=True, lpval=True, lrho=False, **kwargs
    )
Example 4
def test_ranksums_result_attributes(self):
    """Run ``check_named_results`` on a ``stats.ranksums`` result.

    The result is checked against the attribute names ``statistic`` and
    ``pvalue``.
    """
    expected_names = ('statistic', 'pvalue')
    outcome = stats.ranksums(np.arange(5), np.arange(25))
    check_named_results(outcome, expected_names)
Example 5
def test_ranksums_result_attributes(self):
    """Run ``check_named_results`` on a ``stats.ranksums`` result.

    The result is checked against the attribute names ``statistic`` and
    ``pvalue``.
    """
    field_names = ('statistic', 'pvalue')
    small_sample, large_sample = np.arange(5), np.arange(25)
    check_named_results(stats.ranksums(small_sample, large_sample), field_names)
Example 6
def test_ranksums_result_attributes(self):
    """Run ``check_named_results`` on a ``stats.ranksums`` result.

    The result is checked against the attribute names ``statistic`` and
    ``pvalue``.
    """
    expected_names = ('statistic', 'pvalue')
    test_result = stats.ranksums(np.arange(5), np.arange(25))
    check_named_results(test_result, expected_names)
Example 7
def test_ranksums_result_attributes(self):
    """Run ``check_named_results`` on a ``stats.ranksums`` result.

    The result is checked against the attribute names ``statistic`` and
    ``pvalue``.
    """
    first, second = np.arange(5), np.arange(25)
    named_fields = ('statistic', 'pvalue')
    check_named_results(stats.ranksums(first, second), named_fields)
Example 8
def main():
    """Print the p-values of a rank-sum test and a one-way ANOVA.

    Both tests are run on the two hard-coded "2nd phase" samples ``x`` and
    ``y``; the result is written to standard output as a single line.
    """
    # 1st phase (kept for reference only, not evaluated):
    # top1 = [70.0, 71.1, 72.5, 70.8, 68.1, 71.9, 71.1, 71.3, 68.4, 70.2]
    # top3 = [75.8, 78.4, 77.8, 77.7, 80.0, 77.8, 78.7, 76.4, 79.1, 77.3]
    # 2nd phase
    x = [53.6, 54.5, 53.7, 52.7, 53.1, 55.5, 55.5, 52.8, 53.7, 52.7]
    y = [89.7, 89.1, 89.5, 88.7, 89.4, 88.6, 89.8, 89.5, 89.2, 89.7]
    # Compute the Wilcoxon rank-sum statistic for two samples.
    wilcoxon = stats.ranksums(x, y)
    anova = stats.f_oneway(x, y)
    # Index 1 of both results is the p-value. print() with one parenthesised
    # argument emits the same text on Python 2 and Python 3.
    print("Wilcoxon: " + str(wilcoxon[1]) + "; ANOVA: " + str(anova[1]))
Example 9
def test_ranksums_result_attributes(self):
    """Run ``check_named_results`` on a ``stats.ranksums`` result.

    The result is checked against the attribute names ``statistic`` and
    ``pvalue``.
    """
    attribute_names = ('statistic', 'pvalue')
    ranksums_result = stats.ranksums(np.arange(5), np.arange(25))
    check_named_results(ranksums_result, attribute_names)
Example 10
 Project: ribotish   Author: zhpn1024   File: ribo.py    GNU General Public License v3.0 5 votes
def rstest_mw(x, y):
    """Rank sum test p value of x > y, built from ranksums and mannwhitneyu.

    The sign of the ``ranksums`` statistic decides the direction: if it is
    positive the ``mannwhitneyu`` p-value ``p`` is returned, otherwise
    ``1 - p``. If ``mannwhitneyu`` raises, 0.5 is returned instead.

    The original author marked this function as not used because of some bug
    in mannwhitneyu.

    NOTE(review): the ``p`` / ``1 - p`` logic only yields a one-sided p-value
    if ``mannwhitneyu`` itself returns a one-sided p-value by default; confirm
    against the installed SciPy version's default ``alternative``.
    """
    from scipy.stats import ranksums, mannwhitneyu
    direction = ranksums(x, y)[0]
    try:
        p = mannwhitneyu(x, y)[1]
    except Exception:
        # best-effort fallback kept from the original, but no longer catching
        # KeyboardInterrupt / SystemExit as the bare ``except:`` did
        return 0.5
    return p if direction > 0 else 1 - p
Example 11
def calc_p_matrix():
    """Compute pairwise p-values between six data groups for two conditions.

    For each of the two conditions the data is split into six groups of five
    rows each (flattened). Every group is checked with the Anderson-Darling
    test; as soon as one group's statistic exceeds the third critical value
    the condition is flagged as not normal. All 6 x 6 group pairs are then
    compared with ``stats.ttest_ind`` (normal) or ``stats.ranksums``
    (not normal).

    Returns
    -------
    p_matrix : numpy.ndarray
        Array of shape (2, 6, 6) holding the pairwise p-values.
    is_normal_list : list of bool
        Per condition, whether all groups passed the normality check.
    """
    # NOTE(review): `sheet_name_vec` is never read and `dfs` is not defined
    # inside this function -- presumably a per-sheet load of `dfs` was lost
    # from this snippet; as written both iterations read the same `dfs`.
    sheet_name_vec = ['Indoor', 'Outdoor']
    sub_num = 7
    is_normal_list = [True, True]
    p_matrix = np.ones((2, 6, 6))
    for i in range(2):
        # builtin float is the dtype the removed `np.float` alias stood for
        data = dfs.values[:, 2:sub_num+2].astype(float)
        data_list = []
        for m in range(6):
            data_m = data[5*m:5*(m+1), :].reshape(-1)
            # once one group fails the check, skip the test for the rest
            if is_normal_list[i]:
                statistic, critical_values, significance_level = stats.anderson(data_m)
                if statistic > critical_values[2]:
                    is_normal_list[i] = False
            data_list.append(data_m)
        for r in range(6):
            for c in range(6):
                if is_normal_list[i]:
                    _, p_matrix[i, r, c] = stats.ttest_ind(data_list[r], data_list[c])
                else:
                    _, p_matrix[i, r, c] = stats.ranksums(data_list[r], data_list[c])
    return p_matrix, is_normal_list

# Check if data is normal distributed and calculate p values
# p_matrix, is_normal_list = calc_p_matrix()

# # Fig: 5_indoor_classify, Fig: 6_outdoor_classify
# plot_classification_acc()

# Fig: 7_time_delay_analysis ```
Example 12
def test_ranksums_result_attributes(self):
    """Run ``check_named_results`` on a ``stats.ranksums`` result.

    The result is checked against the attribute names ``statistic`` and
    ``pvalue``.
    """
    required = ('statistic', 'pvalue')
    computed = stats.ranksums(np.arange(5), np.arange(25))
    check_named_results(computed, required)
Example 13
 Project: sktime   Author: alan-turing-institute   File: evaluation.py    BSD 3-Clause "New" or "Revised" License 5 votes
def ranksum_test(self, metric_name=None):
    """
    Run the Wilcoxon rank-sum test on the metric values of every ordered
    pair of estimators (including each estimator paired with itself)
    `<http://en.wikipedia.org/wiki/Wilcoxon_rank-sum_test>`_.

    Parameters
    ----------
    metric_name : optional
        Passed through ``self._validate_metric_name`` and then used to fetch
        the metric values per estimator.

    Returns
    -------
    ranksum_df : pandas.DataFrame
        One row per estimator pair with the columns ``estimator_1``,
        ``estimator_2``, ``t_stat`` and ``p_val``.
    values_df_multiindex : pandas.DataFrame
        Same numbers in wide form: one row per first estimator and a
        two-level column index (second estimator, ``t_stat``/``p_val``).
    """
    self._check_is_evaluated()
    metric_name = self._validate_metric_name(metric_name)
    metrics_per_estimator_dataset = self._get_metrics_per_estimator_dataset(metric_name)

    # Collect rows in plain lists and build the frame/array once:
    # DataFrame.append no longer exists in pandas 2.x, and growing arrays
    # with np.append inside the loop copies the data on every iteration.
    records = []
    flat_values = []
    estimator_pairs = itertools.product(metrics_per_estimator_dataset.keys(), repeat=2)
    for first, second in estimator_pairs:
        t_stat, p_val = ranksums(
            metrics_per_estimator_dataset[first],
            metrics_per_estimator_dataset[second],
        )
        records.append({
            "estimator_1": first,
            "estimator_2": second,
            "t_stat": t_stat,
            "p_val": p_val
        })
        flat_values.extend((t_stat, p_val))

    # explicit columns keep the frame well-formed even without any estimator
    ranksum_df = pd.DataFrame(
        records, columns=["estimator_1", "estimator_2", "t_stat", "p_val"]
    )

    index = ranksum_df["estimator_1"].unique()
    values_names = ["t_stat", "p_val"]
    col_idx = pd.MultiIndex.from_product([index, values_names])
    # pairs were generated row-major (first estimator varies slowest), so a
    # plain reshape lines the values up with (row=first, column=second)
    values_reshaped = np.array(flat_values, dtype=float).reshape(
        len(index), len(values_names) * len(index)
    )

    values_df_multiindex = pd.DataFrame(values_reshaped, index=index, columns=col_idx)

    return ranksum_df, values_df_multiindex
Example 14
 Project: COMETSC   Author: MSingerLab   File: hgmd.py    BSD 3-Clause "New" or "Revised" License 4 votes
def batch_stats(marker_exp, c_list, coi):
    """Applies t test & wilcoxon rank sum test to a gene expression matrix, gene by gene.

    For every column (gene) the cells with ``c_list == coi`` are compared
    against the cells with ``c_list != coi``, once with Welch's t test
    (``equal_var=False``) and once with the Wilcoxon rank sum test.

    :param marker_exp: A DataFrame whose rows are cell identifiers, columns are
        gene identifiers, and values are float values representing gene
        expression.
    :param c_list: A Series whose indices are cell identifiers, and whose
        values are the cluster which that cell is part of.
    :param coi: The cluster of interest.

    :returns: A matrix with arbitrary row indices and one row per gene. Its
        columns are named 'gene_1', 't_stat', 't_pval', 'w_stat', 'w_pval':
        the gene identifier followed by statistic and p-value of each test.

    :rtype: pandas.DataFrame
    """
    in_cluster = c_list == coi
    out_cluster = c_list != coi

    # Iterate over the columns explicitly instead of using DataFrame.apply:
    # how apply packs tuple-like results (Series of tuples vs. an expanded
    # DataFrame) differs between pandas versions.
    genes = []
    t_stats, t_pvals = [], []
    w_stats, w_pvals = [], []
    for gene, col in marker_exp.items():
        t_stat, t_pval = ss.ttest_ind(
            col[in_cluster],
            col[out_cluster],
            equal_var=False
        )
        w_stat, w_pval = ss.ranksums(
            col[in_cluster],
            col[out_cluster]
        )
        genes.append(gene)
        t_stats.append(t_stat)
        t_pvals.append(t_pval)
        w_stats.append(w_stat)
        w_pvals.append(w_pval)

    output = pd.DataFrame({
        'gene_1': genes,
        't_stat': t_stats,
        't_pval': t_pvals,
        'w_stat': w_stats,
        'w_pval': w_pvals,
    })

    return output
Example 15
def stabilityMeasure(X, d_max, B=5, power_iters=2):
    """ Score the stability of singular vectors across repeated ``rsvd`` runs
    and pick a split point with a rank-sum test.

    Parameters
    ----------
    X : matrix
        input matrix; handed to ``rsvd`` and its row count sizes the basis
    d_max : int
        upper bound rank to estimate (number of singular vectors kept)
    B : int
        default: 5
        number of projections to correlate
    power_iters : int
        default: 2
        accepted for interface compatibility; not read in this function
    Returns
    -------
    int
        Position of the smallest rank-sum p-value (``np.argmin``).
        NOTE(review): entry ``j`` of the p-value array belongs to the split
        ``k = j + 2`` -- confirm how callers map this index to a rank.
    """
    # one left singular basis per repeated decomposition
    bases = np.zeros((B, X.shape[0], d_max))
    for run in range(B):
        left_vectors = rsvd(X, d_max)[0]
        bases[run, :, :] = left_vectors[:, 0:d_max]

    # mean absolute Spearman correlation over all pairs of runs, per vector
    scores = np.zeros((d_max))
    for comp in range(d_max):
        total = 0
        for first in range(0, B-1):
            for second in range(first+1, B):
                rho = stats.spearmanr(bases[first, :, comp], bases[second, :, comp])[0]
                total = total + abs(rho)
        pair_count = B*(B-1)/2
        scores[comp] = total/pair_count

    # rank-sum p-value for every split of the scores into head and tail
    p_vals = np.zeros(d_max-2)
    for split in range(2, d_max):
        head = scores[0:split-1]
        tail = scores[split-1:d_max]
        p_vals[split-2] = stats.ranksums(head, tail)[1]

    return np.argmin(p_vals)