Python scipy.stats.ranksums() Examples

The following are code examples for showing how to use scipy.stats.ranksums(). They are from open source Python projects. You can vote up the examples you like or vote down the ones you don't like.

Example 1
Project: GeoPy   Author: aerler   File: stats.py    GNU General Public License v3.0 6 votes vote down vote up
def ranksums_wrapper(data, size1=None, ignoreNaN=True):
  ''' Apply the Wilcoxon Ranksum Test to two samples that are stored back-to-back in 'data',
      to test whether they are drawn from the same underlying (continuous) distribution.
      The first 'size1' elements of 'data' form sample 1, the remaining elements form sample 2
      (note that with size1=None both slices cover all of 'data').
      This is a wrapper for the SciPy function that removes NaN's (if 'ignoreNaN' is set),
      allows application over a field, and only returns the p-value; if NaN's are removed and
      either sample retains fewer than 3 values, NaN is returned instead of a p-value. '''
  # split the concatenated vector into the two samples
  data1 = data[:size1]; data2 = data[size1:]
  if ignoreNaN:
    valid1 = ~np.isnan(data1) # True where a value is present
    valid2 = ~np.isnan(data2)
    # too few values for a meaningful test: signal this with NaN
    # (np.nan rather than the alias np.NaN, which no longer exists in NumPy 2.0)
    if np.sum(valid1) < 3 or np.sum(valid2) < 3: return np.nan
    data1 = data1[valid1]; data2 = data2[valid2] # remove NaN's
  # apply test; the statistic is discarded and only the p-value is returned
  pval = ss.ranksums(data1, data2)[1]
  return pval


## bivariate statistical functions

# Pearson's Correlation Coefficient between two samples 
Example 2
Project: pscore_match   Author: kellieotto   File: match.py    BSD 2-Clause "Simplified" License 6 votes vote down vote up
def rank_test(covariates, groups):
    """
    Wilcoxon rank sum test for the distribution of treatment and control covariates.

    Parameters
    ----------
    covariates : DataFrame
        Dataframe with one covariate per column.
        If matches are with replacement, then duplicates should be
        included as additional rows.
    groups : array-like
        treatment assignments, must be 2 groups (coded as 1 and 0);
        one entry per row of ``covariates``

    Returns
    -------
    A NumPy array of p-values, one for each column in covariates
    """
    # A plain list/tuple compared with `== 1` yields a single bool instead of an
    # element-wise mask, so turn those into an array before building the masks.
    if isinstance(groups, (list, tuple)):
        groups = np.asarray(groups)
    is_one = groups == 1
    is_zero = groups == 0

    colnames = list(covariates.columns)
    pvalues = np.zeros(len(colnames))
    for j, name in enumerate(colnames):
        var = covariates[name]
        pvalues[j] = ranksums(var[is_one], var[is_zero]).pvalue
    return pvalues
Example 3
Project: GeoPy   Author: aerler   File: stats.py    GNU General Public License v3.0 5 votes vote down vote up
def ranksums(sample1, sample2, lstatistic=False, ignoreNaN=True, **kwargs):
  ''' Wilcoxon Ranksum Test for two samples: tests whether both samples are drawn from the
      same underlying (continuous) distribution; a high p-value means that the two samples
      are likely drawn from the same distribution.
      Compared to the T-test, the Ranksum Test has higher efficiency for non-normal
      distributions and is almost as reliable for normal distributions. It is less
      sophisticated than the Mann-Whitney Test and does not handle ties between ranks.
      Only p-values can be returned; requesting the statistic ('lstatistic') raises a
      NotImplementedError. Additional keyword arguments are passed on to
      apply_stat_test_2samp. '''
  if lstatistic:
    raise NotImplementedError("Return of test statistic is not yet implemented; only p-values are returned.")
  # fix the NaN policy now, so that the generic two-sample driver only has to supply the data
  pvalue_fct = functools.partial(ranksums_wrapper, ignoreNaN=ignoreNaN)
  return apply_stat_test_2samp(sample1, sample2, fct=pvalue_fct, laax=True,
                               lpval=True, lrho=False, **kwargs)
Example 4
Project: LaserTOF   Author: kyleuckert   File: test_stats.py    MIT License 5 votes vote down vote up
def test_ranksums_result_attributes(self):
    # Hand the ranksums result and the expected field names to the shared checker.
    expected_fields = ('statistic', 'pvalue')
    outcome = stats.ranksums(np.arange(5), np.arange(25))
    check_named_results(outcome, expected_fields)
Example 5
Project: ble5-nrf52-mac   Author: tomasero   File: test_stats.py    MIT License 5 votes vote down vote up
def test_ranksums_result_attributes(self):
    # Hand the ranksums result and the expected field names to the shared checker.
    expected_fields = ('statistic', 'pvalue')
    outcome = stats.ranksums(np.arange(5), np.arange(25))
    check_named_results(outcome, expected_fields)
Example 6
Project: poker   Author: surgebiswas   File: test_stats.py    MIT License 5 votes vote down vote up
def test_ranksums_result_attributes(self):
    # Hand the ranksums result and the expected field names to the shared checker.
    expected_fields = ('statistic', 'pvalue')
    outcome = stats.ranksums(np.arange(5), np.arange(25))
    check_named_results(outcome, expected_fields)
Example 7
Project: P3_image_processing   Author: latedude2   File: test_stats.py    MIT License 5 votes vote down vote up
def test_ranksums_result_attributes(self):
    # Hand the ranksums result and the expected field names to the shared checker.
    expected_fields = ('statistic', 'pvalue')
    outcome = stats.ranksums(np.arange(5), np.arange(25))
    check_named_results(outcome, expected_fields)
Example 8
Project: facial-emotion-detection-dl   Author: dllatas   File: calculate_p_value.py    MIT License 5 votes vote down vote up
def main():
    """
    Run a Wilcoxon rank-sum test and a one-way ANOVA on two hard-coded samples
    and print both p-values on one line.

    Values kept from the original notes:

    1st phase
    top1 = [70.0, 71.1, 72.5, 70.8, 68.1, 71.9, 71.1, 71.3, 68.4, 70.2]
    top3 = [75.8, 78.4, 77.8, 77.7, 80.0, 77.8, 78.7, 76.4, 79.1, 77.3]
    2nd phase
    (presumably the x / y samples below -- confirm against the experiment log)
    """
    x = [53.6, 54.5, 53.7, 52.7, 53.1, 55.5, 55.5, 52.8, 53.7, 52.7]
    y = [89.7, 89.1, 89.5, 88.7, 89.4, 88.6, 89.8, 89.5, 89.2, 89.7]
    # Compute the Wilcoxon rank-sum statistic for two samples.
    wilcoxon = stats.ranksums(x, y)
    anova = stats.f_oneway(x, y)
    # Index 1 of each result is the p-value. The parenthesised single-argument
    # form prints the same text on Python 2 and Python 3.
    print("Wilcoxon: " + str(wilcoxon[1]) + "; ANOVA: " + str(anova[1]))
Example 9
Project: GraphicDesignPatternByPython   Author: Relph1119   File: test_stats.py    MIT License 5 votes vote down vote up
def test_ranksums_result_attributes(self):
    # Hand the ranksums result and the expected field names to the shared checker.
    expected_fields = ('statistic', 'pvalue')
    outcome = stats.ranksums(np.arange(5), np.arange(25))
    check_named_results(outcome, expected_fields)
Example 10
Project: ribotish   Author: zhpn1024   File: ribo.py    GNU General Public License v3.0 5 votes vote down vote up
def rstest_mw(x, y):
  '''rank sum test p value of x > y, Not used because of some bug in mannwhitneyu

  The sign of the ranksums statistic picks the tail: if it is positive the
  mannwhitneyu p-value is returned as is, otherwise 1 - p is returned.
  If mannwhitneyu raises, 0.5 is returned as a neutral value.

  NOTE(review): the 1 - p mapping looks like it assumes mannwhitneyu returns a
  one-sided p-value; recent SciPy versions default to a two-sided test --
  confirm before using this function.
  '''
  from scipy.stats import ranksums, mannwhitneyu
  st1, _ = ranksums(x, y)
  try:
    _, p = mannwhitneyu(x, y)
  except Exception:
    # best effort: a failed test is treated as uninformative, but
    # KeyboardInterrupt / SystemExit are no longer swallowed
    return 0.5
  return p if st1 > 0 else 1 - p
Example 11
Project: HMM-decision-fusion   Author: KuangenZhang   File: result_analysis.py    MIT License 5 votes vote down vote up
def calc_p_matrix():
    """Compute pairwise p-values between six result groups for each sheet.

    Reads the sheets 'Indoor' and 'Outdoor' from "classification accuracy.xlsx".
    For each sheet, columns 2 .. sub_num+1 are taken as numeric data and split
    into six groups of five consecutive rows, each flattened to one vector.
    Every group is checked with the Anderson-Darling test; if any statistic
    exceeds the third critical value (the 5% level for the default normal
    distribution, per the SciPy documentation) the sheet is marked non-normal.
    Normal sheets are compared pairwise with an independent t-test, the others
    with the Wilcoxon rank-sum test.

    Returns
    -------
    p_matrix : ndarray of shape (2, 6, 6)
        p_matrix[i, r, c] is the p-value of group r versus group c on sheet i.
    is_normal_list : list of bool
        Whether all groups of each sheet passed the normality check.
    """
    sheet_name_vec = ['Indoor', 'Outdoor']
    sub_num = 7
    is_normal_list = [True, True]
    p_matrix = np.ones((2, 6, 6))
    for i in range(2):
        dfs = pd.read_excel("classification accuracy.xlsx", sheet_name=sheet_name_vec[i])
        # np.float was only an alias of the builtin float and has been removed
        # from NumPy (1.24); the builtin gives the same float64 result.
        data = dfs.values[:, 2:sub_num+2].astype(float)
        data_list = []
        for m in range(6):
            data_m = data[5*m:5*(m+1), :].reshape(-1)
            if is_normal_list[i]:
                statistic, critical_values, significance_level = stats.anderson(data_m)
                if statistic > critical_values[2]:
                    is_normal_list[i] = False
            data_list.append(data_m)
        # the normality verdict is final here, so choose the test once per sheet
        test_fct = stats.ttest_ind if is_normal_list[i] else stats.ranksums
        for r in range(6):
            for c in range(6):
                _, p_matrix[i, r, c] = test_fct(data_list[r], data_list[c])
    return p_matrix, is_normal_list

# Check if data is normal distributed and calculate p values
# p_matrix, is_normal_list = calc_p_matrix()

# # Fig: 5_indoor_classify, Fig: 6_outdoor_classify
# plot_classification_acc()

# Fig: 7_time_delay_analysis 
Example 12
Project: wine-ml-on-aws-lambda   Author: pierreant   File: test_stats.py    Apache License 2.0 5 votes vote down vote up
def test_ranksums_result_attributes(self):
    # Hand the ranksums result and the expected field names to the shared checker.
    expected_fields = ('statistic', 'pvalue')
    outcome = stats.ranksums(np.arange(5), np.arange(25))
    check_named_results(outcome, expected_fields)
Example 13
Project: sktime   Author: alan-turing-institute   File: evaluation.py    BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def ranksum_test(self, metric_name=None):
    """
    Pairwise Wilcoxon rank-sum test between the metric values of all estimators.

    Non-parametric test of whether two independent samples come from the same
    distribution, applied to every ordered pair of estimators (including each
    estimator with itself)
    `<http://en.wikipedia.org/wiki/Wilcoxon_rank-sum_test>`_.

    Parameters
    ----------
    metric_name : optional
        Passed through ``self._validate_metric_name``.

    Returns
    -------
    ranksum_df : pandas.DataFrame
        One row per estimator pair with the columns "estimator_1",
        "estimator_2", "t_stat" (the statistic returned by ``ranksums``)
        and "p_val".
    values_df_multiindex : pandas.DataFrame
        The same numbers in wide form: rows are first estimators, columns are
        a (second estimator, "t_stat"/"p_val") MultiIndex.
    """
    self._check_is_evaluated()
    metric_name = self._validate_metric_name(metric_name)
    metrics_per_estimator_dataset = self._get_metrics_per_estimator_dataset(metric_name)

    # Collect plain records and build the frame once: DataFrame.append no longer
    # exists in pandas 2.0 and repeated append/np.append is quadratic.
    records = []
    for est_1, est_2 in itertools.product(metrics_per_estimator_dataset.keys(), repeat=2):
        t_stat, p_val = ranksums(metrics_per_estimator_dataset[est_1],
                                 metrics_per_estimator_dataset[est_2])
        records.append({
            "estimator_1": est_1,
            "estimator_2": est_2,
            "t_stat": t_stat,
            "p_val": p_val
        })
    ranksum_df = pd.DataFrame(records, columns=["estimator_1", "estimator_2", "t_stat", "p_val"])

    index = ranksum_df["estimator_1"].unique()
    values_names = ["t_stat", "p_val"]
    col_idx = pd.MultiIndex.from_product([index, values_names])
    # flat sequence t, p, t, p, ... in pair order, then one row per first estimator
    values = np.array([rec[name] for rec in records for name in values_names], dtype=float)
    values_reshaped = values.reshape(len(index), len(values_names) * len(index))

    values_df_multiindex = pd.DataFrame(values_reshaped, index=index, columns=col_idx)

    return ranksum_df, values_df_multiindex
Example 14
Project: COMETSC   Author: MSingerLab   File: hgmd.py    BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
def batch_stats(marker_exp, c_list, coi):
    """Applies t test & wilcoxon rank sum test to a gene expression matrix, gene by gene.

    For every gene, the cells inside the cluster of interest are compared with
    all other cells, once with Welch's t test (``equal_var=False``) and once
    with the Wilcoxon rank sum test.

    :param marker_exp: A DataFrame whose rows are cell identifiers, columns are
        gene identifiers, and values are float values representing gene
        expression.
    :param c_list: A Series whose indices are cell identifiers, and whose
        values are the cluster which that cell is part of.
    :param coi: The cluster of interest.

    :returns: A matrix with arbitrary row indices and one row per gene; its
              columns are 'gene_1', 't_stat', 't_pval', 'w_stat', 'w_pval',
              i.e. the gene followed by the statistic and p-value of each test.

    :rtype: pandas.DataFrame
    """
    in_cluster = c_list == coi
    out_cluster = c_list != coi

    # Loop over the columns explicitly instead of DataFrame.apply: how apply
    # wraps tuple-like results differs between pandas versions, and it can
    # expand them into a 2-row frame instead of one result per gene.
    rows = []
    for gene, col in marker_exp.items():
        t_stat, t_pval = ss.ttest_ind(
            col[in_cluster],
            col[out_cluster],
            equal_var=False
        )
        w_stat, w_pval = ss.ranksums(
            col[in_cluster],
            col[out_cluster]
        )
        rows.append((gene, t_stat, t_pval, w_stat, w_pval))

    output = pd.DataFrame(
        rows,
        columns=['gene_1', 't_stat', 't_pval', 'w_stat', 'w_pval']
    )

    return output
Example 15
Project: arsvd   Author: gdarnell   File: dimension_reduction.py    MIT License 4 votes vote down vote up
def stabilityMeasure(X, d_max, B=5, power_iters=2):
    """ Score the stability of each singular vector across repeated rsvd runs
    and pick the split of the scores with the smallest rank-sum p-value.

    The basis returned by ``rsvd(X, d_max)`` is computed B times. For each of
    the d_max components, the score is the mean absolute Spearman correlation
    of that component over all pairs of runs. The scores are then split into
    "first k" versus "the rest" for k = 1 .. d_max-2, and each split is
    compared with a Wilcoxon rank-sum test.

    Parameters
    ----------
    X : int matrix
        input matrix to determine rank of
    d_max : int
        upper bound rank to estimate
    B : int
        default: 5
        number of projections to correlate
    power_iters : int
        default: 2
        Number of power iterations used (random matrix multiplications).
        Not forwarded to ``rsvd`` by this function.
    Returns
    -------
    int
        Position of the smallest p-value (``np.argmin``); position i belongs
        to the split after the first i+1 components.
        NOTE(review): the original documentation calls this the latent
        (lower-dimensional) matrix rank -- confirm whether an offset is intended.
    """
    # one basis per run, stacked along the first axis
    bases = np.zeros((B, X.shape[0], d_max))
    for run in range(B):
        left_vectors = rsvd(X, d_max)[0]
        bases[run, :, :] = left_vectors[:, 0:d_max]

    # mean absolute rank correlation over all unordered pairs of runs
    stability_vec = np.zeros((d_max))
    for comp in range(d_max):
        total = sum(
            abs(stats.spearmanr(bases[a, :, comp], bases[b, :, comp])[0])
            for a in range(0, B-1)
            for b in range(a+1, B)
        )
        n_pairs = B*(B-1)/2
        stability_vec[comp] = total/n_pairs

    # wilcoxon rank-sum test p-values, one per split point
    p_vals = np.zeros(d_max-2)
    for pos in range(d_max-2):
        split = pos + 1
        head = stability_vec[0:split]
        tail = stability_vec[split:d_max]
        p_vals[pos] = stats.ranksums(head, tail)[1]

    return np.argmin(p_vals)