Python scipy.stats.ranksums() Examples

The following are 5 code examples of scipy.stats.ranksums(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module scipy.stats, or try the search function.
Example #1
Source File: match.py    From pscore_match with BSD 2-Clause "Simplified" License 6 votes vote down vote up
def rank_test(covariates, groups):
    """
    Wilcoxon rank sum test for the distribution of treatment and control covariates.

    Parameters
    ----------
    covariates : DataFrame
        Dataframe with one covariate per column.
        If matches are with replacement, then duplicates should be
        included as additional rows.
    groups : array-like
        treatment assignments, must be 2 groups; entries equal to 1 form
        the first sample and entries equal to 0 form the second sample

    Returns
    -------
    A NumPy array of p-values, one for each column in covariates
    """
    # A plain list/tuple compares to a scalar as a whole (``[1, 0] == 1`` is
    # just ``False``), so convert it to obtain an element-wise boolean mask.
    # Series/ndarray inputs are left untouched to keep their existing behavior.
    if isinstance(groups, (list, tuple)):
        groups = np.asarray(groups)
    treated = groups == 1
    control = groups == 0

    colnames = list(covariates.columns)
    pvalues = np.zeros(len(colnames))
    for j, name in enumerate(colnames):
        var = covariates[name]
        pvalues[j] = ranksums(var[treated], var[control]).pvalue
    return pvalues
Example #2
Source File: compare_genomes.py    From mCaller with MIT License 6 votes vote down vote up
def compare_by_position(bed1,bed2,xmfa):
    """Compare per-position probability samples between two bed-like files.

    Each input file is read line by line; every line must hold eight
    tab-separated fields: chromosome, start, end, motif, percent methylated,
    strand, number of reads and a comma-separated list of probabilities.
    For every (chromosome, start, end, strand) key present in both files the
    two probability samples are compared with the Mann-Whitney U, Wilcoxon
    rank-sum, independent t and two-sample Kolmogorov-Smirnov tests.  A line
    with the position, both (percent methylated, number of reads) pairs and
    the four p-values is printed whenever the KS p-value is below 0.9.

    Parameters
    ----------
    bed1, bed2 : str
        Paths of the two files to compare.
    xmfa :
        Not used by this function; kept for interface compatibility.
    """
    pos_dict = {}

    for i,bed in enumerate([bed1,bed2]):
        pos_dict[i] = {}
        with open(bed,'r') as fi:
            for line in fi:
                # e.g. 2  1892198 1892199 TCMMTMTTMMM 0.5 -   16  <p1,p2,...>
                csome,start,end,motif,perc_meth,strand,num_reads,probabilities = line.split('\t')
                probs = np.asarray([float(p) for p in probabilities.strip().split(',')])
                pos_dict[i][(csome,start,end,strand)] = ((perc_meth,num_reads),probs)

    for pos in pos_dict[0]:
        if pos not in pos_dict[1]:
            continue
        # Compare the sample from the first file against the sample from the
        # second file (comparing a sample with itself always gives p = 1).
        summary1,probs1 = pos_dict[0][pos]
        summary2,probs2 = pos_dict[1][pos]
        try:
            _,pval = mannwhitneyu(probs1,probs2,alternative='two-sided')
        except ValueError:
            pval = 'identical'
        _,pval2 = ranksums(probs1,probs2)
        try:
            _,pval3 = ttest_ind(probs1,probs2)
        except Exception:
            # best effort: keep reporting the other tests if the t-test fails
            pval3 = 'missing df'
        _,pval4 = ks_2samp(probs1,probs2)
        if pval4 < 0.9:
            # Space-joined str() of each field: same text as the Python 2
            # print statement, and valid under Python 3 as well.
            fields = (pos, summary1, summary2, pval, pval2, pval3, pval4)
            print(' '.join(str(field) for field in fields))
Example #3
Source File: test_stats.py    From GraphicDesignPatternByPython with MIT License 5 votes vote down vote up
def test_ranksums_result_attributes(self):
    """Check that the ``ranksums`` result exposes ``statistic`` and ``pvalue``.

    The result of ``stats.ranksums`` on two integer ranges is handed to
    ``check_named_results`` together with the expected attribute names.
    """
    expected_fields = ('statistic', 'pvalue')
    outcome = stats.ranksums(np.arange(5), np.arange(25))
    check_named_results(outcome, expected_fields)
Example #4
Source File: scdiff.py    From scdiff with MIT License 4 votes vote down vote up
def getParent(self,A,TP):
    """Select a parent node for cluster ``A`` starting at level ``TP``.

    Candidates are the entries of ``self.Nodes`` whose ``T`` equals ``TP``
    and whose ``ST`` does not exceed ``A.ST``, ordered by their mean
    ``getDistance`` to the cells of ``A`` (closest first).

    * When ``self.dsync`` is ``'True'`` or ``'1'`` the closest candidate is
      returned directly (``None`` if there is none); per the original
      comment this is the mode where time sync is disabled.
    * With several candidates, the distances from the cells of ``A`` to the
      closest and to the last-ranked candidate are compared with a rank-sum
      test; the closest candidate is returned when the p-value is below
      0.1, otherwise the search continues at level ``TP-1``.
    * With exactly one candidate, it is returned if it equals
      ``self.Nodes[0]`` or if level ``TP-1`` also has exactly one eligible
      node; otherwise the search continues at level ``TP-1``.
    * With no candidate, the search continues at level ``TP-1`` unless
      ``A`` equals ``self.Nodes[0]``, in which case ``None`` is returned.
    """
    def mean_distance(node):
        # average distance between a candidate node and every cell of A
        return sum([getDistance(node,ck) for ck in A.cells])/len(A.cells)

    candidates = sorted(
        (node for node in self.Nodes if (node.T==TP and node.ST<=A.ST)),
        key=mean_distance,
    )

    # dsync mode: output the closest node in the parent level, no testing
    if self.dsync in ('True','1'):
        return candidates[0] if candidates else None

    pvcut = 0.1

    # no candidate at this level
    if not candidates:
        if A!=self.Nodes[0]:
            return self.getParent(A,TP-1)
        return None

    nearest = candidates[0]

    # exactly one candidate at this level
    if len(candidates)==1:
        if nearest==self.Nodes[0]:
            return nearest
        previous_level = [node for node in self.Nodes if (node.T==TP-1 and node.ST<=A.ST)]
        if len(previous_level)==1:
            return nearest
        return self.getParent(A,TP-1)

    # several candidates: test closest against last-ranked candidate
    farthest = candidates[-1]
    X = [getDistance(nearest,cell) for cell in A.cells]
    Y = [getDistance(farthest,cell) for cell in A.cells]
    # Length adjustment: tile the vectors when they are shorter than SizeFactor
    SizeFactor = 50
    if len(X)<SizeFactor:
        X = X*int(SizeFactor/len(X))
    if len(Y)<SizeFactor:
        Y = Y*int(SizeFactor/len(Y))
    pv = ranksums(X,Y)[-1]
    if pv<pvcut:
        return nearest
    return self.getParent(A,TP-1)
Example #5
Source File: evaluation.py    From sktime with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
def ranksum_test(self, metric_name=None):
    """
    Pairwise Wilcoxon rank-sum test between estimators.

    Non-parametric test of whether two independent samples are drawn from
    the same distribution
    `<http://en.wikipedia.org/wiki/Wilcoxon_rank-sum_test>`_.
    The test is run for every ordered pair of estimators (including each
    estimator against itself) on the values returned by
    ``self._get_metrics_per_estimator_dataset`` for the given metric.

    Parameters
    ----------
    metric_name : optional
        Metric to compare; passed through ``self._validate_metric_name``.

    Returns
    -------
    ranksum_df : pandas.DataFrame
        One row per ordered pair with columns ``estimator_1``,
        ``estimator_2``, ``t_stat`` (the statistic returned by
        ``ranksums``) and ``p_val``.
    values_df_multiindex : pandas.DataFrame
        Same numbers in wide form: one row per ``estimator_1`` and
        two-level columns ``(estimator_2, "t_stat" | "p_val")``.
    """
    self._check_is_evaluated()
    metric_name = self._validate_metric_name(metric_name)
    metrics_per_estimator_dataset = \
        self._get_metrics_per_estimator_dataset(
            metric_name)

    # Collect rows in plain lists and build the frame once:
    # ``DataFrame.append`` no longer exists in pandas >= 2.0 and copying the
    # frame on every iteration was quadratic anyway.
    rows = []
    values = []
    pairs = itertools.product(metrics_per_estimator_dataset.keys(),
                              repeat=2)
    for est_1, est_2 in pairs:
        x = metrics_per_estimator_dataset[est_1]
        y = metrics_per_estimator_dataset[est_2]
        t_stat, p_val = ranksums(x, y)
        rows.append({
            "estimator_1": est_1,
            "estimator_2": est_2,
            "t_stat": t_stat,
            "p_val": p_val
        })
        values.extend((t_stat, p_val))

    ranksum_df = pd.DataFrame(
        rows, columns=["estimator_1", "estimator_2", "t_stat", "p_val"])
    values = np.asarray(values, dtype=float)

    index = ranksum_df["estimator_1"].unique()
    values_names = ["t_stat", "p_val"]
    col_idx = pd.MultiIndex.from_product([index, values_names])
    # pairs are generated row-major, so each row holds every (t_stat, p_val)
    # pair of one ``estimator_1`` against all estimators
    values_reshaped = values.reshape(len(index),
                                     len(values_names) * len(index))

    values_df_multiindex = pd.DataFrame(values_reshaped, index=index,
                                        columns=col_idx)

    return ranksum_df, values_df_multiindex