Python scipy.stats.ranksums() Examples
The following are 5 code examples of scipy.stats.ranksums().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module scipy.stats, or try the search function.
Example #1
Source File: match.py From pscore_match with BSD 2-Clause "Simplified" License | 6 votes |
def rank_test(covariates, groups):
    """
    Wilcoxon rank sum test for the distribution of treatment and control
    covariates.

    For every column of ``covariates`` the rows where ``groups == 1`` are
    compared against the rows where ``groups == 0`` with
    ``scipy.stats.ranksums``.

    Parameters
    ----------
    covariates : DataFrame
        Dataframe with one covariate per column.
        If matches are with replacement, then duplicates should be
        included as additional rows.
    groups : array-like
        Treatment assignments, must be 2 groups (coded 1 and 0). Plain
        lists and tuples are accepted and converted to a NumPy array.

    Returns
    -------
    numpy.ndarray
        One p-value per column of ``covariates``, in column order.
    """
    # A plain list/tuple compared with ``== 1`` yields a single bool rather
    # than an element-wise mask, so coerce those to an array first.  Series
    # and ndarrays are left untouched so pandas index alignment is preserved.
    if isinstance(groups, (list, tuple)):
        groups = np.asarray(groups)

    # The masks do not depend on the column, so build them once.
    treated = groups == 1
    control = groups == 0

    colnames = list(covariates.columns)
    pvalues = np.zeros(len(colnames))
    for j, name in enumerate(colnames):
        column = covariates[name]
        pvalues[j] = ranksums(column[treated], column[control]).pvalue
    return pvalues
Example #2
Source File: compare_genomes.py From mCaller with MIT License | 6 votes |
def compare_by_position(bed1,bed2,xmfa):
    """
    Compare per-position probability distributions between two bed files.

    Each input line must hold eight tab-separated fields:
    chromosome, start, end, motif, percent methylated, strand, number of
    reads, and a comma-separated list of probabilities.  Positions are
    keyed by (chromosome, start, end, strand).  For every position present
    in both files the two probability lists are compared with the
    Mann-Whitney U test, the Wilcoxon rank-sum test, an independent t-test
    and the two-sample Kolmogorov-Smirnov test.  Positions whose KS
    p-value is below 0.9 are printed to stdout as one space-separated line:
    position, (perc_meth, num_reads) from each file, then the four p-values.

    Parameters
    ----------
    bed1, bed2 : str
        Paths of the two tab-separated files to compare.
    xmfa :
        Accepted for interface compatibility; not used by this function.

    Returns
    -------
    None
    """
    pos_dict = {}
    for i, bed in enumerate([bed1, bed2]):
        pos_dict[i] = {}
        with open(bed, 'r') as fi:
            for line in fi:
                # example line: 2 1892198 1892199 TCMMTMTTMMM 0.5 - 16 <probabilities>
                (csome, start, end, motif, perc_meth, strand, num_reads,
                 probabilities) = tuple(line.split('\t'))
                pos_dict[i][(csome, start, end, strand)] = (
                    (perc_meth, num_reads),
                    np.asarray([float(p) for p in probabilities.strip().split(',')]),
                )

    for pos in pos_dict[0]:
        if pos not in pos_dict[1]:
            continue

        # Compare the first file's sample against the SECOND file's sample;
        # comparing a sample with itself always gives p == 1.
        probs1 = pos_dict[0][pos][1]
        probs2 = pos_dict[1][pos][1]

        try:
            u, pval = mannwhitneyu(probs1, probs2, alternative='two-sided')
        except ValueError:
            u, pval = 'none', 'identical'
        u2, pval2 = ranksums(probs1, probs2)
        try:
            t, pval3 = ttest_ind(probs1, probs2)
        except Exception:
            # Best effort: report a placeholder instead of aborting the scan,
            # but let KeyboardInterrupt/SystemExit propagate.
            t, pval3 = 'none', 'missing df'
        d, pval4 = ks_2samp(probs1, probs2)

        if pval4 < 0.9:
            # A single pre-joined argument prints identically under the
            # Python 2 print statement and the Python 3 print function.
            print(' '.join(str(field) for field in (
                pos, pos_dict[0][pos][0], pos_dict[1][pos][0],
                pval, pval2, pval3, pval4)))
Example #3
Source File: test_stats.py From GraphicDesignPatternByPython with MIT License | 5 votes |
def test_ranksums_result_attributes(self):
    """Run ``stats.ranksums`` on two integer ranges and hand the result,
    together with the expected field names, to ``check_named_results``."""
    expected_fields = ('statistic', 'pvalue')
    outcome = stats.ranksums(np.arange(5), np.arange(25))
    # check_named_results is a helper defined outside this snippet;
    # presumably it verifies the result exposes the named fields.
    check_named_results(outcome, expected_fields)
Example #4
Source File: scdiff.py From scdiff with MIT License | 4 votes |
def getParent(self,A,TP):
    """
    Find the parent node for cluster ``A`` at parent level ``TP``.

    Candidate parents are the entries of ``self.Nodes`` whose ``T`` equals
    ``TP`` and whose ``ST`` does not exceed ``A.ST``; they are ordered by
    their mean ``getDistance`` to the cells of ``A`` (closest first).

    * When ``self.dsync`` is ``'True'`` or ``'1'`` the closest candidate is
      returned directly (``None`` when there is no candidate).
    * With several candidates, the distances from A's cells to the closest
      and to the farthest candidate are compared with a rank-sum test; the
      closest candidate is accepted when the p-value is below 0.1,
      otherwise the search continues at level ``TP - 1``.
    * With a single candidate, it is accepted when it equals
      ``self.Nodes[0]`` or when level ``TP - 1`` also holds exactly one
      eligible node; otherwise the search continues at level ``TP - 1``.
    * With no candidate, the search continues at level ``TP - 1`` unless
      ``A`` equals ``self.Nodes[0]``, in which case ``None`` is returned.
    """
    def mean_distance(node):
        # average distance from this node to every cell of A
        return sum([getDistance(node, ck) for ck in A.cells]) / len(A.cells)

    candidates = sorted(
        [node for node in self.Nodes if (node.T == TP and node.ST <= A.ST)],
        key=mean_distance
    )

    # Time sync disabled: output the closest node in the parent level.
    if self.dsync == 'True' or self.dsync == '1':
        return candidates[0] if len(candidates) > 0 else None

    how_many = len(candidates)

    if how_many == 0:
        # Nothing eligible at this level: climb one level, except for
        # the first node of self.Nodes, which yields None.
        if A != self.Nodes[0]:
            return self.getParent(A, TP - 1)
        return None

    if how_many == 1:
        only = candidates[0]
        if only == self.Nodes[0]:
            return only
        one_level_up = [
            node for node in self.Nodes
            if (node.T == TP - 1 and node.ST <= A.ST)
        ]
        if len(one_level_up) == 1:
            return only
        return self.getParent(A, TP - 1)

    # Several candidates: is the closest one significantly closer than
    # the farthest one?
    p_cutoff = 0.1
    min_length = 50  # short distance vectors are repeated up to about this size
    to_closest = [getDistance(candidates[0], cell) for cell in A.cells]
    to_farthest = [getDistance(candidates[-1], cell) for cell in A.cells]
    if len(to_closest) < min_length:
        to_closest = to_closest * int(min_length / len(to_closest))
    if len(to_farthest) < min_length:
        to_farthest = to_farthest * int(min_length / len(to_farthest))

    p_value = ranksums(to_closest, to_farthest)[-1]
    if p_value < p_cutoff:
        return candidates[0]
    return self.getParent(A, TP - 1)
Example #5
Source File: evaluation.py From sktime with BSD 3-Clause "New" or "Revised" License | 4 votes |
def ranksum_test(self, metric_name=None):
    """
    Wilcoxon rank-sum test between every ordered pair of estimators.

    Non-parametric test of whether two independent samples come from the
    same distribution. For each ordered pair of estimators (an estimator
    paired with itself included) ``scipy.stats.ranksums`` is applied to
    their per-dataset metric values
    `<http://en.wikipedia.org/wiki/Wilcoxon_rank-sum_test>`_.

    Parameters
    ----------
    metric_name : str, optional
        Name of the metric to compare; validated through
        ``self._validate_metric_name``.

    Returns
    -------
    ranksum_df : pandas.DataFrame
        Long format, one row per ordered estimator pair, with columns
        ``estimator_1``, ``estimator_2``, ``t_stat`` and ``p_val``.
    values_df_multiindex : pandas.DataFrame
        Wide format: rows are the first estimator, columns are a
        MultiIndex of (second estimator, ``t_stat`` / ``p_val``).
    """
    self._check_is_evaluated()
    metric_name = self._validate_metric_name(metric_name)
    metrics_per_estimator_dataset = self._get_metrics_per_estimator_dataset(
        metric_name)

    values_names = ["t_stat", "p_val"]

    # Collect rows and build the frame once: DataFrame.append was removed
    # in pandas 2.0 and appending inside the loop copied the frame each time.
    records = []
    flat_values = []
    perms = itertools.product(metrics_per_estimator_dataset.keys(), repeat=2)
    for est_1, est_2 in perms:
        x = metrics_per_estimator_dataset[est_1]
        y = metrics_per_estimator_dataset[est_2]
        t_stat, p_val = ranksums(x, y)
        records.append({
            "estimator_1": est_1,
            "estimator_2": est_2,
            "t_stat": t_stat,
            "p_val": p_val
        })
        flat_values.extend((t_stat, p_val))

    ranksum_df = pd.DataFrame(
        records,
        columns=["estimator_1", "estimator_2"] + values_names)
    values = np.array(flat_values, dtype=float)

    index = ranksum_df["estimator_1"].unique()
    col_idx = pd.MultiIndex.from_product([index, values_names])
    # One row per first estimator; each row holds (t_stat, p_val) for every
    # second estimator, matching the iteration order of itertools.product.
    values_reshaped = values.reshape(len(index),
                                     len(values_names) * len(index))

    values_df_multiindex = pd.DataFrame(values_reshaped,
                                        index=index,
                                        columns=col_idx)

    return ranksum_df, values_df_multiindex