Python scipy.stats.rankdata() Examples

The following are 29 code examples of scipy.stats.rankdata(), collected from open source projects. The originating project and source file are noted above each example. You may also want to browse the other functions and classes available in the scipy.stats module.
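Before diving into the project examples, here is a minimal sketch of rankdata itself (the data below is our own, not taken from any project): it assigns 1-based ranks to the elements of an array, and the method argument controls how ties are resolved.

import numpy as np
from scipy.stats import rankdata

x = np.array([30, 10, 20, 20])
print(rankdata(x))                    # [4.  1.  2.5 2.5] -- ties get the average of their ranks
print(rankdata(x, method='dense'))    # [3 1 2 2]         -- tied values share a rank, with no gaps
print(rankdata(x, method='ordinal'))  # [4 1 2 3]         -- ties broken by order of appearance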
Example #1
Source File: test_mstats_basic.py    From GraphicDesignPatternByPython with MIT License
def test_ranking(self):
        x = ma.array([0,1,1,1,2,3,4,5,5,6,])
        assert_almost_equal(mstats.rankdata(x),
                           [1,3,3,3,5,6,7,8.5,8.5,10])
        x[[3,4]] = masked
        assert_almost_equal(mstats.rankdata(x),
                           [1,2.5,2.5,0,0,4,5,6.5,6.5,8])
        assert_almost_equal(mstats.rankdata(x, use_missing=True),
                            [1,2.5,2.5,4.5,4.5,4,5,6.5,6.5,8])
        x = ma.array([0,1,5,1,2,4,3,5,1,6,])
        assert_almost_equal(mstats.rankdata(x),
                           [1,3,8.5,3,5,7,6,8.5,3,10])
        x = ma.array([[0,1,1,1,2], [3,4,5,5,6,]])
        assert_almost_equal(mstats.rankdata(x),
                            [[1,3,3,3,5], [6,7,8.5,8.5,10]])
        assert_almost_equal(mstats.rankdata(x, axis=1),
                           [[1,3,3,3,5], [1,2,3.5,3.5,5]])
        assert_almost_equal(mstats.rankdata(x,axis=0),
                           [[1,1,1,1,1], [2,2,2,2,2,]]) 
Example #2
Source File: lowe_interactive_predict.py    From ochem_predict_nn with MIT License
def score_candidates(reactants, candidate_list, xs):

	pred = model.predict(xs, batch_size = 20)[0]
	rank = ss.rankdata(pred)

	fname = raw_input('Enter file name to save to: ') + '.dat'
	with open(os.path.join(FROOT, fname), 'w') as fid:
		fid.write('FOR REACTANTS {}\n'.format(Chem.MolToSmiles(reactants)))
		fid.write('Candidate product\tCandidate edit\tProbability\tRank\n')
		for (c, candidate) in enumerate(candidate_list):
			candidate_smile = candidate[0]
			candidate_edit = candidate[1]
			fid.write('{}\t{}\t{}\t{}\n'.format(
				candidate_smile, candidate_edit, pred[c], 1 + len(pred) - rank[c]
			))
	print('Wrote to file {}'.format(os.path.join(FROOT, fname))) 
Example #3
Source File: kernels.py    From BrainSpace with BSD 3-Clause "New" or "Revised" License
def _build_kernel(x, kernel, gamma=None):

    if kernel in {'pearson', 'spearman'}:
        if kernel == 'spearman':
            x = np.apply_along_axis(rankdata, 1, x)
        return np.corrcoef(x)

    if kernel in {'cosine', 'normalized_angle'}:
        x = 1 - squareform(pdist(x, metric='cosine'))
        if kernel == 'normalized_angle':
            x = 1 - np.arccos(x, x)/np.pi
        return x

    if kernel == 'gaussian':
        if gamma is None:
            gamma = 1 / x.shape[1]
        return rbf_kernel(x, gamma=gamma)

    if callable(kernel):
        return kernel(x)

    raise ValueError("Unknown kernel '{0}'.".format(kernel)) 
Example #4
Source File: ScaledFScore.py    From scattertext with Apache License 2.0
def _get_scaler_function(scaler_algo):
        scaler = None
        if scaler_algo == 'normcdf':
            scaler = lambda x: norm.cdf(x, x.mean(), x.std())
        elif scaler_algo == 'lognormcdf':
            scaler = lambda x: norm.cdf(np.log(x), np.log(x).mean(), np.log(x).std())
        elif scaler_algo == 'percentile':
            scaler = lambda x: rankdata(x).astype(np.float64) / len(x)
        elif scaler_algo == 'percentiledense':
            scaler = lambda x: rankdata(x, method='dense').astype(np.float64) / len(x)
        elif scaler_algo == 'ecdf':
            from statsmodels.distributions import ECDF
            scaler = lambda x: ECDF(x)
        elif scaler_algo == 'none':
            scaler = lambda x: x
        else:
            raise InvalidScalerException("Invalid scaler algorithm. Must be one of 'normcdf', 'lognormcdf', 'percentile', 'percentiledense', 'ecdf', or 'none'.")
        return scaler 
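As a quick illustration of the 'percentile' scaler above (the data here is made up): dividing the average ranks by the number of observations maps raw scores into (0, 1].

import numpy as np
from scipy.stats import rankdata

x = np.array([10., 20., 20., 50.])
print(rankdata(x).astype(np.float64) / len(x))  # [0.25  0.625 0.625 1.   ]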
Example #5
Source File: multicomp.py    From vnpy_crypto with MIT License
def runbasic_old(self, useranks=False):
        #check: refactoring screwed up case useranks=True

        #groupxsum = np.bincount(intlab, weights=X[:,0])
        #groupxmean = groupxsum * 1.0 / groupnobs
        x = self.x
        if useranks:
            self.xx = x[:,1].argsort().argsort() + 1  #rankraw
        else:
            self.xx = x[:,0]
        self.groupsum = groupranksum = np.bincount(self.intlab, weights=self.xx)
        #print('groupranksum', groupranksum, groupranksum.shape, self.groupnobs.shape)
        # start at 1 for stats.rankdata :
        self.groupmean = grouprankmean = groupranksum * 1.0 / self.groupnobs # + 1
        self.groupmeanfilter = grouprankmean[self.intlab]
        #return grouprankmean[intlab] 
Example #6
Source File: multicomp.py    From vnpy_crypto with MIT License
def runbasic(self, useranks=False):
        #check: refactoring screwed up case useranks=True

        #groupxsum = np.bincount(intlab, weights=X[:,0])
        #groupxmean = groupxsum * 1.0 / groupnobs
        x = self.x
        if useranks:
            xuni, xintlab = np.unique(x[:,0], return_inverse=True)
            ranksraw = x[:,0].argsort().argsort() + 1  #rankraw
            self.xx = GroupsStats(np.column_stack([ranksraw, xintlab]),
                                  useranks=False).groupmeanfilter
        else:
            self.xx = x[:,0]
        self.groupsum = groupranksum = np.bincount(self.intlab, weights=self.xx)
        #print('groupranksum', groupranksum, groupranksum.shape, self.groupnobs.shape)
        # start at 1 for stats.rankdata :
        self.groupmean = grouprankmean = groupranksum * 1.0 / self.groupnobs # + 1
        self.groupmeanfilter = grouprankmean[self.intlab]
        #return grouprankmean[intlab] 
Example #7
Source File: multicomp.py    From vnpy_crypto with MIT License
def rankdata(x):
    '''rankdata, equivalent to scipy.stats.rankdata

    just a different implementation, I have not yet compared speed

    '''
    uni, intlab = np.unique(x[:,0], return_inverse=True)
    groupnobs = np.bincount(intlab)
    groupxsum = np.bincount(intlab, weights=x[:,0])
    groupxmean = groupxsum * 1.0 / groupnobs

    rankraw = x[:,0].argsort().argsort()
    groupranksum = np.bincount(intlab, weights=rankraw)
    # start at 1 for stats.rankdata :
    grouprankmean = groupranksum * 1.0 / groupnobs + 1
    return grouprankmean[intlab]


#new 
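The docstring above claims equivalence with scipy.stats.rankdata; a quick sanity check (our own snippet, assuming the bincount-based rankdata defined above is in scope) could look like this:

import numpy as np
from scipy import stats

data = np.array([3., 1., 2., 2., 5.]).reshape(-1, 1)
print(rankdata(data))              # bincount-based version above -> [4.  1.  2.5 2.5 5. ]
print(stats.rankdata(data[:, 0]))  # scipy.stats reference        -> [4.  1.  2.5 2.5 5. ]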
Example #8
Source File: test_morestats.py    From Computable with MIT License
def test_trimmed2(self):
        x = [1.2, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 100.0]
        y = [0.0, 3.0, 3.5, 4.0, 4.5, 5.0, 5.5, 200.0]
        # Use center='trimmed'
        Xsq1, pval1 = stats.fligner(x, y, center='trimmed', proportiontocut=0.125)
        # Trim the data here, and use center='mean'
        Xsq2, pval2 = stats.fligner(x[1:-1], y[1:-1], center='mean')
        # Result should be the same.
        assert_almost_equal(Xsq1, Xsq2)
        assert_almost_equal(pval1, pval2)

    # The following test looks reasonable at first, but fligner() uses the
    # function stats.rankdata(), and in one of the cases in this test,
    # there are ties, while in the other (because of normal rounding
    # errors) there are not.  This difference leads to differences in the
    # third significant digit of W.
    #
    #def test_equal_mean_median(self):
    #    x = np.linspace(-1,1,21)
    #    y = x**3
    #    W1, pval1 = stats.fligner(x, y, center='mean')
    #    W2, pval2 = stats.fligner(x, y, center='median')
    #    assert_almost_equal(W1, W2)
    #    assert_almost_equal(pval1, pval2) 
Example #9
Source File: plot_tutorial2.py    From BrainSpace with BSD 3-Clause "New" or "Revised" License
def fusion(*args):
    from scipy.stats import rankdata
    from sklearn.preprocessing import minmax_scale

    max_rk = [None] * len(args)
    masks = [None] * len(args)
    for j, a in enumerate(args):
        m = masks[j] = a != 0
        a[m] = rankdata(a[m])
        max_rk[j] = a[m].max()

    max_rk = min(max_rk)
    for j, a in enumerate(args):
        m = masks[j]
        a[m] = minmax_scale(a[m], feature_range=(1, max_rk))

    return np.hstack(args)


# fuse the matrices 
Example #10
Source File: relabel_noisy_data.py    From freesound-classification with Apache License 2.0
def score_samples(y_true, y_score):
    scores = []

    y_true = csr_matrix(y_true)
    y_score = -y_score

    n_samples, n_labels = y_true.shape

    for i, (start, stop) in enumerate(zip(y_true.indptr, y_true.indptr[1:])):
        relevant = y_true.indices[start:stop]

        if (relevant.size == 0 or relevant.size == n_labels):
            # If all labels are relevant or irrelevant, the score is
            # defined to be 1; the label ranking has no meaning.
            aux = 1.
        else:
            scores_i = y_score[i]
            rank = rankdata(scores_i, 'max')[relevant]
            L = rankdata(scores_i[relevant], 'max')
            aux = (L / rank).mean()

        scores.append(aux)

    return np.array(scores) 
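A small usage sketch of score_samples (toy data of our own; assumes numpy plus the csr_matrix and rankdata imports the function relies on are in scope):

import numpy as np

y_true = np.array([[1, 0, 0],
                   [0, 0, 1]])
y_score = np.array([[0.9, 0.6, 0.3],
                    [0.2, 0.8, 0.4]])
# first sample: its only relevant label has the top score, so the score is 1.0;
# second sample: its relevant label is ranked second, so the score is 0.5
print(score_samples(y_true, y_score))  # [1.  0.5]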
Example #11
Source File: estimator_checks.py    From Mastering-Elasticsearch-7.0 with MIT License
def check_decision_proba_consistency(name, estimator_orig):
    # Check whether an estimator having both decision_function and
    # predict_proba methods has outputs with perfect rank correlation.

    centers = [(2, 2), (4, 4)]
    X, y = make_blobs(n_samples=100, random_state=0, n_features=4,
                      centers=centers, cluster_std=1.0, shuffle=True)
    X_test = np.random.randn(20, 2) + 4
    estimator = clone(estimator_orig)

    if (hasattr(estimator, "decision_function") and
            hasattr(estimator, "predict_proba")):

        estimator.fit(X, y)
        a = estimator.predict_proba(X_test)[:, 1]
        b = estimator.decision_function(X_test)
        assert_array_equal(rankdata(a), rankdata(b)) 
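The same idea in a minimal standalone form (our own sketch, not part of scikit-learn's checks): for a logistic-regression classifier the sigmoid link is strictly monotonic, so predict_proba and decision_function should rank test points identically.

import numpy as np
from scipy.stats import rankdata
from sklearn.datasets import make_blobs
from sklearn.linear_model import LogisticRegression

X, y = make_blobs(n_samples=100, centers=[(2, 2), (4, 4)], cluster_std=1.0, random_state=0)
X_test = np.random.RandomState(0).randn(20, 2) + 3

clf = LogisticRegression().fit(X, y)
proba = clf.predict_proba(X_test)[:, 1]   # probability of the positive class
scores = clf.decision_function(X_test)    # signed distance to the separating hyperplane
np.testing.assert_array_equal(rankdata(proba), rankdata(scores))  # same rank order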
Example #12
Source File: test_ocsvm.py    From pyod with BSD 2-Clause "Simplified" License
def test_predict_rank(self):
        pred_scores = self.clf.decision_function(self.X_test)
        pred_ranks = self.clf._predict_rank(self.X_test)

        # assert the order is preserved
        assert_allclose(rankdata(pred_ranks), rankdata(pred_scores), atol=3.5)
        assert_array_less(pred_ranks, self.X_train.shape[0] + 1)
        assert_array_less(-0.1, pred_ranks) 
Example #13
Source File: test_abod.py    From pyod with BSD 2-Clause "Simplified" License
def test_predict_rank_normalized(self):
        pred_scores = self.clf.decision_function(self.X_test)
        pred_ranks = self.clf._predict_rank(self.X_test, normalized=True)

        # assert the order is preserved
        assert_allclose(rankdata(pred_ranks), rankdata(pred_scores), atol=3.5)
        assert_array_less(pred_ranks, 1.01)
        assert_array_less(-0.1, pred_ranks) 
Example #14
Source File: linear_blend.py    From freesound-classification with Apache License 2.0
def to_ranks(values):
    return np.array([rankdata(r) for r in values]) 
Example #15
Source File: test_abod.py    From pyod with BSD 2-Clause "Simplified" License
def test_predict_rank(self):
        pred_scores = self.clf.decision_function(self.X_test)
        pred_ranks = self.clf._predict_rank(self.X_test)

        # assert the order is preserved
        assert_allclose(rankdata(pred_ranks), rankdata(pred_scores), atol=3.5)
        assert_array_less(pred_ranks, self.X_train.shape[0] + 1)
        assert_array_less(-0.1, pred_ranks) 
Example #16
Source File: test_lscp.py    From pyod with BSD 2-Clause "Simplified" License
def test_predict_rank_normalized(self):
        pred_scores = self.clf.decision_function(self.X_test)
        pred_ranks = self.clf._predict_rank(self.X_test, normalized=True)

        # assert the order is preserved
        assert_allclose(rankdata(pred_ranks), rankdata(pred_scores), atol=3)
        assert_array_less(pred_ranks, 1.01)
        assert_array_less(-0.1, pred_ranks) 
Example #17
Source File: test_lscp.py    From pyod with BSD 2-Clause "Simplified" License
def test_predict_rank(self):
        pred_scores = self.clf.decision_function(self.X_test)
        pred_ranks = self.clf._predict_rank(self.X_test)

        # assert the order is preserved
        assert_allclose(rankdata(pred_ranks), rankdata(pred_scores), atol=3)
        assert_array_less(pred_ranks, self.X_train.shape[0] + 1)
        assert_array_less(-0.1, pred_ranks) 
Example #18
Source File: test_sos.py    From pyod with BSD 2-Clause "Simplified" License
def test_predict_rank(self):
        pred_scores = self.clf.decision_function(self.X_test)
        pred_ranks = self.clf._predict_rank(self.X_test)

        # assert the order is preserved
        assert_allclose(rankdata(pred_ranks), rankdata(pred_scores), atol=2)
        assert_array_less(pred_ranks, self.X_train.shape[0] + 1)
        assert_array_less(-0.1, pred_ranks) 
Example #19
Source File: test_knn.py    From pyod with BSD 2-Clause "Simplified" License
def test_predict_rank_normalized(self):
        pred_scores = self.clf.decision_function(self.X_test)
        pred_ranks = self.clf._predict_rank(self.X_test, normalized=True)

        # assert the order is preserved
        assert_allclose(rankdata(pred_ranks), rankdata(pred_scores), atol=3)
        assert_array_less(pred_ranks, 1.01)
        assert_array_less(-0.1, pred_ranks) 
Example #20
Source File: test_knn.py    From pyod with BSD 2-Clause "Simplified" License
def test_predict_rank(self):
        pred_scores = self.clf.decision_function(self.X_test)
        pred_ranks = self.clf._predict_rank(self.X_test)

        # assert the order is preserved
        assert_allclose(rankdata(pred_ranks), rankdata(pred_scores), atol=3)
        assert_array_less(pred_ranks, self.X_train.shape[0] + 1)
        assert_array_less(-0.1, pred_ranks) 
Example #21
Source File: test_knn.py    From pyod with BSD 2-Clause "Simplified" License
def test_predict_rank_normalized(self):
        pred_scores = self.clf.decision_function(self.X_test)
        pred_ranks = self.clf._predict_rank(self.X_test, normalized=True)

        # assert the order is preserved
        assert_allclose(rankdata(pred_ranks), rankdata(pred_scores), atol=2)
        assert_array_less(pred_ranks, 1.01)
        assert_array_less(-0.1, pred_ranks) 
Example #22
Source File: test_knn.py    From pyod with BSD 2-Clause "Simplified" License
def test_predict_rank(self):
        pred_scores = self.clf.decision_function(self.X_test)
        pred_ranks = self.clf._predict_rank(self.X_test)

        # assert the order is preserved
        assert_allclose(rankdata(pred_ranks), rankdata(pred_scores), atol=2)
        assert_array_less(pred_ranks, self.X_train.shape[0] + 1)
        assert_array_less(-0.1, pred_ranks) 
Example #23
Source File: test_pca.py    From pyod with BSD 2-Clause "Simplified" License
def test_predict_rank(self):
        pred_scores = self.clf.decision_function(self.X_test)
        pred_ranks = self.clf._predict_rank(self.X_test)

        # assert the order is preserved
        assert_allclose(rankdata(pred_ranks), rankdata(pred_scores), atol=3)
        assert_array_less(pred_ranks, self.X_train.shape[0] + 1)
        assert_array_less(-0.1, pred_ranks) 
Example #24
Source File: test_sod.py    From pyod with BSD 2-Clause "Simplified" License
def test_predict_rank_normalized(self):
        pred_scores = self.clf.decision_function(self.X_test)
        pred_ranks = self.clf._predict_rank(self.X_test, normalized=True)

        # assert the order is preserved
        assert_allclose(rankdata(pred_ranks), rankdata(pred_scores), atol=2)
        assert_array_less(pred_ranks, 1.01)
        assert_array_less(-0.1, pred_ranks) 
Example #25
Source File: test_sod.py    From pyod with BSD 2-Clause "Simplified" License
def test_predict_rank(self):
        pred_scores = self.clf.decision_function(self.X_test)
        pred_ranks = self.clf._predict_rank(self.X_test)
        # assert the order is preserved
        assert_allclose(rankdata(pred_ranks), rankdata(pred_scores), atol=2)
        assert_array_less(pred_ranks, self.X_train.shape[0] + 1)
        assert_array_less(-0.1, pred_ranks) 
Example #26
Source File: test_iforest.py    From pyod with BSD 2-Clause "Simplified" License
def test_predict_rank_normalized(self):
        pred_scores = self.clf.decision_function(self.X_test)
        pred_ranks = self.clf._predict_rank(self.X_test, normalized=True)

        # assert the order is preserved
        assert_allclose(rankdata(pred_ranks), rankdata(pred_scores), atol=3)
        assert_array_less(pred_ranks, 1.01)
        assert_array_less(-0.1, pred_ranks) 
Example #27
Source File: test_iforest.py    From pyod with BSD 2-Clause "Simplified" License
def test_predict_rank(self):
        pred_scores = self.clf.decision_function(self.X_test)
        pred_ranks = self.clf._predict_rank(self.X_test)

        # assert the order is preserved
        assert_allclose(rankdata(pred_ranks), rankdata(pred_scores), atol=3)
        assert_array_less(pred_ranks, self.X_train.shape[0] + 1)
        assert_array_less(-0.1, pred_ranks) 
Example #28
Source File: RankDifference.py    From scattertext with Apache License 2.0
def get_scores(self, a, b):
		to_ret = (rankdata(a, 'dense') / np.max(rankdata(a, 'dense'))
		          - rankdata(b, 'dense') / np.max(rankdata(b, 'dense')))

		if type(a) == pd.Series:
			return pd.Series(to_ret, index=a.index)
		return to_ret 
Example #29
Source File: test_cof.py    From pyod with BSD 2-Clause "Simplified" License
def test_predict_rank(self):
        pred_scores = self.clf.decision_function(self.X_test)
        pred_ranks = self.clf._predict_rank(self.X_test)
        print(pred_ranks)

        # assert the order is preserved
        assert_allclose(rankdata(pred_ranks), rankdata(pred_scores), atol=2)
        assert_array_less(pred_ranks, self.X_train.shape[0] + 1)
        assert_array_less(-0.1, pred_ranks)