Python scipy.stats.rankdata() Examples

The following are code examples showing how to use scipy.stats.rankdata(). They are taken from open-source Python projects. You can vote up the examples you like or vote down the ones you don't.

Example 1
Project: koeken   Author: twbattaglia   File: hclust2.py    MIT License 6 votes vote down vote up
def compute_f_dists( self ):
    """Compute the feature (row) condensed distance matrix self.f_cdist_matrix.

    Loads it from a pickle when args.load_dist_matrix_f is set; otherwise
    computes pairwise distances over self.numpy_full_matrix using the metric
    named by self.fdf ("spearman" is implemented as "correlation" on
    row-ranked data).  Optionally pickles the result afterwards.
    """
    if args.load_dist_matrix_f:
        # BUG FIX: the matrix is pickled in binary mode ("wb" below), so it
        # must be read back in binary mode too; text mode breaks on Python 3.
        with open( args.load_dist_matrix_f, "rb" ) as inp:
            self.f_cdist_matrix = pickle.load( inp )

    else:
        dt = self.numpy_full_matrix

        if self.fdf == "spearman":
            # Spearman correlation == Pearson correlation of the ranks.
            dt_ranked = np.matrix([stats.rankdata(d) for d in dt])
            self.f_cdist_matrix = spd.pdist( dt_ranked, "correlation" )
            # NOTE(review): this early return skips the save-to-file branch
            # below for the spearman metric — kept as in the original.
            return

        if self.fdf == "pearson":
            # scipy.spatial.distance names the Pearson metric "correlation".
            self.fdf = 'correlation'

        self.f_cdist_matrix = spd.pdist( dt, self.fdf )

    if args.save_dist_matrix_f:
        with open( args.save_dist_matrix_f, "wb" ) as outf:
            pickle.dump( self.f_cdist_matrix, outf )
Example 2
Project: koeken   Author: twbattaglia   File: hclust2.py    MIT License 6 votes vote down vote up
def compute_s_dists( self ):
    """Compute the sample (column) condensed distance matrix self.s_cdist_matrix.

    Same as compute_f_dists but operates on the transposed data matrix and
    the self.sdf metric.  Loads from / saves to pickle files when the
    corresponding command-line flags are set.
    """
    if args.load_dist_matrix_s:
        # BUG FIX: the matrix is pickled in binary mode ("wb" below), so it
        # must be read back in binary mode too; text mode breaks on Python 3.
        with open( args.load_dist_matrix_s, "rb" ) as inp:
            self.s_cdist_matrix = pickle.load( inp )
    else:
        dt = self.numpy_full_matrix.transpose()

        if self.sdf == "spearman":
            # Spearman correlation == Pearson correlation of the ranks.
            dt_ranked = np.matrix([stats.rankdata(d) for d in dt])
            self.s_cdist_matrix = spd.pdist( dt_ranked, "correlation" )
            # NOTE(review): this early return skips the save-to-file branch
            # below for the spearman metric — kept as in the original.
            return

        if self.sdf == "pearson":
            # scipy.spatial.distance names the Pearson metric "correlation".
            self.sdf = 'correlation'

        self.s_cdist_matrix = spd.pdist( dt, self.sdf )

    if args.save_dist_matrix_s:
        with open( args.save_dist_matrix_s, "wb" ) as outf:
            pickle.dump( self.s_cdist_matrix, outf )
Example 3
Project: Multi-level-Stacker   Author: Jebq   File: ensemble.py    MIT License 6 votes vote down vote up
def predict(self, df):
    """Calls iteratively the predict function of every model.

    Args:
        df (:obj:`pd.DataFrame`): Training set for the first level.

    Returns:
        np.ndarray of shape (len(df), n_models): one column per model —
        class-1 probability for classifiers, raw predictions (optionally
        rank-normalized when self.binary_scale is set) for regressors.
    """
    predictions = np.zeros((len(df), len(self.models)))
    for i, model_name in enumerate(self.model_names):
        try:
            # Classifier path: use the probability of the positive class.
            predictions[:, i] = self.models[model_name].predict_proba(df)[:,1]
            print('{} - Classifier detected...probability of 1 predicted'.format(model_name))
        # BUG FIX: the original bare "except:" also swallowed SystemExit and
        # KeyboardInterrupt; Exception still covers the AttributeError raised
        # when the model exposes no predict_proba.
        except Exception:
            predictions[:, i] = self.models[model_name].predict(df).reshape(1,-1)[0]
            if self.binary_scale:
                # Map regressor output into (0, 1] via normalized ranks so it
                # is comparable to classifier probabilities.
                predictions[:, i] = rankdata(predictions[:, i])/len(predictions[:, i])

    return predictions
Example 4
Project: LaserTOF   Author: kyleuckert   File: test_mstats_basic.py    MIT License 6 votes vote down vote up
def test_ranking(self):
        x = ma.array([0,1,1,1,2,3,4,5,5,6,])
        assert_almost_equal(mstats.rankdata(x),
                           [1,3,3,3,5,6,7,8.5,8.5,10])
        x[[3,4]] = masked
        assert_almost_equal(mstats.rankdata(x),
                           [1,2.5,2.5,0,0,4,5,6.5,6.5,8])
        assert_almost_equal(mstats.rankdata(x, use_missing=True),
                            [1,2.5,2.5,4.5,4.5,4,5,6.5,6.5,8])
        x = ma.array([0,1,5,1,2,4,3,5,1,6,])
        assert_almost_equal(mstats.rankdata(x),
                           [1,3,8.5,3,5,7,6,8.5,3,10])
        x = ma.array([[0,1,1,1,2], [3,4,5,5,6,]])
        assert_almost_equal(mstats.rankdata(x),
                            [[1,3,3,3,5], [6,7,8.5,8.5,10]])
        assert_almost_equal(mstats.rankdata(x, axis=1),
                           [[1,3,3,3,5], [1,2,3.5,3.5,5]])
        assert_almost_equal(mstats.rankdata(x,axis=0),
                           [[1,1,1,1,1], [2,2,2,2,2,]]) 
Example 5
Project: LaserTOF   Author: kyleuckert   File: test_morestats.py    MIT License 6 votes vote down vote up
def test_trimmed2(self):
        x = [1.2, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 100.0]
        y = [0.0, 3.0, 3.5, 4.0, 4.5, 5.0, 5.5, 200.0]
        # Use center='trimmed'
        Xsq1, pval1 = stats.fligner(x, y, center='trimmed',
                                    proportiontocut=0.125)
        # Trim the data here, and use center='mean'
        Xsq2, pval2 = stats.fligner(x[1:-1], y[1:-1], center='mean')
        # Result should be the same.
        assert_almost_equal(Xsq1, Xsq2)
        assert_almost_equal(pval1, pval2)

    # The following test looks reasonable at first, but fligner() uses the
    # function stats.rankdata(), and in one of the cases in this test,
    # there are ties, while in the other (because of normal rounding
    # errors) there are not.  This difference leads to differences in the
    # third significant digit of W.
    #
    #def test_equal_mean_median(self):
    #    x = np.linspace(-1,1,21)
    #    y = x**3
    #    W1, pval1 = stats.fligner(x, y, center='mean')
    #    W2, pval2 = stats.fligner(x, y, center='median')
    #    assert_almost_equal(W1, W2)
    #    assert_almost_equal(pval1, pval2) 
Example 6
Project: jr-tools   Author: kingjr   File: base.py    BSD 2-Clause "Simplified" License 6 votes vote down vote up
def mannwhitneyu(x, y, use_continuity=True):
    """Adapted from scipy.stats.mannwhitneyu but includes direction of U.

    Parameters
    ----------
    x, y : array-like
        The two independent samples to compare.
    use_continuity : bool, optional
        Apply the continuity correction in the normal approximation
        (default True).

    Returns
    -------
    u2 : float
        Mann-Whitney U statistic of sample ``y`` (``n1*n2 - U_x``), which
        preserves the direction of the effect.
    p : float
        One-sided p-value from the normal approximation.

    Raises
    ------
    ValueError
        If all values are identical (the tie correction is zero).
    """
    from scipy.stats import rankdata, tiecorrect
    from scipy.stats import distributions
    from numpy import asarray
    x = asarray(x)
    y = asarray(y)
    n1 = len(x)
    n2 = len(y)
    ranked = rankdata(np.concatenate((x, y)))
    rankx = ranked[0:n1]  # ranks belonging to sample x
    u1 = n1*n2 + (n1*(n1+1))/2.0 - np.sum(rankx, axis=0)  # calc U for x
    u2 = n1*n2 - u1  # remainder is U for y
    T = tiecorrect(ranked)
    if T == 0:
        # BUG FIX: message previously read "amannwhitneyu".
        raise ValueError('All numbers are identical in mannwhitneyu')
    sd = np.sqrt(T * n1 * n2 * (n1+n2+1) / 12.0)

    if use_continuity:
        # normal approximation for prob calc with continuity correction
        z = abs((u1 - 0.5 - n1*n2/2.0) / sd)
    else:
        z = abs((u1 - n1*n2/2.0) / sd)  # normal approximation for prob calc

    return u2, distributions.norm.sf(z)
Example 7
Project: att   Author: Centre-Alt-Rendiment-Esportiu   File: test_mstats_basic.py    GNU General Public License v3.0 6 votes vote down vote up
def test_ranking(self):
        x = ma.array([0,1,1,1,2,3,4,5,5,6,])
        assert_almost_equal(mstats.rankdata(x),
                           [1,3,3,3,5,6,7,8.5,8.5,10])
        x[[3,4]] = masked
        assert_almost_equal(mstats.rankdata(x),
                           [1,2.5,2.5,0,0,4,5,6.5,6.5,8])
        assert_almost_equal(mstats.rankdata(x, use_missing=True),
                            [1,2.5,2.5,4.5,4.5,4,5,6.5,6.5,8])
        x = ma.array([0,1,5,1,2,4,3,5,1,6,])
        assert_almost_equal(mstats.rankdata(x),
                           [1,3,8.5,3,5,7,6,8.5,3,10])
        x = ma.array([[0,1,1,1,2], [3,4,5,5,6,]])
        assert_almost_equal(mstats.rankdata(x),
                            [[1,3,3,3,5], [6,7,8.5,8.5,10]])
        assert_almost_equal(mstats.rankdata(x, axis=1),
                           [[1,3,3,3,5], [1,2,3.5,3.5,5]])
        assert_almost_equal(mstats.rankdata(x,axis=0),
                           [[1,1,1,1,1], [2,2,2,2,2,]]) 
Example 8
Project: att   Author: Centre-Alt-Rendiment-Esportiu   File: test_morestats.py    GNU General Public License v3.0 6 votes vote down vote up
def test_trimmed2(self):
        x = [1.2, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 100.0]
        y = [0.0, 3.0, 3.5, 4.0, 4.5, 5.0, 5.5, 200.0]
        # Use center='trimmed'
        Xsq1, pval1 = stats.fligner(x, y, center='trimmed', proportiontocut=0.125)
        # Trim the data here, and use center='mean'
        Xsq2, pval2 = stats.fligner(x[1:-1], y[1:-1], center='mean')
        # Result should be the same.
        assert_almost_equal(Xsq1, Xsq2)
        assert_almost_equal(pval1, pval2)

    # The following test looks reasonable at first, but fligner() uses the
    # function stats.rankdata(), and in one of the cases in this test,
    # there are ties, while in the other (because of normal rounding
    # errors) there are not.  This difference leads to differences in the
    # third significant digit of W.
    #
    #def test_equal_mean_median(self):
    #    x = np.linspace(-1,1,21)
    #    y = x**3
    #    W1, pval1 = stats.fligner(x, y, center='mean')
    #    W2, pval2 = stats.fligner(x, y, center='median')
    #    assert_almost_equal(W1, W2)
    #    assert_almost_equal(pval1, pval2) 
Example 9
Project: ochem_predict_nn   Author: connorcoley   File: lowe_interactive_predict.py    MIT License 6 votes vote down vote up
def score_candidates(reactants, candidate_list, xs):
    """Score candidate products with the model and dump a ranked TSV report."""
    pred = model.predict(xs, batch_size = 20)[0]
    rank = ss.rankdata(pred)

    # Ask the user where to save, then write one row per candidate.
    fname = raw_input('Enter file name to save to: ') + '.dat'
    out_path = os.path.join(FROOT, fname)
    with open(out_path, 'w') as fid:
        fid.write('FOR REACTANTS {}\n'.format(Chem.MolToSmiles(reactants)))
        fid.write('Candidate product\tCandidate edit\tProbability\tRank\n')
        for idx, candidate in enumerate(candidate_list):
            # candidate[0] is the SMILES string, candidate[1] the edit.
            fid.write('{}\t{}\t{}\t{}\n'.format(
                candidate[0], candidate[1], pred[idx], 1 + len(pred) - rank[idx]
            ))
    print('Wrote to file {}'.format(out_path))
Example 10
Project: scattertext   Author: JasonKessler   File: ScaledFScore.py    Apache License 2.0 6 votes vote down vote up
def _get_scaler_function(scaler_algo):
		scaler = None
		if scaler_algo == 'normcdf':
			scaler = lambda x: norm.cdf(x, x.mean(), x.std())
		elif scaler_algo == 'lognormcdf':
			scaler = lambda x: norm.cdf(np.log(x), np.log(x).mean(), np.log(x).std())
		elif scaler_algo == 'percentile':
			scaler = lambda x: rankdata(x).astype(np.float64) / len(x)
		elif scaler_algo == 'percentiledense':
			scaler = lambda x: rankdata(x, method='dense').astype(np.float64) / len(x)
		elif scaler_algo == 'ecdf':
			from statsmodels.distributions import ECDF
			scaler = lambda x: ECDF(x)
		elif scaler_algo == 'none':
			scaler = lambda x: x
		else:
			raise InvalidScalerException("Invalid scaler alogrithm.  Must be either percentile or normcdf.")
		return scaler 
Example 11
Project: funimag   Author: paninski-lab   File: superpixel_analysis.py    MIT License 6 votes vote down vote up
def order_superpixels(permute_col, unique_pix, U_mat, V_mat):
    """
    Order superpixels according to brightness.

    Brightness of a superpixel is the product of the maxima of its spatial
    (U) and temporal (V) components.  Returns, for each superpixel in
    unique_pix, its 0-based descending-brightness rank (0 = brightest).
    """
    # Pull out the columns corresponding to the requested superpixels.
    permute_col = list(permute_col)
    pos = [permute_col.index(x) for x in unique_pix]
    U_mat = U_mat[:, pos]
    V_mat = V_mat[:, pos]

    # Brightness = max spatial intensity * max temporal intensity.
    # (BUG FIX: removed the dead "brightness = np.zeros(...)" initializer
    # that was immediately overwritten, and the unused np.argsort result.)
    u_max = U_mat.max(axis=0)
    v_max = V_mat.max(axis=0)
    brightness = u_max * v_max
    # "ordinal" ranks run 1..K ascending, so K - rank yields a 0-based
    # descending rank: the brightest superpixel maps to 0.
    brightness_rank = U_mat.shape[1] - ss.rankdata(brightness, method="ordinal")
    return brightness_rank
Example 12
Project: pygraphs   Author: vlivashkin   File: part3.py    MIT License 6 votes vote down vote up
def _print_results(results):
    """Print a tab-separated table of per-kernel ranks, one experiment per column."""
    ranks_by_column = {}
    for column_name, column_results in results.items():
        # Negate scores so that the best (highest) score receives rank 1.
        ranks_by_column[str(column_name)] = stats.rankdata(
            [-column_results[k.name] for k in kernels], 'min')

    columns_right_order = [
        '(100, 2, 0.1)',
        '(100, 2, 0.15)',
        '(100, 4, 0.1)',
        '(100, 4, 0.15)',
        '(200, 2, 0.1)',
        '(200, 2, 0.15)',
        '(200, 4, 0.1)',
        '(200, 4, 0.15)',
        'sum'
    ]

    print('\t'.join(columns_right_order))
    for idx, kernel in enumerate(kernels):
        row = [str(ranks_by_column[col_name][idx]) for col_name in columns_right_order]
        print(kernel.name, '\t', '\t'.join(row))
Example 13
Project: vnpy_crypto   Author: birforce   File: multicomp.py    MIT License 6 votes vote down vote up
def runbasic_old(self, useranks=False):
    """Accumulate per-group sums and means of the data (or of its ranks).

    Sets self.xx, self.groupsum, self.groupmean and self.groupmeanfilter.
    """
    # check: refactoring screwed up case useranks=True  (note kept from original)
    data = self.x
    if useranks:
        # Double argsort produces raw ranks; +1 starts them at 1, matching
        # the convention of stats.rankdata.
        self.xx = data[:, 1].argsort().argsort() + 1
    else:
        self.xx = data[:, 0]
    groupranksum = np.bincount(self.intlab, weights=self.xx)
    self.groupsum = groupranksum
    grouprankmean = groupranksum * 1.0 / self.groupnobs
    self.groupmean = grouprankmean
    # Broadcast each group's mean back onto its member observations.
    self.groupmeanfilter = grouprankmean[self.intlab]
Example 14
Project: Anamoly-Detection   Author: msmsk05   File: test_lof.py    BSD 2-Clause "Simplified" License 5 votes vote down vote up
def test_predict_rank(self):
    """Raw ranks must follow the outlier-score order and stay in range."""
    scores = self.clf.decision_function(self.X_test)
    ranks = self.clf._predict_rank(self.X_test)
    # BUG FIX: the same debug line was printed twice.
    print(ranks)

    # Rank order must match score order (small slack for ties)...
    assert_allclose(rankdata(ranks), rankdata(scores), atol=2)
    # ...and ranks must lie within [0, n_train].
    assert_array_less(ranks, self.X_train.shape[0] + 1)
    assert_array_less(-0.1, ranks)
Example 15
Project: Anamoly-Detection   Author: msmsk05   File: test_lof.py    BSD 2-Clause "Simplified" License 5 votes vote down vote up
def test_predict_rank_normalized(self):
    """Normalized ranks keep the score order and stay within [0, 1]."""
    scores = self.clf.decision_function(self.X_test)
    ranks = self.clf._predict_rank(self.X_test, normalized=True)

    # Rank order must match the score order (small slack for ties).
    assert_allclose(rankdata(ranks), rankdata(scores), atol=2)
    # Normalized ranks lie in [0, 1] (tiny numerical headroom).
    assert_array_less(ranks, 1.01)
    assert_array_less(-0.1, ranks)
Example 16
Project: Anamoly-Detection   Author: msmsk05   File: test_hbos.py    BSD 2-Clause "Simplified" License 5 votes vote down vote up
def test_predict_rank(self):
    """Raw ranks must follow the outlier-score order and stay in range."""
    scores = self.clf.decision_function(self.X_test)
    ranks = self.clf._predict_rank(self.X_test)

    # Rank order must match the score order (small slack for ties).
    assert_allclose(rankdata(ranks), rankdata(scores), atol=2)
    # Ranks are bounded by the training-set size and are non-negative.
    assert_array_less(ranks, self.X_train.shape[0] + 1)
    assert_array_less(-0.1, ranks)
Example 17
Project: Anamoly-Detection   Author: msmsk05   File: test_hbos.py    BSD 2-Clause "Simplified" License 5 votes vote down vote up
def test_predict_rank_normalized(self):
    """Normalized ranks keep the score order and stay within [0, 1]."""
    scores = self.clf.decision_function(self.X_test)
    ranks = self.clf._predict_rank(self.X_test, normalized=True)

    # Rank order must match the score order (small slack for ties).
    assert_allclose(rankdata(ranks), rankdata(scores), atol=2)
    # Normalized ranks lie in [0, 1] (tiny numerical headroom).
    assert_array_less(ranks, 1.01)
    assert_array_less(-0.1, ranks)
Example 18
Project: Anamoly-Detection   Author: msmsk05   File: test_mcd.py    BSD 2-Clause "Simplified" License 5 votes vote down vote up
def test_predict_rank(self):
    """Raw ranks must follow the outlier-score order and stay in range."""
    scores = self.clf.decision_function(self.X_test)
    ranks = self.clf._predict_rank(self.X_test)

    # Rank order must match the score order (small slack for ties).
    assert_allclose(rankdata(ranks), rankdata(scores), atol=2)
    # Ranks are bounded by the training-set size and are non-negative.
    assert_array_less(ranks, self.X_train.shape[0] + 1)
    assert_array_less(-0.1, ranks)
Example 19
Project: Anamoly-Detection   Author: msmsk05   File: test_iforest.py    BSD 2-Clause "Simplified" License 5 votes vote down vote up
def test_predict_rank(self):
    """Raw ranks must follow the outlier-score order and stay in range."""
    scores = self.clf.decision_function(self.X_test)
    ranks = self.clf._predict_rank(self.X_test)

    # Rank order must match the score order (slack for tie perturbations).
    assert_allclose(rankdata(ranks), rankdata(scores), atol=3)
    # Ranks are bounded by the training-set size and are non-negative.
    assert_array_less(ranks, self.X_train.shape[0] + 1)
    assert_array_less(-0.1, ranks)
Example 20
Project: Anamoly-Detection   Author: msmsk05   File: test_iforest.py    BSD 2-Clause "Simplified" License 5 votes vote down vote up
def test_predict_rank_normalized(self):
    """Normalized ranks keep the score order and stay within [0, 1]."""
    scores = self.clf.decision_function(self.X_test)
    ranks = self.clf._predict_rank(self.X_test, normalized=True)

    # Rank order must match the score order (slack for tie perturbations).
    assert_allclose(rankdata(ranks), rankdata(scores), atol=3)
    # Normalized ranks lie in [0, 1] (tiny numerical headroom).
    assert_array_less(ranks, 1.01)
    assert_array_less(-0.1, ranks)
Example 21
Project: Anamoly-Detection   Author: msmsk05   File: test_pca.py    BSD 2-Clause "Simplified" License 5 votes vote down vote up
def test_predict_rank(self):
    """Raw ranks must follow the outlier-score order and stay in range."""
    scores = self.clf.decision_function(self.X_test)
    ranks = self.clf._predict_rank(self.X_test)

    # Rank order must match the score order (slack for tie perturbations).
    assert_allclose(rankdata(ranks), rankdata(scores), atol=3)
    # Ranks are bounded by the training-set size and are non-negative.
    assert_array_less(ranks, self.X_train.shape[0] + 1)
    assert_array_less(-0.1, ranks)
Example 22
Project: Anamoly-Detection   Author: msmsk05   File: test_pca.py    BSD 2-Clause "Simplified" License 5 votes vote down vote up
def test_predict_rank_normalized(self):
    """Normalized ranks keep the score order and stay within [0, 1]."""
    scores = self.clf.decision_function(self.X_test)
    ranks = self.clf._predict_rank(self.X_test, normalized=True)

    # Rank order must match the score order (slack for tie perturbations).
    assert_allclose(rankdata(ranks), rankdata(scores), atol=3)
    # Normalized ranks lie in [0, 1] (tiny numerical headroom).
    assert_array_less(ranks, 1.01)
    assert_array_less(-0.1, ranks)
Example 23
Project: Anamoly-Detection   Author: msmsk05   File: test_knn.py    BSD 2-Clause "Simplified" License 5 votes vote down vote up
def test_predict_rank(self):
    """Raw ranks must follow the outlier-score order and stay in range."""
    scores = self.clf.decision_function(self.X_test)
    ranks = self.clf._predict_rank(self.X_test)

    # Rank order must match the score order (small slack for ties).
    assert_allclose(rankdata(ranks), rankdata(scores), atol=2)
    # Ranks are bounded by the training-set size and are non-negative.
    assert_array_less(ranks, self.X_train.shape[0] + 1)
    assert_array_less(-0.1, ranks)
Example 24
Project: Anamoly-Detection   Author: msmsk05   File: test_sos.py    BSD 2-Clause "Simplified" License 5 votes vote down vote up
def test_predict_rank(self):
    """Raw ranks must follow the outlier-score order and stay in range."""
    scores = self.clf.decision_function(self.X_test)
    ranks = self.clf._predict_rank(self.X_test)

    # Rank order must match the score order (small slack for ties).
    assert_allclose(rankdata(ranks), rankdata(scores), atol=2)
    # Ranks are bounded by the training-set size and are non-negative.
    assert_array_less(ranks, self.X_train.shape[0] + 1)
    assert_array_less(-0.1, ranks)
Example 25
Project: Anamoly-Detection   Author: msmsk05   File: test_sos.py    BSD 2-Clause "Simplified" License 5 votes vote down vote up
def test_predict_rank_normalized(self):
    """Normalized ranks keep the score order and stay within [0, 1]."""
    scores = self.clf.decision_function(self.X_test)
    ranks = self.clf._predict_rank(self.X_test, normalized=True)

    # Rank order must match the score order (small slack for ties).
    assert_allclose(rankdata(ranks), rankdata(scores), atol=2)
    # Normalized ranks lie in [0, 1] (tiny numerical headroom).
    assert_array_less(ranks, 1.01)
    assert_array_less(-0.1, ranks)
Example 26
Project: Anamoly-Detection   Author: msmsk05   File: test_lscp.py    BSD 2-Clause "Simplified" License 5 votes vote down vote up
def test_predict_rank(self):
    """Raw ranks must follow the outlier-score order and stay in range."""
    scores = self.clf.decision_function(self.X_test)
    ranks = self.clf._predict_rank(self.X_test)

    # Rank order must match the score order (slack for tie perturbations).
    assert_allclose(rankdata(ranks), rankdata(scores), atol=3)
    # Ranks are bounded by the training-set size and are non-negative.
    assert_array_less(ranks, self.X_train.shape[0] + 1)
    assert_array_less(-0.1, ranks)
Example 27
Project: Anamoly-Detection   Author: msmsk05   File: test_lscp.py    BSD 2-Clause "Simplified" License 5 votes vote down vote up
def test_predict_rank_normalized(self):
    """Normalized ranks keep the score order and stay within [0, 1]."""
    scores = self.clf.decision_function(self.X_test)
    ranks = self.clf._predict_rank(self.X_test, normalized=True)

    # Rank order must match the score order (slack for tie perturbations).
    assert_allclose(rankdata(ranks), rankdata(scores), atol=3)
    # Normalized ranks lie in [0, 1] (tiny numerical headroom).
    assert_array_less(ranks, 1.01)
    assert_array_less(-0.1, ranks)
Example 28
Project: Anamoly-Detection   Author: msmsk05   File: test_abod.py    BSD 2-Clause "Simplified" License 5 votes vote down vote up
def test_predict_rank(self):
    """Raw ranks must follow the outlier-score order and stay in range."""
    scores = self.clf.decision_function(self.X_test)
    ranks = self.clf._predict_rank(self.X_test)

    # Rank order must match the score order (wider slack for this model).
    assert_allclose(rankdata(ranks), rankdata(scores), atol=3.5)
    # Ranks are bounded by the training-set size and are non-negative.
    assert_array_less(ranks, self.X_train.shape[0] + 1)
    assert_array_less(-0.1, ranks)
Example 29
Project: Anamoly-Detection   Author: msmsk05   File: test_ocsvm.py    BSD 2-Clause "Simplified" License 5 votes vote down vote up
def test_predict_rank(self):
    """Raw ranks must follow the outlier-score order and stay in range."""
    scores = self.clf.decision_function(self.X_test)
    ranks = self.clf._predict_rank(self.X_test)

    # Rank order must match the score order (small slack for ties).
    assert_allclose(rankdata(ranks), rankdata(scores), atol=2)
    # Ranks are bounded by the training-set size and are non-negative.
    assert_array_less(ranks, self.X_train.shape[0] + 1)
    assert_array_less(-0.1, ranks)
Example 30
Project: Anamoly-Detection   Author: msmsk05   File: test_ocsvm.py    BSD 2-Clause "Simplified" License 5 votes vote down vote up
def test_predict_rank_normalized(self):
    """Normalized ranks keep the score order and stay within [0, 1]."""
    scores = self.clf.decision_function(self.X_test)
    ranks = self.clf._predict_rank(self.X_test, normalized=True)

    # Rank order must match the score order (small slack for ties).
    assert_allclose(rankdata(ranks), rankdata(scores), atol=2)
    # Normalized ranks lie in [0, 1] (tiny numerical headroom).
    assert_array_less(ranks, 1.01)
    assert_array_less(-0.1, ranks)
Example 31
Project: Anamoly-Detection   Author: msmsk05   File: test_feature_bagging.py    BSD 2-Clause "Simplified" License 5 votes vote down vote up
def test_predict_rank(self):
    """Raw ranks must follow the outlier-score order and stay in range."""
    scores = self.clf.decision_function(self.X_test)
    ranks = self.clf._predict_rank(self.X_test)

    # Rank order must match the score order (slack for tie perturbations).
    assert_allclose(rankdata(ranks), rankdata(scores), atol=3)
    # Ranks are bounded by the training-set size and are non-negative.
    assert_array_less(ranks, self.X_train.shape[0] + 1)
    assert_array_less(-0.1, ranks)
Example 32
Project: Anamoly-Detection   Author: msmsk05   File: test_feature_bagging.py    BSD 2-Clause "Simplified" License 5 votes vote down vote up
def test_predict_rank_normalized(self):
    """Normalized ranks keep the score order and stay within [0, 1]."""
    scores = self.clf.decision_function(self.X_test)
    ranks = self.clf._predict_rank(self.X_test, normalized=True)

    # Rank order must match the score order (slack for tie perturbations).
    assert_allclose(rankdata(ranks), rankdata(scores), atol=3)
    # Normalized ranks lie in [0, 1] (tiny numerical headroom).
    assert_array_less(ranks, 1.01)
    assert_array_less(-0.1, ranks)
Example 33
Project: Anamoly-Detection   Author: msmsk05   File: test_xgbod.py    BSD 2-Clause "Simplified" License 5 votes vote down vote up
def test_predict_rank(self):
    """Raw ranks must follow the outlier-score order and stay in range."""
    scores = self.clf.decision_function(self.X_test)
    ranks = self.clf._predict_rank(self.X_test)
    print(ranks)

    # Rank order must match the score order (widest slack for this model).
    assert_allclose(rankdata(ranks), rankdata(scores), atol=4)
    # Ranks are bounded by the training-set size and are non-negative.
    assert_array_less(ranks, self.X_train.shape[0] + 1)
    assert_array_less(-0.1, ranks)
Example 34
Project: Anamoly-Detection   Author: msmsk05   File: test_cblof.py    BSD 2-Clause "Simplified" License 5 votes vote down vote up
def test_predict_rank(self):
    """Raw ranks must follow the outlier-score order and stay in range."""
    scores = self.clf.decision_function(self.X_test)
    ranks = self.clf._predict_rank(self.X_test)

    # Rank order must match the score order (small slack for ties).
    assert_allclose(rankdata(ranks), rankdata(scores), atol=2)
    # Ranks are bounded by the training-set size and are non-negative.
    assert_array_less(ranks, self.X_train.shape[0] + 1)
    assert_array_less(-0.1, ranks)
Example 35
Project: Anamoly-Detection   Author: msmsk05   File: test_cblof.py    BSD 2-Clause "Simplified" License 5 votes vote down vote up
def test_predict_rank_normalized(self):
    """Normalized ranks keep the score order and stay within [0, 1]."""
    scores = self.clf.decision_function(self.X_test)
    ranks = self.clf._predict_rank(self.X_test, normalized=True)

    # Rank order must match the score order (small slack for ties).
    assert_allclose(rankdata(ranks), rankdata(scores), atol=2)
    # Normalized ranks lie in [0, 1] (tiny numerical headroom).
    assert_array_less(ranks, 1.01)
    assert_array_less(-0.1, ranks)
Example 36
Project: Anamoly-Detection   Author: msmsk05   File: test_loci.py    BSD 2-Clause "Simplified" License 5 votes vote down vote up
def test_predict_rank(self):
    """Raw ranks must follow the outlier-score order and stay in range."""
    scores = self.clf.decision_function(self.X_test)
    ranks = self.clf._predict_rank(self.X_test)

    # Rank order must match the score order (slack for tie perturbations).
    assert_allclose(rankdata(ranks), rankdata(scores), atol=3)
    # Ranks are bounded by the training-set size and are non-negative.
    assert_array_less(ranks, self.X_train.shape[0] + 1)
    assert_array_less(-0.1, ranks)
Example 37
Project: Anamoly-Detection   Author: msmsk05   File: test_loci.py    BSD 2-Clause "Simplified" License 5 votes vote down vote up
def test_predict_rank_normalized(self):
    """Normalized ranks keep the score order and stay within [0, 1]."""
    scores = self.clf.decision_function(self.X_test)
    ranks = self.clf._predict_rank(self.X_test, normalized=True)

    # Rank order must match the score order (slack for tie perturbations).
    assert_allclose(rankdata(ranks), rankdata(scores), atol=3)
    # Normalized ranks lie in [0, 1] (tiny numerical headroom).
    assert_array_less(ranks, 1.01)
    assert_array_less(-0.1, ranks)
Example 38
Project: pystran   Author: stijnvanhoey   File: sensitivity_regression.py    BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def _transform2rank(self, pars, output):
        '''
        hidden definition for rank transformation
        '''        
        print 'calclulating standardized values...'
        
        parranked = np.empty_like(pars)
        for i in range(pars.shape[1]):
            parranked[:,i] = stats.rankdata(pars[:,i])
        
        outputranked = np.empty_like(output)            
        for i in range(output.shape[1]):
            outputranked[:,i] = stats.rankdata(output[:,i])
        
        return parranked, outputranked 
Example 39
Project: Recession-Predictor   Author: tzhangwps   File: weighted_average.py    MIT License 5 votes vote down vote up
def calculate_model_weights(self):
    """
    Assign weights to each model's prediction.

    Ranks models by their best cross-validation score and maps each rank
    through self.rank_scheme to a weight, stored in self.model_weights
    and mirrored into self.metadata['Weights'].
    """
    cv_scores = [self.cv_results[name]['Best CV Score']
                 for name in self.cv_results]

    score_ranks = rankdata(cv_scores)
    # IDIOM FIX: enumerate replaces the manually maintained counter.
    # Both passes iterate the same dict, so the ordering is consistent.
    for iteration, model_name in enumerate(self.cv_results):
        self.model_weights[model_name] = self.rank_scheme[int(score_ranks[iteration])]
    self.metadata['Weights'] = self.model_weights
Example 40
Project: LaserTOF   Author: kyleuckert   File: test_mstats_basic.py    MIT License 5 votes vote down vote up
def test_rankdata(self):
    """stats.rankdata and stats.mstats.rankdata agree on unmasked samples."""
    for n in self.get_n():
        x, y, xm, ym = self.generate_xy_sample(n)
        # Only x is needed: compare the plain and masked implementations.
        assert_allclose(stats.rankdata(x), stats.mstats.rankdata(x))
Example 41
Project: LaserTOF   Author: kyleuckert   File: test_rank.py    MIT License 5 votes vote down vote up
def test_overflow(self):
        ntie, k = 2000, 5
        a = np.repeat(np.arange(k), ntie)
        n = a.size  # ntie * k
        out = tiecorrect(rankdata(a))
        assert_equal(out, 1.0 - k * (ntie**3 - ntie) / float(n**3 - n)) 
Example 42
Project: LaserTOF   Author: kyleuckert   File: test_rank.py    MIT License 5 votes vote down vote up
def test_empty(self):
        """stats.rankdata([]) should return an empty array."""
        a = np.array([], dtype=int)
        r = rankdata(a)
        assert_array_equal(r, np.array([], dtype=np.float64))
        r = rankdata([])
        assert_array_equal(r, np.array([], dtype=np.float64)) 
Example 43
Project: LaserTOF   Author: kyleuckert   File: test_rank.py    MIT License 5 votes vote down vote up
def test_basic(self):
        """Basic tests of stats.rankdata."""
        data = [100, 10, 50]
        expected = np.array([3.0, 1.0, 2.0], dtype=np.float64)
        a = np.array(data, dtype=int)
        r = rankdata(a)
        assert_array_equal(r, expected)
        r = rankdata(data)
        assert_array_equal(r, expected)

        data = [40, 10, 30, 10, 50]
        expected = np.array([4.0, 1.5, 3.0, 1.5, 5.0], dtype=np.float64)
        a = np.array(data, dtype=int)
        r = rankdata(a)
        assert_array_equal(r, expected)
        r = rankdata(data)
        assert_array_equal(r, expected)

        data = [20, 20, 20, 10, 10, 10]
        expected = np.array([5.0, 5.0, 5.0, 2.0, 2.0, 2.0], dtype=np.float64)
        a = np.array(data, dtype=int)
        r = rankdata(a)
        assert_array_equal(r, expected)
        r = rankdata(data)
        assert_array_equal(r, expected)
        # The docstring states explicitly that the argument is flattened.
        a2d = a.reshape(2, 3)
        r = rankdata(a2d)
        assert_array_equal(r, expected) 
Example 44
Project: LaserTOF   Author: kyleuckert   File: test_rank.py    MIT License 5 votes vote down vote up
def test_rankdata_object_string(self):
        min_rank = lambda a: [1 + sum(i < j for i in a) for j in a]
        max_rank = lambda a: [sum(i <= j for i in a) for j in a]
        ordinal_rank = lambda a: min_rank([(x, i) for i, x in enumerate(a)])

        def average_rank(a):
            return [(i + j) / 2.0 for i, j in zip(min_rank(a), max_rank(a))]

        def dense_rank(a):
            b = np.unique(a)
            return [1 + sum(i < j for i in b) for j in a]

        rankf = dict(min=min_rank, max=max_rank, ordinal=ordinal_rank,
                     average=average_rank, dense=dense_rank)

        def check_ranks(a):
            for method in 'min', 'max', 'dense', 'ordinal', 'average':
                out = rankdata(a, method=method)
                assert_array_equal(out, rankf[method](a))

        val = ['foo', 'bar', 'qux', 'xyz', 'abc', 'efg', 'ace', 'qwe', 'qaz']
        check_ranks(np.random.choice(val, 200))
        check_ranks(np.random.choice(val, 200).astype('object'))

        val = np.array([0, 1, 2, 2.718, 3, 3.141], dtype='object')
        check_ranks(np.random.choice(val, 200).astype('object')) 
Example 45
Project: LaserTOF   Author: kyleuckert   File: test_rank.py    MIT License 5 votes vote down vote up
def test_large_int(self):
        data = np.array([2**60, 2**60+1], dtype=np.uint64)
        r = rankdata(data)
        assert_array_equal(r, [1.0, 2.0])

        data = np.array([2**60, 2**60+1], dtype=np.int64)
        r = rankdata(data)
        assert_array_equal(r, [1.0, 2.0])

        data = np.array([2**60, -2**60+1], dtype=np.int64)
        r = rankdata(data)
        assert_array_equal(r, [2.0, 1.0]) 
Example 46
Project: LaserTOF   Author: kyleuckert   File: test_rank.py    MIT License 5 votes vote down vote up
def test_big_tie(self):
        for n in [10000, 100000, 1000000]:
            data = np.ones(n, dtype=int)
            r = rankdata(data)
            expected_rank = 0.5 * (n + 1)
            assert_array_equal(r, expected_rank * data,
                               "test failed with n=%d" % n) 
Example 47
Project: LaserTOF   Author: kyleuckert   File: test_rank.py    MIT License 5 votes vote down vote up
def test_cases():
    """Yield one sub-test per (values, method, expected) triple in _cases."""

    def check_case(values, method, expected):
        actual = rankdata(values, method=method)
        assert_array_equal(actual, expected)

    # nose-style generator test: each yielded tuple runs as its own case.
    for values, method, expected in _cases:
        yield check_case, values, method, expected
Example 48
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_algos.py    MIT License 5 votes vote down vote up
def test_scipy_compat(self):
    """libalgos.rank_1d_float64 must match scipy's rankdata (NaN stays NaN)."""
    from scipy.stats import rankdata

    samples = [
        np.array([nan, nan, 5.0, 5.0, 5.0, nan, 1, 2, 3, nan]),
        np.array([4.0, nan, 5.0, 5.0, 5.0, nan, 1, 2, 4.0, nan]),
    ]
    for arr in samples:
        missing = ~np.isfinite(arr)
        result = libalgos.rank_1d_float64(arr.copy())
        # scipy has no missing-value handling: rank NaNs as +inf, then
        # blank them out again to mirror pandas' NaN propagation.
        ranked_input = arr.copy()
        ranked_input[missing] = np.inf
        expected = rankdata(ranked_input)
        expected[missing] = nan
        assert_almost_equal(result, expected)
Example 49
Project: recruit   Author: Frank-qlu   File: test_algos.py    Apache License 2.0 5 votes vote down vote up
def test_scipy_compat(self):
    """Pandas' rank_1d_float64 must match scipy's rankdata away from NaNs."""
    from scipy.stats import rankdata

    def _check(arr):
        nan_mask = ~np.isfinite(arr)
        arr = arr.copy()
        result = libalgos.rank_1d_float64(arr)
        # Treat NaNs as +inf for scipy, then restore NaN in the expectation.
        arr[nan_mask] = np.inf
        exp = rankdata(arr)
        exp[nan_mask] = nan
        assert_almost_equal(result, exp)

    _check(np.array([nan, nan, 5., 5., 5., nan, 1, 2, 3, nan]))
    _check(np.array([4., nan, 5., 5., 5., nan, 1, 2, 4., nan]))
Example 50
Project: jr-tools   Author: kingjr   File: base.py    BSD 2-Clause "Simplified" License 5 votes vote down vote up
def repeated_spearman(X, y, dtype=None):
    """Computes Spearman correlations between a vector and a matrix.

    Parameters
    ----------
        X : np.array, shape (n_samples, n_measures ...)
            Data matrix onto which the vector is correlated.
        y : np.array, shape (n_samples)
            Data vector.
        dtype : type, optional
            Data type used to compute correlation values to optimize memory.

    Returns
    -------
        rho : np.array, shape (n_measures)

    Raises
    ------
        ValueError
            If `y` is not a vector or its length differs from X's first axis.
    """
    from scipy.stats import rankdata
    if not isinstance(X, np.ndarray):
        X = np.array(X)
    if X.ndim == 1:
        X = X[:, None]
    # Validate BEFORE flattening: the original check ran after the reshape
    # below, at which point X.ndim was always 2 and could never fail.
    # Also fixed the message, which previously rendered "equalnumber".
    if y.ndim != 1 or X.shape[0] != y.shape[0]:
        raise ValueError('y must be a vector, and X a matrix with an equal'
                         ' number of rows.')
    shape = X.shape
    X = np.reshape(X, [shape[0], -1])

    # Rank each column along the sample axis.
    X = np.apply_along_axis(rankdata, 0, X)
    y = np.apply_along_axis(rankdata, 0, y)
    # Double rank to ensure that normalization step of compute_corr
    # (X -= mean(X)) remains an integer.
    X *= 2
    y *= 2
    X = np.array(X, dtype=dtype)
    y = np.array(y, dtype=dtype)
    R = repeated_corr(X, y, dtype=type(y[0]))
    R = np.reshape(R, shape[1:])
    return R
Example 51
Project: att   Author: Centre-Alt-Rendiment-Esportiu   File: test_mstats_basic.py    GNU General Public License v3.0 5 votes vote down vote up
def test_rankdata(self):
    # mstats.rankdata must agree with stats.rankdata on unmasked data.
    for n in self.get_n():
        x, y, xm, ym = self.generate_xy_sample(n)
        plain_ranks = stats.rankdata(x)
        masked_ranks = stats.mstats.rankdata(x)
        assert_allclose(plain_ranks, masked_ranks)
Example 52
Project: att   Author: Centre-Alt-Rendiment-Esportiu   File: test_rank.py    GNU General Public License v3.0 5 votes vote down vote up
def test_empty(self):
        """stats.rankdata([]) should return an empty array."""
        a = np.array([], dtype=np.int)
        r = rankdata(a)
        assert_array_equal(r, np.array([], dtype=np.float64))
        r = rankdata([])
        assert_array_equal(r, np.array([], dtype=np.float64)) 
Example 53
Project: att   Author: Centre-Alt-Rendiment-Esportiu   File: test_rank.py    GNU General Public License v3.0 5 votes vote down vote up
def test_one(self):
        """Check stats.rankdata with an array of length 1."""
        data = [100]
        a = np.array(data, dtype=np.int)
        r = rankdata(a)
        assert_array_equal(r, np.array([1.0], dtype=np.float64))
        r = rankdata(data)
        assert_array_equal(r, np.array([1.0], dtype=np.float64)) 
Example 54
Project: att   Author: Centre-Alt-Rendiment-Esportiu   File: test_rank.py    GNU General Public License v3.0 5 votes vote down vote up
def test_large_int(self):
        data = np.array([2**60, 2**60+1], dtype=np.uint64)
        r = rankdata(data)
        assert_array_equal(r, [1.0, 2.0])

        data = np.array([2**60, 2**60+1], dtype=np.int64)
        r = rankdata(data)
        assert_array_equal(r, [1.0, 2.0])

        data = np.array([2**60, -2**60+1], dtype=np.int64)
        r = rankdata(data)
        assert_array_equal(r, [2.0, 1.0]) 
Example 55
Project: att   Author: Centre-Alt-Rendiment-Esportiu   File: test_rank.py    GNU General Public License v3.0 5 votes vote down vote up
def test_big_tie(self):
        for n in [10000, 100000, 1000000]:
            data = np.ones(n, dtype=int)
            r = rankdata(data)
            expected_rank = 0.5 * (n + 1)
            assert_array_equal(r, expected_rank * data,
                               "test failed with n=%d" % n) 
Example 56
Project: att   Author: Centre-Alt-Rendiment-Esportiu   File: test_rank.py    GNU General Public License v3.0 5 votes vote down vote up
def test_cases():
    """Generator test: check rankdata against every (values, method, expected) case."""

    def _verify(values, method, expected):
        assert_array_equal(rankdata(values, method=method), expected)

    for values, method, expected in _cases:
        yield _verify, values, method, expected
Example 57
Project: scattertext   Author: JasonKessler   File: TermDocMatrix.py    Apache License 2.0 5 votes vote down vote up
def _get_scaler_function(self, scaler_algo):
    """Return a vectorized scaling function chosen by name.

    Parameters
    ----------
    scaler_algo : str
        'percentile' -> rank-based percentile in (0, 1];
        'normcdf'    -> normal CDF using the sample mean/std;
        'none'       -> identity.

    Raises
    ------
    InvalidScalerException
        If `scaler_algo` is not one of the recognized names.
    """
    scaler = None
    if scaler_algo == 'percentile':
        scaler = lambda x: rankdata(x).astype(np.float64) / len(x)
    elif scaler_algo == 'normcdf':
        scaler = lambda x: norm.cdf(x, x.mean(), x.std())
    elif scaler_algo == 'none':
        scaler = lambda x: x
    else:
        # Fixed typo ("alogrithm") and listed all accepted options.
        raise InvalidScalerException(
            "Invalid scaler algorithm.  Must be percentile, normcdf, or none.")
    return scaler
Example 58
Project: scattertext   Author: JasonKessler   File: TermDocMatrix.py    Apache License 2.0 5 votes vote down vote up
def _get_percentiles_from_freqs(self, freqs):
    """Map each frequency to its average-rank percentile in (0, 1]."""
    n = len(freqs)
    return rankdata(freqs) / n
Example 59
Project: scattertext   Author: JasonKessler   File: Scalers.py    Apache License 2.0 5 votes vote down vote up
def scale_neg_1_to_1_with_zero_mean_rank_abs_max(v):
    # Spread v around zero, then replace each side by its normalised dense rank.
    spread = v * 2 - 1

    positives = rankdata(spread[spread > 0], 'dense')
    positives = positives / positives.max()

    negatives = rankdata(spread[spread < 0], 'dense')
    negatives = negatives / negatives.max()

    # Positive side keeps its rank fraction; negative side is mirrored below 0.
    spread[spread > 0] = positives
    spread[spread < 0] = -(negatives.max() - negatives)

    return scale_neg_1_to_1_with_zero_mean_abs_max(spread)
Example 60
Project: scattertext   Author: JasonKessler   File: Scalers.py    Apache License 2.0 5 votes vote down vote up
def percentile(vec, terms=None, other_vec=None):
    # Average-rank percentile of each entry, rescaled into [0, 1].
    ranked = rankdata(vec, method='average') * (1. / len(vec))
    return scale_0_to_1(ranked)
Example 61
Project: scattertext   Author: JasonKessler   File: Scalers.py    Apache License 2.0 5 votes vote down vote up
def dense_rank(vec, terms=None, other_vec=None):
    # Dense (tie-collapsing) rank fraction, rescaled into [0, 1].
    ranked = rankdata(vec, method='dense') * (1. / len(vec))
    return scale_0_to_1(ranked)
Example 62
Project: scattertext   Author: JasonKessler   File: Scalers.py    Apache License 2.0 5 votes vote down vote up
def percentile_min(vec, terms=None, other_vec=None):
    # Percentile using the minimum rank for ties, rescaled into [0, 1].
    ranked = rankdata(vec, method='min') * (1. / len(vec))
    return scale_0_to_1(ranked)
Example 63
Project: scattertext   Author: JasonKessler   File: CornerScore.py    Apache License 2.0 5 votes vote down vote up
def _get_percentiles_from_freqs(freqs):
    """Fraction of entries ranked at or below each frequency."""
    n = len(freqs)
    return rankdata(freqs) * 1. / n
Example 64
Project: scattertext   Author: JasonKessler   File: RankDifference.py    Apache License 2.0 5 votes vote down vote up
def get_scores(self, a, b):
    """Difference of normalized dense ranks between two score vectors.

    Parameters
    ----------
    a, b : array-like or pd.Series
        Parallel score vectors (e.g. per-term frequencies in two categories).

    Returns
    -------
    np.ndarray, or pd.Series when `a` is a Series (index preserved).
    """
    # Compute each dense ranking once; the original ranked `a` and `b` twice.
    a_ranks = rankdata(a, 'dense')
    b_ranks = rankdata(b, 'dense')
    to_ret = a_ranks / a_ranks.max() - b_ranks / b_ranks.max()

    # isinstance instead of type()==: also covers Series subclasses.
    if isinstance(a, pd.Series):
        return pd.Series(to_ret, index=a.index)
    return to_ret
Example 65
Project: scattertext   Author: JasonKessler   File: ScatterChart.py    Apache License 2.0 5 votes vote down vote up
def _term_rank_score_and_frequency_df(self, all_categories, category, other_categories, scores):
        """Build the per-term dataframe that backs the scatter chart.

        Adds corner scores for the category and its complement, optional
        p-values, color scores, and ordinal score ranks, then applies the
        configured frequency / PMI / unigram filters.

        Raises NoWordMeetsTermFrequencyRequirementsError when filtering
        removes every term.
        """
        df = self._get_term_category_frequencies()
        self._add_x_and_y_coords_to_term_df_if_injected(df)

        if scores is None:
            scores = self._get_default_scores(category, other_categories, df)
        category_column_name = category + ' freq'
        # Corner score of each term for the chosen category vs. all others.
        df['category score'] = CornerScore.get_scores_for_category(
            df[category_column_name],
            df[[c + ' freq' for c in other_categories]].sum(axis=1)
        )
        if self.scatterchartdata.term_significance is not None:
            df['p'] = get_p_vals(df, category_column_name,
                                 self.scatterchartdata.term_significance)
        # Same corner score with the two groups' roles swapped.
        df['not category score'] = CornerScore.get_scores_for_category(
            df[[c + ' freq' for c in other_categories]].sum(axis=1),
            df[category_column_name]
        )
        df['color_scores'] = scores
        if self.scatterchartdata.terms_to_include is None:
            # No explicit term list: prune by frequency and PMI thresholds.
            df = self._filter_bigrams_by_minimum_not_category_term_freq(
                category_column_name, other_categories, df)
            df = filter_bigrams_by_pmis(
                self._filter_by_minimum_term_frequency(all_categories, df),
                threshold_coef=self.scatterchartdata.pmi_threshold_coefficient
            )

        if self.scatterchartdata.filter_unigrams:
            df = filter_out_unigrams_that_only_occur_in_one_bigram(df)
        if len(df) == 0:
            raise NoWordMeetsTermFrequencyRequirementsError()
        # 'ordinal' gives every term a unique rank (ties broken by position).
        df['category score rank'] = rankdata(df['category score'], method='ordinal')
        df['not category score rank'] = rankdata(df['not category score'], method='ordinal')
        if self.scatterchartdata.max_terms and self.scatterchartdata.max_terms < len(df):
            assert self.scatterchartdata.max_terms > 0
            df = self._limit_max_terms(category, df)
        df = df.reset_index()
        return df
Example 66
Project: scattertext   Author: JasonKessler   File: DefaultBackgroundFrequencies.py    Apache License 2.0 5 votes vote down vote up
def get_background_rank_df(cls, frequency_path=None):
    # Dense-rank background frequencies and rescale them to (0, 1].
    bg_df = cls.get_background_frequency_df(frequency_path)
    bg_df['rank'] = rankdata(bg_df.background, method='dense')
    bg_df['background'] = bg_df['rank'] / bg_df['rank'].max()
    return bg_df[['background']]
Example 67
Project: scattertext   Author: JasonKessler   File: AssociationCompactor.py    Apache License 2.0 5 votes vote down vote up
def get_rank_df(self, term_doc_matrix):
    # Score every category against the remainder, then dense-rank each column.
    term_freq = term_doc_matrix.get_term_freq_df('')
    totals = term_freq.sum(axis=1)
    score_data = {
        category: self.scorer().get_scores(term_freq[category],
                                           totals - term_freq[category])
        for category in term_doc_matrix.get_categories()
    }
    return pd.DataFrame(score_data, index=term_freq.index).apply(
        lambda col: rankdata(col, 'dense'))
Example 68
Project: scattertext   Author: JasonKessler   File: DenseRankCharacteristicness.py    Apache License 2.0 5 votes vote down vote up
def _rerank_scores(self, scores):
    # Dense ranks normalised to (0, 1]; 0.5 marks the neutral midpoint.
    dense = rankdata(scores, 'dense')
    return dense / dense.max(), 0.5
Example 69
Project: scattertext   Author: JasonKessler   File: DenseRankCharacteristicness.py    Apache License 2.0 5 votes vote down vote up
def get_scores(self, corpus):
    """Score how characteristic each term in `corpus` is vs. background text.

    Returns
    -------
    (zero_marker, scores)
        zero_marker : float, x-axis position of neutral characteristicness.
        scores : pd.Series, term -> value in [0, 1], sorted descending.
    """
    term_ranks = self.term_ranker(corpus).get_ranks()

    # Align corpus totals with background frequencies, dropping unmatched terms.
    freq_df = pd.DataFrame({
        'corpus': term_ranks.sum(axis=1),
        'standard': self.background_frequencies.get_background_frequency_df()['background']}
    ).dropna()

    # Difference of normalised dense ranks: positive = corpus-characteristic.
    corpus_rank = rankdata(freq_df.corpus, 'dense')
    standard_rank = rankdata(freq_df.standard, 'dense')
    scores = corpus_rank / corpus_rank.max() - standard_rank / standard_rank.max()

    if self.rerank_ranks:
        rank_scores, zero_marker = self._rerank_scores(scores)
        freq_df['score'] = pd.Series(rank_scores, index=freq_df.index)
    else:
        # Place the zero marker proportionally within the raw score range.
        low, high = scores.min(), scores.max()
        if low < 0 and high > 0:
            zero_marker = -low / (high - low)
        elif low > 0:
            zero_marker = 0
        else:
            zero_marker = 1
        freq_df['score'] = scale(scores)
    return zero_marker, freq_df.sort_values(by='score', ascending=False)['score']
Example 70
Project: FUTU_Stop_Loss   Author: BigtoC   File: test_algos.py    MIT License 5 votes vote down vote up
def test_scipy_compat(self):
    """rank_1d_float64 should reproduce scipy ranks, with NaNs passed through."""
    from scipy.stats import rankdata

    def _check(arr):
        bad = ~np.isfinite(arr)
        arr = arr.copy()
        result = libalgos.rank_1d_float64(arr)
        # Rank NaNs as +inf for scipy, then restore NaN in the expectation.
        arr[bad] = np.inf
        expected = rankdata(arr)
        expected[bad] = nan
        assert_almost_equal(result, expected)

    _check(np.array([nan, nan, 5., 5., 5., nan, 1, 2, 3, nan]))
    _check(np.array([4., nan, 5., 5., 5., nan, 1, 2, 4., nan]))
Example 71
Project: pmf-automl   Author: rsheth80   File: plotting.py    BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def compare_ranks(regrets, num_decimals=10):
    """Plot the per-evaluation rank (mean +/- SE) of each method in `regrets`.

    Parameters
    ----------
    regrets : dict
        Method name -> regret array; all arrays share the same shape.
    num_decimals : int
        Rounding applied before ranking so that numerically-equal regrets
        tie instead of being ranked arbitrarily.
    """
    all_results = np.stack([regrets[k] for k in regrets.keys()], axis=2)
    all_results = np.around(all_results, num_decimals)
    ranks0 = np.apply_along_axis(st.rankdata, 2, all_results)
    ranks = {}
    # enumerate instead of indexing regrets.keys() repeatedly.
    for i, key in enumerate(regrets.keys()):
        ranks[key] = ranks0[:, :, i].squeeze()

    n_evals = regrets[list(regrets.keys())[0]].shape[0]
    plot_comparison(n_evals, ranks)
    # Raw string: '\p' is an invalid escape sequence in a normal literal.
    plt.ylabel(r'Rank (mean $\pm$ SE)')
    plt.show()
Example 72
Project: pmf-automl   Author: rsheth80   File: bo.py    BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def init_l1(Ytrain, Ftrain, ftest, n_init=5):
    """Pick warm-start pipelines: best average rank on the n_init datasets
    closest (in L1 distance) to the test dataset's features."""
    l1_dist = np.abs(Ftrain - ftest).sum(axis=1)
    closest = np.argsort(l1_dist)[:n_init]
    # Keep only pipelines with no missing results on the selected datasets.
    complete_rows = np.where(
        np.invert(np.isnan(Ytrain[:, closest].sum(axis=1))))[0]
    ranks = np.apply_along_axis(st.rankdata, 0,
                                Ytrain[complete_rows[:, None], closest])
    mean_ranks = ranks.mean(axis=1)
    # Highest mean rank first.
    ordered = complete_rows[np.argsort(mean_ranks)[::-1]]
    return ordered[:n_init]
Example 73
Project: pygraphs   Author: vlivashkin   File: stats.py    MIT License 5 votes vote down vote up
def __get_ranked(results):
    """
    Rank kernels by performance, best kernel getting rank 1.

    :param results: dict (key - kernel name, value - performance metric)
    :return: dict (key - kernel name, value - rank)
    """
    # Invert rankdata so the highest metric receives rank 1.
    rank_info = (1 + len(results)) - rankdata(list(results.values()))
    # dict(zip(...)) replaces the original index loop; dicts preserve
    # insertion order, so keys and values stay aligned.
    return dict(zip(results.keys(), rank_info))
Example 74
Project: pygraphs   Author: vlivashkin   File: scorer.py    MIT License 5 votes vote down vote up
def ranking(measure1_ari, measure2_ari):
    """Spearman rank correlation between two ARI vectors (ranked descending)."""
    assert measure1_ari.shape == measure2_ari.shape
    n = measure1_ari.shape[0]

    # Rank both measures, largest ARI first (hence the negation).
    rank1 = stats.rankdata(-measure1_ari)
    rank2 = stats.rankdata(-measure2_ari)

    # Sum of squared rank differences for this pair of measures.
    squared_diff_total = np.sum(np.power(rank1 - rank2, 2))

    # Spearman's formula for the correlation coefficient.
    return 1 - (6 * squared_diff_total) / ((n - 1) * n * (n + 1))
Example 75
Project: vnpy_crypto   Author: birforce   File: infotheo.py    MIT License 5 votes vote down vote up
def discretize(X, method="ef", nbins=None):
    """
    Discretize `X` into integer bin labels (starting at 1).

    Parameters
    ----------
    X : array_like
        Data to discretize.
    method : string
        "ef" is equal-frequency binning
        "ew" is equal-width binning
    nbins : int, optional
        Number of bins.  Default is floor(sqrt(N))

    Returns
    -------
    discrete : ndarray
        Bin number for each observation.

    Raises
    ------
    ValueError
        If `method` is not "ef" or "ew" (previously this fell through to a
        NameError on the unbound result variable).
    """
    nobs = len(X)
    if nbins is None:  # identity check; `== None` is non-idiomatic
        nbins = np.floor(np.sqrt(nobs))
    if method == "ef":
        # Equal frequency: bin index grows with the rank of each value.
        return np.ceil(nbins * stats.rankdata(X) / nobs)
    if method == "ew":
        width = np.max(X) - np.min(X)
        width = np.floor(width / nbins)
        # stats.fastsort was removed from SciPy; np.sort + np.argsort give
        # the same (sorted values, sort indices) pair.
        svec = np.sort(X)
        ivec = np.argsort(X)
        discrete = np.zeros(nobs)
        binnum = 1
        base = svec[0]
        discrete[ivec[0]] = binnum
        for i in range(1, nobs):
            # Start a new bin whenever the next value exceeds the bin width.
            if svec[i] < base + width:
                discrete[ivec[i]] = binnum
            else:
                base = svec[i]
                binnum += 1
                discrete[ivec[i]] = binnum
        return discrete
    raise ValueError("method must be 'ef' or 'ew', got %r" % (method,))
#TODO: looks okay but needs more robust tests for corner cases
Example 76
Project: vnpy_crypto   Author: birforce   File: multicomp.py    MIT License 5 votes vote down vote up
def __init__(self, x, useranks=False, uni=None, intlab=None):
        '''descriptive statistics by groups

        Parameters
        ----------
        x : array, 2d
            first column data, second column group labels
        useranks : boolean
            if true, then use ranks as data corresponding to the
            scipy.stats.rankdata definition (start at 1, ties get mean)
        uni, intlab : arrays (optional)
            to avoid call to unique, these can be given as inputs


        '''
        self.x = np.asarray(x)
        if intlab is None:
            # Derive unique group labels and their integer codes in one pass.
            uni, intlab = np.unique(x[:,1], return_inverse=True)
        elif uni is None:
            # Integer codes supplied by caller; only labels are missing.
            uni = np.unique(x[:,1])

        self.useranks = useranks


        self.uni = uni
        self.intlab = intlab
        # Observation count per group, indexed by integer group code.
        self.groupnobs = groupnobs = np.bincount(intlab)

        #temporary until separated and made all lazy
        self.runbasic(useranks=useranks)
Example 77
Project: ConvKB   Author: daiquocnguyen   File: eval.py    Apache License 2.0 4 votes vote down vote up
def test_prediction(x_batch, y_batch, head_or_tail='head'):
                        """Filtered link-prediction metrics for a batch of triples.

                        For each test triple, corrupts the head (or tail) with every
                        known entity, removes corruptions that are real triples in
                        train/valid/test ("filtered" setting), scores all candidates,
                        and accumulates MR, MRR, and Hits@10 from the true triple's
                        ordinal rank.  Relies on enclosing-scope names: entity2id,
                        entity_array, train/valid/test, predict, args.
                        """
                        hits10 = 0.0
                        mrr = 0.0
                        mr = 0.0

                        for i in range(len(x_batch)):
                            # One candidate triple per known entity.
                            new_x_batch = np.tile(x_batch[i], (len(entity2id), 1))
                            new_y_batch = np.tile(y_batch[i], (len(entity2id), 1))
                            if head_or_tail == 'head':
                                new_x_batch[:, 0] = entity_array
                            else:  # 'tail'
                                new_x_batch[:, 2] = entity_array

                            # Drop corruptions that are actual known triples.
                            lstIdx = []
                            for tmpIdxTriple in range(len(new_x_batch)):
                                tmpTriple = (new_x_batch[tmpIdxTriple][0], new_x_batch[tmpIdxTriple][1],
                                             new_x_batch[tmpIdxTriple][2])
                                if (tmpTriple in train) or (tmpTriple in valid) or (tmpTriple in test): #also remove the valid test triple
                                    lstIdx.append(tmpIdxTriple)
                            new_x_batch = np.delete(new_x_batch, lstIdx, axis=0)
                            new_y_batch = np.delete(new_y_batch, lstIdx, axis=0)

                            #thus, insert the valid test triple again, to the beginning of the array
                            new_x_batch = np.insert(new_x_batch, 0, x_batch[i], axis=0) #thus, the index of the valid test triple is equal to 0
                            new_y_batch = np.insert(new_y_batch, 0, y_batch[i], axis=0)

                            # while len(new_x_batch) % ((int(args.neg_ratio) + 1) * args.batch_size) != 0:
                            #    new_x_batch = np.append(new_x_batch, [x_batch[i]], axis=0)
                            #    new_y_batch = np.append(new_y_batch, [y_batch[i]], axis=0)

                            # Score candidates in model-sized chunks, then the remainder.
                            results = []
                            listIndexes = range(0, len(new_x_batch), (int(args.neg_ratio) + 1) * args.batch_size)
                            for tmpIndex in range(len(listIndexes) - 1):
                                results = np.append(results, predict(
                                    new_x_batch[listIndexes[tmpIndex]:listIndexes[tmpIndex + 1]],
                                    new_y_batch[listIndexes[tmpIndex]:listIndexes[tmpIndex + 1]]))
                            results = np.append(results,
                                                predict(new_x_batch[listIndexes[-1]:], new_y_batch[listIndexes[-1]:]))

                            results = np.reshape(results, -1)
                            # Ordinal rank of the true triple (index 0) among all scores.
                            results_with_id = rankdata(results, method='ordinal')
                            _filter = results_with_id[0]

                            mr += _filter
                            mrr += 1.0 / _filter
                            if _filter <= 10:
                                hits10 += 1

                        return np.array([mr, mrr, hits10])
Example 78
Project: rank-based-INT   Author: edm1   File: rank_based_inverse_normal_transformation.py    MIT License 4 votes vote down vote up
def rank_INT(series, c=3.0/8, stochastic=True):
    """ Perform rank-based inverse normal transformation on pandas series.
        If stochastic is True ties are given rank randomly, otherwise ties will
        share the same value. NaN values are ignored.

        Args:
            param1 (pandas.Series):   Series of values to transform
            param2 (Optional[float]): Constant parameter (Blom's constant)
            param3 (Optional[bool]):  Whether to randomise rank of ties

        Returns:
            pandas.Series
    """

    # Check input (NOTE: asserts are stripped when running under `python -O`)
    assert isinstance(series, pd.Series)
    assert isinstance(c, float)
    assert isinstance(stochastic, bool)

    # Fixed seed so stochastic tie-breaking is reproducible.
    np.random.seed(123)

    # Take original series indexes so the result can be re-aligned at the end.
    orig_idx = series.index

    # Drop NaNs
    series = series.loc[~pd.isnull(series)]

    # Get ranks
    if stochastic:
        # Shuffle by index first so 'ordinal' tie-breaking is randomised.
        series = series.loc[np.random.permutation(series.index)]
        rank = ss.rankdata(series, method="ordinal")
    else:
        # Get rank, ties are averaged
        rank = ss.rankdata(series, method="average")

    # Convert numpy array back to series
    rank = pd.Series(rank, index=series.index)

    # Convert rank to normal distribution
    transformed = rank.apply(rank_to_normal, c=c, n=len(rank))

    return transformed[orig_idx]
Example 79
Project: funimag   Author: paninski-lab   File: superpixel_analysis.py    MIT License 4 votes vote down vote up
def fast_sep_nmf(M, r, th, normalize=1):
    """
    Find pure superpixels via successive projection: solve M = M[:, K] H for a
    column subset K of M.

    Parameters:
    ----------------
    M: 2d np.array, dimension T x idx
        temporal components of superpixels.
    r: int scalar
        maximum number of pure superpixels to select.
    th: double scalar, correlation threshold
        stop once no column's residual relative norm exceeds th.
    normalize: Boolean (1/0).
        Normalize the L1 norm of each column to 1 if 1. Default is 1.

    Return:
    ----------------
    pure_pixels: 1d np.ndarray of column indices of M chosen as pure.
    """
    pure_pixels = []
    if normalize == 1:
        M = M / np.sum(M, axis=0, keepdims=True)

    col_norms = np.sum(M ** 2, axis=0, keepdims=True)
    col_norms_orig = col_norms.copy()
    col_norms_sqrt = np.sqrt(col_norms)
    initial_sqrt = np.sqrt(col_norms)
    found = 0
    U = np.zeros([M.shape[0], r])
    while found < r and (col_norms_sqrt / initial_sqrt).max() > th:
        # Column with the largest residual norm relative to its original norm.
        relative = col_norms / col_norms_orig
        best = np.where(relative == relative.max())[1][0]
        # Break ties (up to 1e-6 relative precision) by largest original norm.
        ties = np.where((relative.max() - relative) / relative.max() <= 1e-6)[1]
        if len(ties) > 1:
            best = ties[np.where(col_norms_orig[0, ties] == (col_norms_orig[0, ties]).max())[0][0]]

        pure_pixels.append(best)
        U[:, found] = M[:, best].copy()
        # Gram-Schmidt: orthogonalize against previously selected columns.
        for prev in range(found):
            U[:, found] = U[:, found] - U[:, prev] * sum(U[:, prev] * U[:, found])
        U[:, found] = U[:, found] / np.sqrt(sum(U[:, found] ** 2))

        # Deflate every column's residual norm by its projection on U[:, found].
        col_norms = np.maximum(0, col_norms - np.matmul(U[:, [found]].T, M) ** 2)
        col_norms_sqrt = np.sqrt(col_norms)
        found = found + 1

    return np.array(pure_pixels)
Example 80
Project: funimag   Author: paninski-lab   File: superpixel_analysis.py    MIT License 4 votes vote down vote up
def vanilla_nmf_lasso(Yd, num_component, maxiter, tol, penalty_param, c=None):
    """Alternating L1-penalized NMF on a video-like array Yd.

    Alternates two non-negative Lasso fits (spatial `a`, temporal `c`) with an
    intercept/background term `b` until the penalized residual changes by less
    than `tol` or `maxiter` iterations pass.  Components are normalized and
    reordered by brightness before computing per-component correlation images.

    Returns a dict with keys "a", "c", "b", "res", "corr_img_all_r".
    """
    # Shift data to be non-negative along the temporal axis.
    if Yd.min() < 0:
        Yd -= Yd.min(axis=2, keepdims=True);

    # Flatten the two spatial dimensions: y0 is (pixels, frames).
    y0 = Yd.reshape(np.prod(Yd.shape[:2]),-1,order="F");
    if c is None:
        # Random temporal initialization scaled to the data's mean energy.
        c = np.random.rand(y0.shape[1],num_component);
        c = c*np.sqrt(y0.mean()/num_component);

    # Two Lasso solvers: one for temporal traces, one for spatial footprints.
    clf_c = linear_model.Lasso(alpha=(penalty_param/(2*y0.shape[0])),positive=True,fit_intercept=False);
    clf_a = linear_model.Lasso(alpha=(penalty_param/(2*y0.shape[1])),positive=True,fit_intercept=True);
    res = np.zeros(maxiter);
    for iters in range(maxiter):
        # Update spatial components (with intercept) given temporal ones.
        temp = clf_a.fit(c, y0.T);
        a = temp.coef_;
        b = temp.intercept_;
        b = b.reshape(b.shape[0],1,order="F");
        # Update temporal components given spatial ones, then refresh background.
        c = clf_c.fit(a, y0-b).coef_;
        b = np.maximum(0, y0.mean(axis=1,keepdims=True)-(a*(c.mean(axis=0,keepdims=True))).sum(axis=1,keepdims=True));

        # Penalized objective: squared residual + L1 penalty on a and c.
        res[iters] = np.linalg.norm(y0 - np.matmul(a, c.T) - b,"fro")**2 + penalty_param*(abs(a).sum() + abs(c).sum());
        if iters > 0 and abs(res[iters] - res[iters-1])/res[iters-1] <= tol:
            break;
    if iters > 0:
        print(abs(res[iters] - res[iters-1])/res[iters-1]);

    # Normalize spatial components to unit L2 norm; move scale into c.
    temp = np.sqrt((a**2).sum(axis=0,keepdims=True));
    c = c*temp;
    a = a/temp;
    # Order components by brightness (peak spatial * peak temporal value).
    brightness = np.zeros(a.shape[1]);
    a_max = a.max(axis=0);
    c_max = c.max(axis=0);
    brightness = a_max * c_max;
    brightness_rank = np.argsort(-brightness);
    a = a[:,brightness_rank];
    c = c[:,brightness_rank];

    # Correlation image of each temporal trace with every pixel.
    corr_img_all_r = a.copy();
    for ii in range(a.shape[1]):
        corr_img_all_r[:,ii] = vcorrcoef2(y0, c[:,ii]);
    corr_img_all_r = corr_img_all_r.reshape(Yd.shape[0],Yd.shape[1],-1,order="F");
    return {"a":a, "c":c, "b":b, "res":res, "corr_img_all_r":corr_img_all_r}