Python scipy.stats.kendalltau() Examples

The following are code examples for showing how to use scipy.stats.kendalltau(). They are from open source Python projects. You can vote up the examples you like or vote down the ones you don't like.

Example 1
Project: LaserTOF   Author: kyleuckert   File: test_mstats_basic.py    MIT License 6 votes vote down vote up
def test_kendalltau(self):
        """Check mstats.kendalltau against stored (tau, p-value) pairs."""
        nan = np.nan

        # Short samples: each input list ends with a NaN.
        short_x = ma.fix_invalid([5.05, 6.75, 3.21, 2.66, nan])
        short_y = ma.fix_invalid([1.65, 26.5, -5.93, 7.96, nan])
        short_z = ma.fix_invalid([1.65, 2.64, 2.64, 6.95, nan])
        tau_xy = np.asarray(mstats.kendalltau(short_x, short_y))
        assert_almost_equal(tau_xy, [+0.3333333, 0.4969059])
        tau_xz = np.asarray(mstats.kendalltau(short_x, short_z))
        assert_almost_equal(tau_xz, [-0.5477226, 0.2785987])

        # Longer samples with many repeated values; the NaN sits at a
        # different position in each input.
        long_x = ma.fix_invalid([0, 0, 0, 0, 20, 20, 0, 60, 0, 20,
                                 10, 10, 0, 40, 0, 20, 0, 0, 0, 0, 0, nan])
        long_y = ma.fix_invalid([0, 80, 80, 80, 10, 33, 60, 0, 67, 27,
                                 25, 80, 80, 80, 80, 80, 80, 0, 10, 45, nan, 0])
        assert_almost_equal(np.asarray(mstats.kendalltau(long_x, long_y)),
                            [-0.1585188, 0.4128009])

        # The result must also expose its fields by name.
        check_named_results(mstats.kendalltau(long_x, long_y),
                            ('correlation', 'pvalue'), ma=True)
Example 2
Project: dataprep   Author: sfu-db   File: compute.py    MIT License 6 votes vote down vote up
def kendall_tau_1xn(
    x: da.Array,
    data: da.Array,
    value_range: Optional[Tuple[float, float]] = None,
    k: Optional[int] = None,
) -> Tuple[np.ndarray, np.ndarray]:
    """
    Kendall tau of ``x`` against every column of ``data``, then filtered.

    Parameters
    ----------
    x : da.Array
        First argument of every ``kendalltau`` call.
    data : da.Array
        Two-dimensional; each column is paired with ``x`` in turn.
    value_range : Optional[Tuple[float, float]] = None
        Forwarded unchanged to ``corr_filter``.
    k : Optional[int] = None
        Forwarded unchanged to ``corr_filter``.

    Returns
    -------
    Whatever ``corr_filter`` returns for the computed coefficients.
    """

    _, ncols = data.shape

    # One lazy task per column; only the coefficient (element 0) is kept.
    pending = [
        dask.delayed(lambda a, b: kendalltau(a, b)[0])(x, data[:, col])
        for col in range(ncols)
    ]

    (taus,) = da.compute(pending)
    return corr_filter(np.asarray(taus), value_range, k)
Example 3
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: nanops.py    MIT License 6 votes vote down vote up
def get_corr_func(method):
    """Resolve ``method`` to a two-argument correlation function.

    ``"pearson"``, ``"kendall"`` and ``"spearman"`` select the built-in
    implementations; a callable is handed back untouched. Any other value
    fails on the final table lookup.
    """
    wants_rank_method = method in ("kendall", "spearman")
    if wants_rank_method:
        # Deferred import: only the rank correlations need SciPy.
        from scipy.stats import kendalltau, spearmanr
    elif callable(method):
        return method

    def _pearson(a, b):
        # Off-diagonal entry of the 2x2 correlation matrix.
        matrix = np.corrcoef(a, b)
        return matrix[0, 1]

    def _kendall(a, b):
        outcome = kendalltau(a, b)
        # A tuple result has the coefficient first; anything else is
        # passed through as is.
        return outcome[0] if isinstance(outcome, tuple) else outcome

    def _spearman(a, b):
        rho = spearmanr(a, b)[0]
        return rho

    dispatch = dict(pearson=_pearson, kendall=_kendall, spearman=_spearman)
    return dispatch[method]
Example 4
Project: recruit   Author: Frank-qlu   File: nanops.py    Apache License 2.0 6 votes vote down vote up
def get_corr_func(method):
    """Resolve ``method`` to a two-argument correlation function.

    ``'pearson'``, ``'kendall'`` and ``'spearman'`` select the built-in
    implementations; a callable is handed back untouched. Any other value
    fails on the final table lookup.
    """
    wants_rank_method = method in ('kendall', 'spearman')
    if wants_rank_method:
        # Deferred import: only the rank correlations need SciPy.
        from scipy.stats import kendalltau, spearmanr
    elif callable(method):
        return method

    def _pearson(a, b):
        # Off-diagonal entry of the 2x2 correlation matrix.
        matrix = np.corrcoef(a, b)
        return matrix[0, 1]

    def _kendall(a, b):
        outcome = kendalltau(a, b)
        # A tuple result has the coefficient first; anything else is
        # passed through as is.
        return outcome[0] if isinstance(outcome, tuple) else outcome

    def _spearman(a, b):
        rho = spearmanr(a, b)[0]
        return rho

    dispatch = dict(pearson=_pearson, kendall=_kendall, spearman=_spearman)
    return dispatch[method]
Example 5
Project: att   Author: Centre-Alt-Rendiment-Esportiu   File: test_mstats_basic.py    GNU General Public License v3.0 6 votes vote down vote up
def test_kendalltau(self):
        """Check mstats.kendalltau against stored (tau, p-value) pairs."""
        nan = np.nan

        # Short samples: each input list ends with a NaN.
        short_x = ma.fix_invalid([5.05, 6.75, 3.21, 2.66, nan])
        short_y = ma.fix_invalid([1.65, 26.5, -5.93, 7.96, nan])
        short_z = ma.fix_invalid([1.65, 2.64, 2.64, 6.95, nan])
        tau_xy = np.asarray(mstats.kendalltau(short_x, short_y))
        assert_almost_equal(tau_xy, [+0.3333333, 0.4969059])
        tau_xz = np.asarray(mstats.kendalltau(short_x, short_z))
        assert_almost_equal(tau_xz, [-0.5477226, 0.2785987])

        # Longer samples with many repeated values; the NaN sits at a
        # different position in each input.
        long_x = ma.fix_invalid([0, 0, 0, 0, 20, 20, 0, 60, 0, 20,
                                 10, 10, 0, 40, 0, 20, 0, 0, 0, 0, 0, nan])
        long_y = ma.fix_invalid([0, 80, 80, 80, 10, 33, 60, 0, 67, 27,
                                 25, 80, 80, 80, 80, 80, 80, 0, 10, 45, nan, 0])
        assert_almost_equal(np.asarray(mstats.kendalltau(long_x, long_y)),
                            [-0.1585188, 0.4128009])
Example 6
Project: FUTU_Stop_Loss   Author: BigtoC   File: nanops.py    MIT License 6 votes vote down vote up
def get_corr_func(method):
    """Return the correlation function selected by ``method``.

    Parameters
    ----------
    method : {'pearson', 'kendall', 'spearman'} or callable
        Name of a built-in correlation, or a callable, which is returned
        unchanged.

    Returns
    -------
    callable
        For the named methods, a function ``f(a, b)`` giving the
        correlation coefficient of ``a`` and ``b``.

    Raises
    ------
    KeyError
        If ``method`` is neither a known name nor a callable.
    """
    if method in ['kendall', 'spearman']:
        # Imported lazily so that 'pearson' does not need SciPy.
        from scipy.stats import kendalltau, spearmanr
    elif callable(method):
        # User-supplied correlation function: nothing to look up.
        return method

    def _pearson(a, b):
        return np.corrcoef(a, b)[0, 1]

    def _kendall(a, b):
        rs = kendalltau(a, b)
        # A tuple result has the coefficient first; anything else is
        # passed through as is.
        if isinstance(rs, tuple):
            return rs[0]
        return rs

    def _spearman(a, b):
        return spearmanr(a, b)[0]

    _cor_methods = {
        'pearson': _pearson,
        'kendall': _kendall,
        'spearman': _spearman
    }
    return _cor_methods[method]
Example 7
Project: vnpy_crypto   Author: birforce   File: nanops.py    MIT License 6 votes vote down vote up
def get_corr_func(method):
    """Return the correlation function selected by ``method``.

    Parameters
    ----------
    method : {'pearson', 'kendall', 'spearman'} or callable
        Name of a built-in correlation, or a callable, which is returned
        unchanged.

    Returns
    -------
    callable
        For the named methods, a function ``f(a, b)`` giving the
        correlation coefficient of ``a`` and ``b``.

    Raises
    ------
    KeyError
        If ``method`` is neither a known name nor a callable.
    """
    if method in ['kendall', 'spearman']:
        # Imported lazily so that 'pearson' does not need SciPy.
        from scipy.stats import kendalltau, spearmanr
    elif callable(method):
        # User-supplied correlation function: nothing to look up.
        return method

    def _pearson(a, b):
        return np.corrcoef(a, b)[0, 1]

    def _kendall(a, b):
        rs = kendalltau(a, b)
        # A tuple result has the coefficient first; anything else is
        # passed through as is.
        if isinstance(rs, tuple):
            return rs[0]
        return rs

    def _spearman(a, b):
        return spearmanr(a, b)[0]

    _cor_methods = {
        'pearson': _pearson,
        'kendall': _kendall,
        'spearman': _spearman
    }
    return _cor_methods[method]
Example 8
Project: Computable   Author: ktraunmueller   File: nanops.py    MIT License 6 votes vote down vote up
def get_corr_func(method):
    """Return the correlation function selected by ``method``.

    Parameters
    ----------
    method : {'pearson', 'kendall', 'spearman'} or callable
        Name of a built-in correlation, or a callable, which is returned
        unchanged.

    Returns
    -------
    callable
        For the named methods, a function ``f(a, b)`` giving the
        correlation coefficient of ``a`` and ``b``.

    Raises
    ------
    KeyError
        If ``method`` is neither a known name nor a callable.
    """
    if method in ['kendall', 'spearman']:
        # Imported lazily so that 'pearson' does not need SciPy.
        from scipy.stats import kendalltau, spearmanr
    elif callable(method):
        # User-supplied correlation function: nothing to look up.
        return method

    def _pearson(a, b):
        return np.corrcoef(a, b)[0, 1]

    def _kendall(a, b):
        rs = kendalltau(a, b)
        # A tuple result has the coefficient first; anything else is
        # passed through as is.
        if isinstance(rs, tuple):
            return rs[0]
        return rs

    def _spearman(a, b):
        return spearmanr(a, b)[0]

    _cor_methods = {
        'pearson': _pearson,
        'kendall': _kendall,
        'spearman': _spearman
    }
    return _cor_methods[method]
Example 9
Project: poker   Author: surgebiswas   File: test_mstats_basic.py    MIT License 6 votes vote down vote up
def test_kendalltau(self):
        """Check mstats.kendalltau against stored (tau, p-value) pairs."""
        nan = np.nan

        # Short samples: each input list ends with a NaN.
        short_x = ma.fix_invalid([5.05, 6.75, 3.21, 2.66, nan])
        short_y = ma.fix_invalid([1.65, 26.5, -5.93, 7.96, nan])
        short_z = ma.fix_invalid([1.65, 2.64, 2.64, 6.95, nan])
        tau_xy = np.asarray(mstats.kendalltau(short_x, short_y))
        assert_almost_equal(tau_xy, [+0.3333333, 0.4969059])
        tau_xz = np.asarray(mstats.kendalltau(short_x, short_z))
        assert_almost_equal(tau_xz, [-0.5477226, 0.2785987])

        # Longer samples with many repeated values; the NaN sits at a
        # different position in each input.
        long_x = ma.fix_invalid([0, 0, 0, 0, 20, 20, 0, 60, 0, 20,
                                 10, 10, 0, 40, 0, 20, 0, 0, 0, 0, 0, nan])
        long_y = ma.fix_invalid([0, 80, 80, 80, 10, 33, 60, 0, 67, 27,
                                 25, 80, 80, 80, 80, 80, 80, 0, 10, 45, nan, 0])
        assert_almost_equal(np.asarray(mstats.kendalltau(long_x, long_y)),
                            [-0.1585188, 0.4128009])

        # The result must also expose its fields by name.
        check_named_results(mstats.kendalltau(long_x, long_y),
                            ('correlation', 'pvalue'), ma=True)
Example 10
Project: NeuroRA   Author: neurora   File: rsa_corr.py    MIT License 6 votes vote down vote up
def rsa_correlation_kendall(RDM1, RDM2):
    """Kendall tau between the upper triangles of two matrices.

    Only entries strictly above the diagonal are compared, taken row by
    row. The number of rows of ``RDM1`` decides how many entries are read
    from both inputs.

    Parameters
    ----------
    RDM1, RDM2 : array-like, 2-D
        Matrices to compare; ``RDM2`` must be at least as large as ``RDM1``.

    Returns
    -------
    The result of ``kendalltau`` on the two flattened upper triangles.
    """
    cons = np.shape(RDM1)[0]

    # Index pairs (i, j) with i < j in row-major order: the same elements,
    # in the same order, as a nested loop over rows and later columns.
    rows, cols = np.triu_indices(cons, k=1)

    v1 = np.asarray(RDM1, dtype=np.float64)[rows, cols]
    v2 = np.asarray(RDM2, dtype=np.float64)[rows, cols]

    return kendalltau(v1, v2)
Example 11
Project: predictive-maintenance-using-machine-learning   Author: awslabs   File: nanops.py    Apache License 2.0 6 votes vote down vote up
def get_corr_func(method):
    """Resolve ``method`` to a two-argument correlation function.

    ``'pearson'``, ``'kendall'`` and ``'spearman'`` select the built-in
    implementations; a callable is handed back untouched. Any other value
    fails on the final table lookup.
    """
    wants_rank_method = method in ('kendall', 'spearman')
    if wants_rank_method:
        # Deferred import: only the rank correlations need SciPy.
        from scipy.stats import kendalltau, spearmanr
    elif callable(method):
        return method

    def _pearson(a, b):
        # Off-diagonal entry of the 2x2 correlation matrix.
        matrix = np.corrcoef(a, b)
        return matrix[0, 1]

    def _kendall(a, b):
        outcome = kendalltau(a, b)
        # A tuple result has the coefficient first; anything else is
        # passed through as is.
        return outcome[0] if isinstance(outcome, tuple) else outcome

    def _spearman(a, b):
        rho = spearmanr(a, b)[0]
        return rho

    dispatch = dict(pearson=_pearson, kendall=_kendall, spearman=_spearman)
    return dispatch[method]
Example 12
Project: pycopula   Author: blent-ai   File: copula.py    Apache License 2.0 6 votes vote down vote up
def correlations(self, X):
		"""
		Compute three correlation coefficients between the first two columns
		of X and store them on the instance. Only available when dimension
		of copula is 2.

		Parameters
		----------
		X : numpy array (of size n * 2)
			Values to compute correlations.

		Returns
		-------
		kendall : float
			The Kendall tau.
		pearson : float
			The Pearson's R
		spearman : float
			The Spearman's R
		"""
		if self.dim != 2:
			raise Exception("Correlations can not be computed when dimension is greater than 2.")
		# Each SciPy routine returns (coefficient, p-value); keep the coefficient.
		measures = (
			("kendall", kendalltau),
			("pearson", pearsonr),
			("spearman", spearmanr),
		)
		for attribute, measure in measures:
			setattr(self, attribute, measure(X[:,0], X[:,1])[0])
		return self.kendall, self.pearson, self.spearman
Example 13
Project: fund   Author: Frank-qlu   File: nanops.py    Apache License 2.0 6 votes vote down vote up
def get_corr_func(method):
    """Resolve ``method`` to a two-argument correlation function.

    ``'pearson'``, ``'kendall'`` and ``'spearman'`` select the built-in
    implementations; a callable is handed back untouched. Any other value
    fails on the final table lookup.
    """
    wants_rank_method = method in ('kendall', 'spearman')
    if wants_rank_method:
        # Deferred import: only the rank correlations need SciPy.
        from scipy.stats import kendalltau, spearmanr
    elif callable(method):
        return method

    def _pearson(a, b):
        # Off-diagonal entry of the 2x2 correlation matrix.
        matrix = np.corrcoef(a, b)
        return matrix[0, 1]

    def _kendall(a, b):
        outcome = kendalltau(a, b)
        # A tuple result has the coefficient first; anything else is
        # passed through as is.
        return outcome[0] if isinstance(outcome, tuple) else outcome

    def _spearman(a, b):
        rho = spearmanr(a, b)[0]
        return rho

    dispatch = dict(pearson=_pearson, kendall=_kendall, spearman=_spearman)
    return dispatch[method]
Example 14
Project: psychrometric-chart-makeover   Author: buds-lab   File: nanops.py    MIT License 6 votes vote down vote up
def get_corr_func(method):
    """Resolve ``method`` to a two-argument correlation function.

    ``'pearson'``, ``'kendall'`` and ``'spearman'`` select the built-in
    implementations; a callable is handed back untouched. Any other value
    fails on the final table lookup.
    """
    wants_rank_method = method in ('kendall', 'spearman')
    if wants_rank_method:
        # Deferred import: only the rank correlations need SciPy.
        from scipy.stats import kendalltau, spearmanr
    elif callable(method):
        return method

    def _pearson(a, b):
        # Off-diagonal entry of the 2x2 correlation matrix.
        matrix = np.corrcoef(a, b)
        return matrix[0, 1]

    def _kendall(a, b):
        outcome = kendalltau(a, b)
        # A tuple result has the coefficient first; anything else is
        # passed through as is.
        return outcome[0] if isinstance(outcome, tuple) else outcome

    def _spearman(a, b):
        rho = spearmanr(a, b)[0]
        return rho

    dispatch = dict(pearson=_pearson, kendall=_kendall, spearman=_spearman)
    return dispatch[method]
Example 15
Project: copula-py   Author: stochasticresearch   File: copulafit.py    GNU General Public License v3.0 6 votes vote down vote up
def _gaussian_PKTE(X):
    """Estimate a correlation matrix from pairwise Kendall's tau.

    Each off-diagonal entry is ``sin(pi/2 * tau)`` for the corresponding
    pair of columns of ``X``; the diagonal is 1. The matrix is then passed
    through ``_nearPD`` before being returned.

    Parameters
    ----------
    X : 2-D array
        Data whose columns are the variables to correlate.

    Returns
    -------
    The output of ``_nearPD`` applied to the estimated matrix.
    """
    # the algorithm for this comes from the paper:
    # "Gaussian Copula Precision Estimation with Missing Values" 
    # by Huahua Wang, Faridel Fazayeli, Soumyadeep Chatterjee, Arindam Banerjee
    N = X.shape[1]
    sigma_hat = np.ones((N,N))
    for dim1 in range(0,N-1):
        for dim2 in range(dim1+1,N):
            tau = kendalltau(X[:,dim1],X[:,dim2])
            # scipy.stats.kendalltau returns a (tau, p-value) tuple, and
            # multiplying a tuple by a float raises TypeError, so reduce
            # the result to the coefficient before scaling.
            if isinstance(tau, tuple):
                tau = tau[0]
            rho = np.sin(math.pi/2 * tau)
            # correlation matrix is symmetric
            sigma_hat[dim1][dim2] = rho
            sigma_hat[dim2][dim1] = rho

    # ensure that sigma_hat is positive semidefinite
    sigma_hat = _nearPD(sigma_hat)

    return sigma_hat

# TODO: T copula stuff 
Example 16
Project: copula-py   Author: stochasticresearch   File: multivariate_stats.py    GNU General Public License v3.0 6 votes vote down vote up
def kendalls_tau(X):
    """
    Average of the pairwise Kendall's tau over all pairs of columns of X, a
    generalized Kendall's tau as described by "Multivariate Extensions of
    Spearman's Rho and Related Statistics"

    Inputs:
      X - the input data, should be a numpy array of shape = M x N, where
          M is the number of samples, and N is the dimensionality of the data
    """
    N = X.shape[1]
    if N<2:
        raise ValueError("To calculate Kendall's Tau, need data of dimensionality >= 2")

    # Sum tau over every unordered column pair (first < second); element 0
    # of each kendalltau result is the coefficient.
    pairwise_total = sum(
        (kendalltau(X[:,first], X[:,second])[0]
         for first in range(0, N-1)
         for second in range(first+1, N)),
        0.0,
    )
    # normalize by the number of pairs
    return pairwise_total / comb(N,2)
Example 17
Project: wine-ml-on-aws-lambda   Author: pierreant   File: nanops.py    Apache License 2.0 6 votes vote down vote up
def get_corr_func(method):
    """Return the correlation function selected by ``method``.

    Parameters
    ----------
    method : {'pearson', 'kendall', 'spearman'} or callable
        Name of a built-in correlation, or a callable, which is returned
        unchanged.

    Returns
    -------
    callable
        For the named methods, a function ``f(a, b)`` giving the
        correlation coefficient of ``a`` and ``b``.

    Raises
    ------
    KeyError
        If ``method`` is neither a known name nor a callable.
    """
    if method in ['kendall', 'spearman']:
        # Imported lazily so that 'pearson' does not need SciPy.
        from scipy.stats import kendalltau, spearmanr
    elif callable(method):
        # User-supplied correlation function: nothing to look up.
        return method

    def _pearson(a, b):
        return np.corrcoef(a, b)[0, 1]

    def _kendall(a, b):
        rs = kendalltau(a, b)
        # A tuple result has the coefficient first; anything else is
        # passed through as is.
        if isinstance(rs, tuple):
            return rs[0]
        return rs

    def _spearman(a, b):
        return spearmanr(a, b)[0]

    _cor_methods = {
        'pearson': _pearson,
        'kendall': _kendall,
        'spearman': _spearman
    }
    return _cor_methods[method]
Example 18
Project: wine-ml-on-aws-lambda   Author: pierreant   File: test_mstats_basic.py    Apache License 2.0 6 votes vote down vote up
def test_kendalltau(self):
        """Check mstats.kendalltau against stored (tau, p-value) pairs."""
        nan = np.nan

        # Short samples: each input list ends with a NaN.
        short_x = ma.fix_invalid([5.05, 6.75, 3.21, 2.66, nan])
        short_y = ma.fix_invalid([1.65, 26.5, -5.93, 7.96, nan])
        short_z = ma.fix_invalid([1.65, 2.64, 2.64, 6.95, nan])
        tau_xy = np.asarray(mstats.kendalltau(short_x, short_y))
        assert_almost_equal(tau_xy, [+0.3333333, 0.4969059])
        tau_xz = np.asarray(mstats.kendalltau(short_x, short_z))
        assert_almost_equal(tau_xz, [-0.5477226, 0.2785987])

        # Longer samples with many repeated values; the NaN sits at a
        # different position in each input.
        long_x = ma.fix_invalid([0, 0, 0, 0, 20, 20, 0, 60, 0, 20,
                                 10, 10, 0, 40, 0, 20, 0, 0, 0, 0, 0, nan])
        long_y = ma.fix_invalid([0, 80, 80, 80, 10, 33, 60, 0, 67, 27,
                                 25, 80, 80, 80, 80, 80, 80, 0, 10, 45, nan, 0])
        assert_almost_equal(np.asarray(mstats.kendalltau(long_x, long_y)),
                            [-0.1585188, 0.4128009])

        # The result must also expose its fields by name.
        check_named_results(mstats.kendalltau(long_x, long_y),
                            ('correlation', 'pvalue'), ma=True)
Example 19
Project: metk   Author: PatWalters   File: metk_report.py    MIT License 5 votes vote down vote up
def metk_report(df_kcal):
    """
    Build a text report of error and correlation statistics
    :param df_kcal: input dataframe with 'Pred' and 'Exp' columns, activity should be in kcal/mol
    :return: the report as a list of strings
    """
    n_rows = df_kcal.shape[0]
    predicted = df_kcal['Pred']
    experimental = df_kcal['Exp']

    # Error metrics.
    rms_error = rmse(predicted, experimental)
    mean_abs_error = mean_absolute_error(predicted, experimental)

    # Correlations; the p-values are not reported.
    r_value, _ = pearsonr(predicted, experimental)
    # R and its confidence bounds, each squared.
    r_squared_with_bounds = [
        value ** 2
        for value in [r_value] + list(pearson_confidence(r_value, n_rows))
    ]
    rho, _ = spearmanr(predicted, experimental)
    tau, _ = kendalltau(predicted, experimental)

    ceiling = max_possible_correlation(
        [log10(kcal_to_ki(value, "M")) for value in df_kcal['Exp']]
    )

    return [
        "N = %d" % n_rows,
        "RMSE = %.2f kcal/mol" % rms_error,
        "MAE  = %.2f kcal/mol" % mean_abs_error,
        "Max possible correlation = %.2f" % ceiling,
        "Pearson R^2 = %0.2f  95%%CI = %.2f %.2f" % tuple(r_squared_with_bounds),
        "Spearman rho = %0.2f" % rho,
        "Kendall tau = %0.2f" % tau,
    ]
Example 20
Project: LaserTOF   Author: kyleuckert   File: test_mstats_basic.py    MIT License 5 votes vote down vote up
def test_kendalltau(self):
        """stats.kendalltau and stats.mstats.kendalltau must agree on every sample."""
        for size in self.get_n():
            x, y, xm, ym = self.generate_xy_sample(size)
            plain = stats.kendalltau(x, y)
            masked = stats.mstats.kendalltau(xm, ym)
            # Element 0 is compared to 10 decimals, element 1 to 7.
            for position, places in ((0, 10), (1, 7)):
                assert_almost_equal(plain[position], masked[position],
                                    decimal=places)
Example 21
Project: LaserTOF   Author: kyleuckert   File: test_stats.py    MIT License 5 votes vote down vote up
def test_kendalltau_vs_mstats_basic():
    """stats.kendalltau must match mstats_basic.kendalltau on tied rankings."""
    np.random.seed(42)
    for s in range(2, 10):
        # Rankings with ties: each value i below s appears i times.
        first = [value for value in range(s) for _ in range(value)]
        second = list(first)
        np.random.shuffle(first)
        np.random.shuffle(second)
        expected = mstats_basic.kendalltau(first, second)
        actual = stats.kendalltau(first, second)
        for position in (0, 1):
            assert_approx_equal(actual[position], expected[position])
Example 22
Project: LaserTOF   Author: kyleuckert   File: test_stats.py    MIT License 5 votes vote down vote up
def test_kendalltau_nan_2nd_arg():
    """Regression test for gh-6134: nans in the second arg were not handled."""
    first = [1., 2., 3., 4.]
    second = [np.nan, 2.4, 3.4, 3.4]

    # Omitting the NaN pair must give the same coefficient as slicing it off.
    omitted = stats.kendalltau(first, second, nan_policy='omit')
    trimmed = stats.kendalltau(first[1:], second[1:])
    assert_allclose(omitted.correlation, trimmed.correlation, atol=1e-15)
Example 23
Project: dataprep   Author: sfu-db   File: compute.py    MIT License 5 votes vote down vote up
def kendall_tau_nxn(data: da.Array) -> da.Array:
    """
    Kendall tau correlation matrix (n x n) between the n columns of ``data``
    """
    _, ncols = data.shape

    # Every unordered column pair (i < j), row by row.
    pairs = [(i, j) for i in range(ncols) for j in range(i + 1, ncols)]
    tasks = [
        dask.delayed(lambda a, b: kendalltau(a, b).correlation)(
            data[:, i], data[:, j]
        )
        for i, j in pairs
    ]
    taus = dask.compute(*tasks)  # TODO avoid explicitly compute

    # Fill the upper triangle, then mirror it.
    upper = np.zeros(shape=(ncols, ncols))
    for (i, j), tau in zip(pairs, taus):  # TODO: Optimize by using numpy api
        upper[i][j] = tau

    symmetric = upper + upper.T
    np.fill_diagonal(symmetric, 1)

    return da.from_array(symmetric)
Example 24
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_nanops.py    MIT License 5 votes vote down vote up
def test_nancorr_kendall(self):
        """Pass SciPy's Kendall tau as the target for the nancorr checkers."""
        from scipy.stats import kendalltau

        def tau(left, right):
            # Element 0 of the SciPy result is the coefficient.
            return kendalltau(left, right)[0]

        # 2-D fixtures: whole arrays first, then their flat iterators.
        full_2d = tau(self.arr_float_2d, self.arr_float1_2d)
        flat_2d = tau(self.arr_float_2d.flat, self.arr_float1_2d.flat)
        self.check_nancorr_nancov_2d(
            nanops.nancorr, full_2d, flat_2d, method="kendall"
        )

        # 1-D fixtures, same pattern.
        full_1d = tau(self.arr_float_1d, self.arr_float1_1d)
        flat_1d = tau(self.arr_float_1d.flat, self.arr_float1_1d.flat)
        self.check_nancorr_nancov_1d(
            nanops.nancorr, full_1d, flat_1d, method="kendall"
        )
Example 25
Project: recruit   Author: Frank-qlu   File: test_nanops.py    Apache License 2.0 5 votes vote down vote up
def test_nancorr_kendall(self):
        """Pass SciPy's Kendall tau as the target for the nancorr checkers."""
        from scipy.stats import kendalltau

        def tau(left, right):
            # Element 0 of the SciPy result is the coefficient.
            return kendalltau(left, right)[0]

        # 2-D fixtures: whole arrays first, then their flat iterators.
        full_2d = tau(self.arr_float_2d, self.arr_float1_2d)
        flat_2d = tau(self.arr_float_2d.flat, self.arr_float1_2d.flat)
        self.check_nancorr_nancov_2d(
            nanops.nancorr, full_2d, flat_2d, method='kendall'
        )

        # 1-D fixtures, same pattern.
        full_1d = tau(self.arr_float_1d, self.arr_float1_1d)
        flat_1d = tau(self.arr_float_1d.flat, self.arr_float1_1d.flat)
        self.check_nancorr_nancov_1d(
            nanops.nancorr, full_1d, flat_1d, method='kendall'
        )
Example 26
Project: recruit   Author: Frank-qlu   File: test_analytics.py    Apache License 2.0 5 votes vote down vote up
def test_corr_rank(self):
        """Rank-based Series.corr against SciPy and against values from R."""
        import scipy
        import scipy.stats as stats

        # kendall and spearman on generated series; the last five entries
        # of the first series are assigned from its first five.
        left = tm.makeTimeSeries()
        right = tm.makeTimeSeries()
        left[-5:] = left[:5]
        for method, reference in (('kendall', stats.kendalltau),
                                  ('spearman', stats.spearmanr)):
            result = left.corr(right, method=method)
            expected = reference(left, right)[0]
            tm.assert_almost_equal(result, expected)

        # these methods got rewritten in 0.8
        scipy_too_old = LooseVersion(scipy.__version__) < LooseVersion('0.9')
        if scipy_too_old:
            pytest.skip(
                "skipping corr rank because of scipy version {0}".format(
                    scipy.__version__))

        # results from R
        left = Series([
            -0.89926396, 0.94209606, -1.03289164, -0.95445587, 0.76910310,
            -0.06430576, -2.09704447, 0.40660407, -0.89926396, 0.94209606,
        ])
        right = Series([
            -1.01270225, -0.62210117, -1.56895827, 0.59592943, -0.01680292,
            1.17258718, -1.06009347, -0.10222060, -0.89076239, 0.89372375,
        ])
        for method, reference_value in (('kendall', 0.4319297),
                                        ('spearman', 0.5853767)):
            tm.assert_almost_equal(left.corr(right, method=method),
                                   reference_value)
Example 27
Project: Fractalis   Author: LCSB-BioCore   File: main.py    Apache License 2.0 5 votes vote down vote up
def compute_stats(df: pd.DataFrame, method: str) -> dict:
        """Correlate the 'value_x' and 'value_y' columns and fit a line.

        Rows with a duplicated 'id' or with missing values are discarded
        first. With fewer than two rows left, every statistic is NaN.
        :param df: The DataFrame containing our data.
        :param method: The method to use: 'pearson', 'spearman' or 'kendall'.
        :return: dict with 'coef', 'p_value', 'slope' and 'intercept'
        """
        cleaned = df.drop_duplicates('id').dropna()
        if cleaned.shape[0] < 2:
            return dict.fromkeys(
                ('coef', 'p_value', 'slope', 'intercept'), float('nan'))

        correlators = {
            'pearson': stats.pearsonr,
            'spearman': stats.spearmanr,
            'kendall': stats.kendalltau,
        }
        try:
            correlate = correlators[method]
        except (KeyError, TypeError):
            raise ValueError("Unknown correlation method.") from None

        xs, ys = cleaned['value_x'], cleaned['value_y']
        corr_coef, p_value = correlate(xs, ys)
        # Degree-1 fit: coefficients come highest power first.
        coefficients = np.polyfit(xs, ys, deg=1)
        slope, intercept = coefficients[0], coefficients[1]
        return dict(coef=corr_coef, p_value=p_value,
                    slope=slope, intercept=intercept)
Example 28
Project: att   Author: Centre-Alt-Rendiment-Esportiu   File: test_mstats_basic.py    GNU General Public License v3.0 5 votes vote down vote up
def test_kendalltau(self):
        """stats.kendalltau and stats.mstats.kendalltau must agree on every sample."""
        for size in self.get_n():
            x, y, xm, ym = self.generate_xy_sample(size)
            plain = stats.kendalltau(x, y)
            masked = stats.mstats.kendalltau(xm, ym)
            # Element 0 is compared to 10 decimals, element 1 to 7.
            for position, places in ((0, 10), (1, 7)):
                assert_almost_equal(plain[position], masked[position],
                                    decimal=places)
Example 29
Project: att   Author: Centre-Alt-Rendiment-Esportiu   File: test_stats.py    GNU General Public License v3.0 5 votes vote down vote up
def test_kendalltau():
    """Exercise stats.kendalltau on tied, constant, empty and large inputs."""
    nan_pair = (np.nan, np.nan)

    # with some ties
    tied_a = [12, 2, 1, 12, 2]
    tied_b = [1, 4, 7, 1, 0]
    outcome = stats.kendalltau(tied_a, tied_b)
    assert_approx_equal(outcome[0], -0.47140452079103173)
    assert_approx_equal(outcome[1], 0.24821309157521476)

    # with only ties in one or both inputs
    constant_cases = (
        ([2, 2, 2], [2, 2, 2]),
        ([2, 0, 2], [2, 2, 2]),
        ([2, 2, 2], [2, 0, 2]),
    )
    for left, right in constant_cases:
        assert_equal(stats.kendalltau(left, right), nan_pair)

    # empty arrays provided as input
    assert_equal(stats.kendalltau([], []), nan_pair)

    # check two different sort methods
    p_without_lexsort = stats.kendalltau(tied_a, tied_b,
                                         initial_lexsort=False)[1]
    p_with_lexsort = stats.kendalltau(tied_a, tied_b,
                                      initial_lexsort=True)[1]
    assert_approx_equal(p_without_lexsort, p_with_lexsort)

    # and with larger arrays: two seeded normal draws of 500 values each,
    # multiplied by the Cholesky factor of the matrix below
    np.random.seed(7546)
    draws = np.array([np.random.normal(loc=1, scale=1, size=500),
                      np.random.normal(loc=1, scale=1, size=500)])
    target_corr = [[1.0, 0.3],
                   [0.3, 1.0]]
    mixed = np.dot(np.linalg.cholesky(target_corr), draws)
    outcome = stats.kendalltau(mixed[0], mixed[1])
    assert_approx_equal(outcome[0], 0.19291382765531062)
    assert_approx_equal(outcome[1], 1.1337108207276285e-10)

    # and do we get a tau of 1 for identical inputs?
    identical = [1, 1, 2]
    assert_approx_equal(stats.kendalltau(identical, [1, 1, 2])[0], 1.0)
Example 30
Project: rankeval   Author: hpclab   File: kendall_tau.py    Mozilla Public License 2.0 5 votes vote down vote up
def eval_per_query(self, y, y_pred):
        """
        This method computes Kendall tau at per query level (on the instances
        belonging to a specific query). The Kendall tau per query is
        calculated as:

            tau = (P - Q) / sqrt((P + Q + T) * (P + Q + U))

        where P is the number of concordant pairs, Q the number of discordant
        pairs, T the number of ties only in x, and U the number of ties only
        in y. If a tie occurs for the same pair in both x and y, it is not
        added to either T or U.

        Parameters
        ----------
        y: numpy array
            Represents the labels of instances corresponding to one query in
            the dataset (ground truth).
        y_pred: numpy array.
            Represents the predicted document scores obtained during the model
            scoring phase for that query.

        Returns
        -------
        kendalltau: float
            The Kendall tau per query.
        """
        # ``initial_lexsort`` is deliberately not passed: True was already the
        # default, and SciPy releases that dropped the keyword raise TypeError
        # when it is supplied. Element 0 of the result is the coefficient.
        kendall_tau = stats.kendalltau(y, y_pred)
        return kendall_tau[0]
Example 31
Project: FUTU_Stop_Loss   Author: BigtoC   File: test_nanops.py    MIT License 5 votes vote down vote up
def test_nancorr_kendall(self):
        """Pass SciPy's Kendall tau as the target for the nancorr checkers."""
        from scipy.stats import kendalltau

        def tau(left, right):
            # Element 0 of the SciPy result is the coefficient.
            return kendalltau(left, right)[0]

        # 2-D fixtures: whole arrays first, then their flat iterators.
        full_2d = tau(self.arr_float_2d, self.arr_float1_2d)
        flat_2d = tau(self.arr_float_2d.flat, self.arr_float1_2d.flat)
        self.check_nancorr_nancov_2d(
            nanops.nancorr, full_2d, flat_2d, method='kendall'
        )

        # 1-D fixtures, same pattern.
        full_1d = tau(self.arr_float_1d, self.arr_float1_1d)
        flat_1d = tau(self.arr_float_1d.flat, self.arr_float1_1d.flat)
        self.check_nancorr_nancov_1d(
            nanops.nancorr, full_1d, flat_1d, method='kendall'
        )
Example 32
Project: FUTU_Stop_Loss   Author: BigtoC   File: test_analytics.py    MIT License 5 votes vote down vote up
def test_corr_rank(self):
        """Rank-based Series.corr against SciPy and against values from R."""
        import scipy
        import scipy.stats as stats

        # kendall and spearman on generated series; the last five entries
        # of the first series are assigned from its first five.
        left = tm.makeTimeSeries()
        right = tm.makeTimeSeries()
        left[-5:] = left[:5]
        for method, reference in (('kendall', stats.kendalltau),
                                  ('spearman', stats.spearmanr)):
            result = left.corr(right, method=method)
            expected = reference(left, right)[0]
            tm.assert_almost_equal(result, expected)

        # these methods got rewritten in 0.8
        scipy_too_old = LooseVersion(scipy.__version__) < LooseVersion('0.9')
        if scipy_too_old:
            pytest.skip(
                "skipping corr rank because of scipy version {0}".format(
                    scipy.__version__))

        # results from R
        left = Series([
            -0.89926396, 0.94209606, -1.03289164, -0.95445587, 0.76910310,
            -0.06430576, -2.09704447, 0.40660407, -0.89926396, 0.94209606,
        ])
        right = Series([
            -1.01270225, -0.62210117, -1.56895827, 0.59592943, -0.01680292,
            1.17258718, -1.06009347, -0.10222060, -0.89076239, 0.89372375,
        ])
        for method, reference_value in (('kendall', 0.4319297),
                                        ('spearman', 0.5853767)):
            tm.assert_almost_equal(left.corr(right, method=method),
                                   reference_value)
Example 33
Project: hoaxy-network   Author: shaochengcheng   File: data_process.py    GNU General Public License v3.0 5 votes vote down vote up
def rank_correlation_bot_centrality(top=1000,
                                    fn1='ubs.csv',
                                    fn2='centralities.ranked.raw_id.csv',
                                    fn3='centralities.ranked.values.csv'):
    """Write rank correlations between centrality values and bot scores.

    For every column of ``fn3``, the first ``top`` values are paired with
    the bot scores (``bot_score_en`` in ``fn1``) of the user ids in the same
    column of ``fn2``. Rows without a bot score are dropped, then Spearman
    and Kendall correlations are computed. One CSV per statistic is written
    to the working directory.

    Raises ValueError when ``top`` exceeds 1000.
    """
    if top > 1000:
        raise ValueError('Top should not larger than 1000!')

    bot_scores = pd.read_csv(fn1).set_index('user_raw_id').bot_score_en
    ranked_ids = pd.read_csv(fn2)
    ranked_values = pd.read_csv(fn3)
    ranked_ids = ranked_ids.iloc[:top]
    ranked_values = ranked_values.iloc[:top]

    spearman_rows, kendall_rows = [], []
    for name in ranked_values.columns:
        scores = bot_scores.loc[ranked_ids[name].values]
        paired = pd.DataFrame(
            dict(centrality=ranked_values[name].values.copy(),
                 bot_score=scores.values.copy()))
        # Keep only rows that have a bot score.
        paired = paired.loc[paired.bot_score.notnull()]
        centrality = paired.centrality.values
        bot_score = paired.bot_score.values
        rho, rho_p = spearmanr(centrality, bot_score)
        tau, tau_p = kendalltau(centrality, bot_score)
        spearman_rows.append((name, rho, rho_p))
        kendall_rows.append((name, tau, tau_p))

    spearman_table = pd.DataFrame(
        spearman_rows, columns=['centrality', 'correlation', 'pvalue'])
    spearman_table.to_csv(
        'rank_correlation_bot_centrality.spearman.{}.csv'.format(top),
        index=False)
    kendall_table = pd.DataFrame(
        kendall_rows, columns=['centrality', 'correlation', 'pvalue'])
    kendall_table.to_csv(
        'rank_correlation_bot_centrality.kendall.{}.csv'.format(top),
        index=False)
Example 34
Project: vnpy_crypto   Author: birforce   File: test_nanops.py    MIT License 5 votes vote down vote up
def test_nancorr_kendall(self):
        """Pass SciPy's Kendall tau as the target for the nancorr checkers."""
        from scipy.stats import kendalltau

        def tau(left, right):
            # Element 0 of the SciPy result is the coefficient.
            return kendalltau(left, right)[0]

        # 2-D fixtures: whole arrays first, then their flat iterators.
        full_2d = tau(self.arr_float_2d, self.arr_float1_2d)
        flat_2d = tau(self.arr_float_2d.flat, self.arr_float1_2d.flat)
        self.check_nancorr_nancov_2d(
            nanops.nancorr, full_2d, flat_2d, method='kendall'
        )

        # 1-D fixtures, same pattern.
        full_1d = tau(self.arr_float_1d, self.arr_float1_1d)
        flat_1d = tau(self.arr_float_1d.flat, self.arr_float1_1d.flat)
        self.check_nancorr_nancov_1d(
            nanops.nancorr, full_1d, flat_1d, method='kendall'
        )
Example 35
Project: vnpy_crypto   Author: birforce   File: test_analytics.py    MIT License 5 votes vote down vote up
def test_corr_rank(self):
        """Rank-based Series.corr against SciPy and against values from R."""
        import scipy
        import scipy.stats as stats

        # kendall and spearman on generated series; the last five entries
        # of the first series are assigned from its first five.
        left = tm.makeTimeSeries()
        right = tm.makeTimeSeries()
        left[-5:] = left[:5]
        for method, reference in (('kendall', stats.kendalltau),
                                  ('spearman', stats.spearmanr)):
            result = left.corr(right, method=method)
            expected = reference(left, right)[0]
            tm.assert_almost_equal(result, expected)

        # these methods got rewritten in 0.8
        scipy_too_old = LooseVersion(scipy.__version__) < LooseVersion('0.9')
        if scipy_too_old:
            pytest.skip(
                "skipping corr rank because of scipy version {0}".format(
                    scipy.__version__))

        # results from R
        left = Series([
            -0.89926396, 0.94209606, -1.03289164, -0.95445587, 0.76910310,
            -0.06430576, -2.09704447, 0.40660407, -0.89926396, 0.94209606,
        ])
        right = Series([
            -1.01270225, -0.62210117, -1.56895827, 0.59592943, -0.01680292,
            1.17258718, -1.06009347, -0.10222060, -0.89076239, 0.89372375,
        ])
        for method, reference_value in (('kendall', 0.4319297),
                                        ('spearman', 0.5853767)):
            tm.assert_almost_equal(left.corr(right, method=method),
                                   reference_value)
Example 36
Project: ble5-nrf52-mac   Author: tomasero   File: test_mstats_basic.py    MIT License 5 votes vote down vote up
def test_kendalltau(self):
        """Compare stats.kendalltau with stats.mstats.kendalltau for every sample size."""
        for size in self.get_n():
            x, y, xm, ym = self.generate_xy_sample(size)
            plain = stats.kendalltau(x, y)
            masked = stats.mstats.kendalltau(xm, ym)
            # index 0 (statistic) is compared to 10 decimals, index 1 (p-value) to 7
            for position, places in ((0, 10), (1, 7)):
                assert_almost_equal(plain[position], masked[position],
                                    decimal=places)
Example 37
Project: ble5-nrf52-mac   Author: tomasero   File: test_stats.py    MIT License 5 votes vote down vote up
def test_kendalltau_vs_mstats_basic():
    """Check that stats.kendalltau matches mstats_basic.kendalltau on tied rankings."""
    np.random.seed(42)
    for s in range(2, 10):
        # Rankings with ties: every value i below s appears i times.
        a = [value for value in range(s) for _ in range(value)]
        b = list(a)
        np.random.shuffle(a)
        np.random.shuffle(b)
        expected = mstats_basic.kendalltau(a, b)
        actual = stats.kendalltau(a, b)
        # position 0 is the statistic, position 1 the p-value
        for position in (0, 1):
            assert_approx_equal(actual[position], expected[position])
Example 38
Project: ble5-nrf52-mac   Author: tomasero   File: test_stats.py    MIT License 5 votes vote down vote up
def test_kendalltau_nan_2nd_arg():
    """Regression test for gh-6134: NaNs in the second argument were not handled."""
    x = [1., 2., 3., 4.]
    y = [np.nan, 2.4, 3.4, 3.4]

    # Omitting the NaN pair must match dropping that pair by hand.
    with_omit = stats.kendalltau(x, y, nan_policy='omit')
    hand_trimmed = stats.kendalltau(x[1:], y[1:])
    assert_allclose(with_omit.correlation, hand_trimmed.correlation,
                    atol=1e-15)
Example 39
Project: vwoptimize   Author: denik   File: vwoptimize.py    MIT License 5 votes vote down vote up
def kendall_tau(y_true, y_score):
    """Return Kendall's tau between ``y_true`` and ``y_score``.

    When scipy reports NaN for the statistic, 0.0 is returned instead.
    """
    from scipy.stats import kendalltau
    tau = kendalltau(y_true, y_score)[0]
    if np.isnan(tau):
        return 0.0
    return tau
Example 40
Project: Computable   Author: ktraunmueller   File: test_stats.py    MIT License 5 votes vote down vote up
def test_kendalltau():
    """Check stats.kendalltau on tied, constant, and larger random inputs."""
    # with some ties
    x1 = [12, 2, 1, 12, 2]
    x2 = [1, 4, 7, 1, 0]
    # hard-coded reference (statistic, p-value) for the tied data above
    expected = (-0.47140452079103173, 0.24821309157521476)
    res = stats.kendalltau(x1, x2)
    assert_approx_equal(res[0], expected[0])
    assert_approx_equal(res[1], expected[1])

    # with only ties in one or both inputs: every returned value must be NaN
    assert_(np.all(np.isnan(stats.kendalltau([2,2,2], [2,2,2]))))
    assert_(np.all(np.isnan(stats.kendalltau([2,0,2], [2,2,2]))))
    assert_(np.all(np.isnan(stats.kendalltau([2,2,2], [2,0,2]))))

    # check two different sort methods: the p-value must not depend on
    # the initial_lexsort flag
    assert_approx_equal(stats.kendalltau(x1, x2, initial_lexsort=False)[1],
                        stats.kendalltau(x1, x2, initial_lexsort=True)[1])

    # and with larger arrays; the seed fixes the draws, so the reference
    # values below are only valid for this exact sequence of RNG calls
    np.random.seed(7546)
    x = np.array([np.random.normal(loc=1, scale=1, size=500),
                np.random.normal(loc=1, scale=1, size=500)])
    corr = [[1.0, 0.3],
            [0.3, 1.0]]
    # mix the two independent rows with the Cholesky factor of corr
    x = np.dot(np.linalg.cholesky(corr), x)
    expected = (0.19291382765531062, 1.1337108207276285e-10)
    res = stats.kendalltau(x[0], x[1])
    assert_approx_equal(res[0], expected[0])
    assert_approx_equal(res[1], expected[1])

    # and do we get a tau of 1 for identical inputs?
    assert_approx_equal(stats.kendalltau([1,1,2], [1,1,2])[0], 1.0) 
Example 41
Project: poker   Author: surgebiswas   File: test_mstats_basic.py    MIT License 5 votes vote down vote up
def test_kendalltau(self):
        """Compare stats.kendalltau with stats.mstats.kendalltau for every sample size."""
        for size in self.get_n():
            x, y, xm, ym = self.generate_xy_sample(size)
            plain = stats.kendalltau(x, y)
            masked = stats.mstats.kendalltau(xm, ym)
            # index 0 (statistic) is compared to 10 decimals, index 1 (p-value) to 7
            for position, places in ((0, 10), (1, 7)):
                assert_almost_equal(plain[position], masked[position],
                                    decimal=places)
Example 42
Project: poker   Author: surgebiswas   File: test_stats.py    MIT License 5 votes vote down vote up
def test_kendalltau_nan_2nd_arg():
    """Regression test for gh-6134: NaNs in the second argument were not handled."""
    x = [1., 2., 3., 4.]
    y = [np.nan, 2.4, 3.4, 3.4]

    # Omitting the NaN pair must match dropping that pair by hand.
    with_omit = stats.kendalltau(x, y, nan_policy='omit')
    hand_trimmed = stats.kendalltau(x[1:], y[1:])
    assert_allclose(with_omit.correlation, hand_trimmed.correlation,
                    atol=1e-15)
Example 43
Project: pl2mind   Author: MRN-Code   File: snp_kendalltau.py    GNU General Public License v2.0 5 votes vote down vote up
def g(i):
    """Add Kendall's tau of column ``i`` of ``snps`` vs ``labels`` to slot ``i``.

    The accumulator is the array obtained from ``tonumpyarray(shared_taus)``.
    """
    tau_buffer = tonumpyarray(shared_taus)
    previous = tau_buffer[i]
    tau_buffer[i] = previous + kendalltau(snps[:, i], labels)[0]
Example 44
Project: cptm   Author: NLeSC   File: experiment_corr_pca_ches.py    Apache License 2.0 5 votes vote down vote up
def do_kendallt(list1, list2, alpha=0.05):
    """Return Kendall's tau for the two lists, or ``'n.s.'`` if not significant.

    The correlation is returned only when its p-value is strictly below
    ``alpha``; otherwise the string ``'n.s.'`` is returned.
    """
    tau, p_value = kendalltau(list1, list2)
    return tau if p_value < alpha else 'n.s.'
Example 45
Project: P3_image_processing   Author: latedude2   File: test_mstats_basic.py    MIT License 5 votes vote down vote up
def test_kendalltau(self):
        """Compare stats.kendalltau with stats.mstats.kendalltau for every sample size."""
        for size in self.get_n():
            x, y, xm, ym = self.generate_xy_sample(size)
            plain = stats.kendalltau(x, y)
            masked = stats.mstats.kendalltau(xm, ym)
            # index 0 (statistic) is compared to 10 decimals, index 1 (p-value) to 7
            for position, places in ((0, 10), (1, 7)):
                assert_almost_equal(plain[position], masked[position],
                                    decimal=places)
Example 46
Project: P3_image_processing   Author: latedude2   File: test_stats.py    MIT License 5 votes vote down vote up
def test_kendalltau_vs_mstats_basic():
    """Check that stats.kendalltau matches mstats_basic.kendalltau on tied rankings."""
    np.random.seed(42)
    for s in range(2, 10):
        # Rankings with ties: every value i below s appears i times.
        a = [value for value in range(s) for _ in range(value)]
        b = list(a)
        np.random.shuffle(a)
        np.random.shuffle(b)
        expected = mstats_basic.kendalltau(a, b)
        actual = stats.kendalltau(a, b)
        # position 0 is the statistic, position 1 the p-value
        for position in (0, 1):
            assert_approx_equal(actual[position], expected[position])
Example 47
Project: P3_image_processing   Author: latedude2   File: test_stats.py    MIT License 5 votes vote down vote up
def test_kendalltau_nan_2nd_arg():
    """Regression test for gh-6134: NaNs in the second argument were not handled."""
    x = [1., 2., 3., 4.]
    y = [np.nan, 2.4, 3.4, 3.4]

    # Omitting the NaN pair must match dropping that pair by hand.
    with_omit = stats.kendalltau(x, y, nan_policy='omit')
    hand_trimmed = stats.kendalltau(x[1:], y[1:])
    assert_allclose(with_omit.correlation, hand_trimmed.correlation,
                    atol=1e-15)
Example 48
Project: P3_image_processing   Author: latedude2   File: test_stats.py    MIT License 5 votes vote down vote up
def test_kendall_tau_large():
    """Check that the exact p-value is 0.0 for n=172 with at most two adjacent swaps."""
    # n is a float, so np.arange below produces float arrays
    n = 172.
    x = np.arange(n)
    y = np.arange(n)
    # identical orderings
    _, pval = stats.kendalltau(x, y, method='exact')
    assert_equal(pval, 0.0)
    # swap the last two entries of y in place
    y[-1], y[-2] = y[-2], y[-1]
    _, pval = stats.kendalltau(x, y, method='exact')
    assert_equal(pval, 0.0)
    # additionally swap the third- and fourth-from-last entries
    y[-3], y[-4] = y[-4], y[-3]
    _, pval = stats.kendalltau(x, y, method='exact')
    assert_equal(pval, 0.0) 
Example 49
Project: GraphicDesignPatternByPython   Author: Relph1119   File: test_mstats_basic.py    MIT License 5 votes vote down vote up
def test_kendalltau(self):
        """Check mstats.kendalltau on small masked inputs, tied data and large arrays."""
        # Tests some computations of Kendall's tau
        # ma.fix_invalid is applied to each input; every list ends in NaN
        x = ma.fix_invalid([5.05, 6.75, 3.21, 2.66,np.nan])
        y = ma.fix_invalid([1.65, 26.5, -5.93, 7.96, np.nan])
        z = ma.fix_invalid([1.65, 2.64, 2.64, 6.95, np.nan])
        assert_almost_equal(np.asarray(mstats.kendalltau(x,y)),
                            [+0.3333333,0.4969059])
        assert_almost_equal(np.asarray(mstats.kendalltau(x,z)),
                            [-0.5477226,0.2785987])
        # longer inputs with many repeated values; the NaN sits at a
        # different position in x than in y
        x = ma.fix_invalid([0, 0, 0, 0,20,20, 0,60, 0,20,
                            10,10, 0,40, 0,20, 0, 0, 0, 0, 0, np.nan])
        y = ma.fix_invalid([0,80,80,80,10,33,60, 0,67,27,
                            25,80,80,80,80,80,80, 0,10,45, np.nan, 0])
        result = mstats.kendalltau(x,y)
        assert_almost_equal(np.asarray(result), [-0.1585188, 0.4128009])
        # make sure internal variable use correct precision with
        # larger arrays
        x = np.arange(2000, dtype=float)
        x = ma.masked_greater(x, 1995)
        y = np.arange(2000, dtype=float)
        # rotate y by half its length
        y = np.concatenate((y[1000:], y[:1000]))
        assert_(np.isfinite(mstats.kendalltau(x,y)[1]))

        # test for namedtuple attributes (reuses the large x and y from above)
        res = mstats.kendalltau(x, y)
        attributes = ('correlation', 'pvalue')
        check_named_results(res, attributes, ma=True) 
Example 50
Project: GraphicDesignPatternByPython   Author: Relph1119   File: test_mstats_basic.py    MIT License 5 votes vote down vote up
def test_kendalltau(self):
        """Compare stats.kendalltau with stats.mstats.kendalltau for every sample size."""
        for size in self.get_n():
            x, y, xm, ym = self.generate_xy_sample(size)
            plain = stats.kendalltau(x, y)
            masked = stats.mstats.kendalltau(xm, ym)
            # index 0 (statistic) is compared to 10 decimals, index 1 (p-value) to 7
            for position, places in ((0, 10), (1, 7)):
                assert_almost_equal(plain[position], masked[position],
                                    decimal=places)
Example 51
Project: GraphicDesignPatternByPython   Author: Relph1119   File: test_stats.py    MIT License 5 votes vote down vote up
def test_kendalltau_vs_mstats_basic():
    """Check that stats.kendalltau matches mstats_basic.kendalltau on tied rankings."""
    np.random.seed(42)
    for s in range(2, 10):
        # Rankings with ties: every value i below s appears i times.
        a = [value for value in range(s) for _ in range(value)]
        b = list(a)
        np.random.shuffle(a)
        np.random.shuffle(b)
        expected = mstats_basic.kendalltau(a, b)
        actual = stats.kendalltau(a, b)
        # position 0 is the statistic, position 1 the p-value
        for position in (0, 1):
            assert_approx_equal(actual[position], expected[position])
Example 52
Project: GraphicDesignPatternByPython   Author: Relph1119   File: test_stats.py    MIT License 5 votes vote down vote up
def test_kendalltau_nan_2nd_arg():
    """Regression test for gh-6134: NaNs in the second argument were not handled."""
    x = [1., 2., 3., 4.]
    y = [np.nan, 2.4, 3.4, 3.4]

    # Omitting the NaN pair must match dropping that pair by hand.
    with_omit = stats.kendalltau(x, y, nan_policy='omit')
    hand_trimmed = stats.kendalltau(x[1:], y[1:])
    assert_allclose(with_omit.correlation, hand_trimmed.correlation,
                    atol=1e-15)
Example 53
Project: LinearSieve   Author: gregversteeg   File: test_sieve.py    Apache License 2.0 5 votes vote down vote up
def test_discrete():
    """A one-factor Sieve output must be perfectly rank-correlated with column 0."""
    sieve = linearsieve.Sieve(n_hidden=1, **kwargs)
    transformed = sieve.fit_transform(test_array_d)
    tau = kendalltau(transformed[:, 0], test_array_d[:, 0])[0]
    assert np.allclose(tau, 1)
Example 54
Project: LinearSieve   Author: gregversteeg   File: test_weak_correlations.py    Apache License 2.0 5 votes vote down vote up
def score(true, predicted):
    """Match each true signal to its best-correlated predicted signal.

    Signals are the columns of ``true`` and ``predicted``. For every true
    column the largest absolute Kendall's tau over all predicted columns is
    taken; the results are returned as an array with one entry per true column.
    """
    def best_match(signal):
        return max(np.abs(kendalltau(signal, candidate)[0])
                   for candidate in predicted.T)

    return np.array([best_match(signal) for signal in true.T])
Example 55
Project: predictive-maintenance-using-machine-learning   Author: awslabs   File: test_nanops.py    Apache License 2.0 5 votes vote down vote up
def test_nancorr_kendall(self):
        """Check nanops.nancorr(method='kendall') against scipy for 2-D and 1-D fixtures."""
        from scipy.stats import kendalltau

        def verify(first, second, checker):
            # targets for the arrays as given and for their flattened views
            targ0 = kendalltau(first, second)[0]
            targ1 = kendalltau(first.flat, second.flat)[0]
            checker(nanops.nancorr, targ0, targ1, method='kendall')

        verify(self.arr_float_2d, self.arr_float1_2d,
               self.check_nancorr_nancov_2d)
        verify(self.arr_float_1d, self.arr_float1_1d,
               self.check_nancorr_nancov_1d)
Example 56
Project: predictive-maintenance-using-machine-learning   Author: awslabs   File: test_analytics.py    Apache License 2.0 5 votes vote down vote up
def test_corr_rank(self):
        """Check ``Series.corr`` rank methods against scipy and R reference values."""
        import scipy
        import scipy.stats as stats

        # Random series: pandas must agree with the scipy implementations.
        left = tm.makeTimeSeries()
        right = tm.makeTimeSeries()
        left[-5:] = left[:5]
        for method, scipy_func in (('kendall', stats.kendalltau),
                                   ('spearman', stats.spearmanr)):
            result = left.corr(right, method=method)
            expected = scipy_func(left, right)[0]
            tm.assert_almost_equal(result, expected)

        # these methods got rewritten in 0.8
        scipy_version = scipy.__version__
        if LooseVersion(scipy_version) < LooseVersion('0.9'):
            pytest.skip("skipping corr rank because of scipy version "
                        "{0}".format(scipy_version))

        # Fixed data whose expected correlations were computed in R.
        first = Series([
            -0.89926396, 0.94209606, -1.03289164, -0.95445587, 0.76910310,
            -0.06430576, -2.09704447, 0.40660407, -0.89926396, 0.94209606,
        ])
        second = Series([
            -1.01270225, -0.62210117, -1.56895827, 0.59592943, -0.01680292,
            1.17258718, -1.06009347, -0.10222060, -0.89076239, 0.89372375,
        ])
        for method, r_value in (('kendall', 0.4319297),
                                ('spearman', 0.5853767)):
            tm.assert_almost_equal(first.corr(second, method=method), r_value)
Example 57
Project: fund   Author: Frank-qlu   File: test_nanops.py    Apache License 2.0 5 votes vote down vote up
def test_nancorr_kendall(self):
        """Check nanops.nancorr(method='kendall') against scipy for 2-D and 1-D fixtures."""
        from scipy.stats import kendalltau

        def verify(first, second, checker):
            # targets for the arrays as given and for their flattened views
            targ0 = kendalltau(first, second)[0]
            targ1 = kendalltau(first.flat, second.flat)[0]
            checker(nanops.nancorr, targ0, targ1, method='kendall')

        verify(self.arr_float_2d, self.arr_float1_2d,
               self.check_nancorr_nancov_2d)
        verify(self.arr_float_1d, self.arr_float1_1d,
               self.check_nancorr_nancov_1d)
Example 58
Project: fund   Author: Frank-qlu   File: test_analytics.py    Apache License 2.0 5 votes vote down vote up
def test_corr_rank(self):
        """Check ``Series.corr`` rank methods against scipy and R reference values."""
        import scipy
        import scipy.stats as stats

        # Random series: pandas must agree with the scipy implementations.
        left = tm.makeTimeSeries()
        right = tm.makeTimeSeries()
        left[-5:] = left[:5]
        for method, scipy_func in (('kendall', stats.kendalltau),
                                   ('spearman', stats.spearmanr)):
            result = left.corr(right, method=method)
            expected = scipy_func(left, right)[0]
            tm.assert_almost_equal(result, expected)

        # these methods got rewritten in 0.8
        scipy_version = scipy.__version__
        if LooseVersion(scipy_version) < LooseVersion('0.9'):
            pytest.skip("skipping corr rank because of scipy version "
                        "{0}".format(scipy_version))

        # Fixed data whose expected correlations were computed in R.
        first = Series([
            -0.89926396, 0.94209606, -1.03289164, -0.95445587, 0.76910310,
            -0.06430576, -2.09704447, 0.40660407, -0.89926396, 0.94209606,
        ])
        second = Series([
            -1.01270225, -0.62210117, -1.56895827, 0.59592943, -0.01680292,
            1.17258718, -1.06009347, -0.10222060, -0.89076239, 0.89372375,
        ])
        for method, r_value in (('kendall', 0.4319297),
                                ('spearman', 0.5853767)):
            tm.assert_almost_equal(first.corr(second, method=method), r_value)
Example 59
Project: copulae   Author: DanielBok   File: correlation.py    MIT License 5 votes vote down vote up
def _get_corr_func(method: str):
    """
    Determines the correlation function

    Parameters
    ----------
    method: str
        Correlation function name (case-insensitive). One of 'pearson',
        'kendall' (alias 'tau') or 'spearman' (alias 'rho').

    Returns
    -------
    Callable
        The correlation function

    Raises
    ------
    ValueError
        If `method` is not one of the supported names.
    """

    # Single source of truth for both the validation and the dispatch.
    corr_funcs = {
        'pearson': stats.pearsonr,
        'kendall': stats.kendalltau,
        'tau': stats.kendalltau,
        'spearman': stats.spearmanr,
        'rho': stats.spearmanr,
    }

    method = method.lower()
    if method not in corr_funcs:
        # Sorted so the message is deterministic (a set has no stable order).
        raise ValueError(f"method must be one of {', '.join(sorted(corr_funcs))}")

    return corr_funcs[method]
Example 60
Project: TCC   Author: lukascivil   File: rs.py    MIT License 5 votes vote down vote up
def PredictionAccuracy(self, y_true, y_pred):
        """Return Kendall's tau, MAE and RMSE of ``y_pred`` against ``y_true`` as a dict."""
        return {
            "Tau": stats.kendalltau(y_true, y_pred)[0],
            "MAE": mean_absolute_error(y_true, y_pred),
            "RMSE": sqrt(mean_squared_error(y_true, y_pred)),
        }

    # Train K-Means 
Example 61
Project: densray   Author: pdufter   File: evaluation.py    MIT License 5 votes vote down vote up
def compute_kendalls(self):
        """Compute Kendall's tau between ``self.y_pred`` and ``self.y_true``.

            Returns:
                tau: float; Kendall's tau
                p_value: float; the p-value reported by scipy
        """
        outcome = stats.kendalltau(self.y_pred, self.y_true)
        tau, p_value = outcome
        return tau, p_value
Example 62
Project: cqf   Author: neotrinity   File: copy_calibration.py    MIT License 5 votes vote down vote up
def covariance(self, variates):
        """Build a symmetric matrix of ``sin(pi/2 * tau)`` for every pair of rows.

        ``variates`` must be 2-D; each row is treated as one variable. For
        every pair of rows Kendall's tau is computed and mapped through
        ``sin(pi/2 * tau)``. Only the lower triangle is computed and then
        mirrored, so the result is symmetric with shape
        ``(n_rows, n_rows)``.
        """
        # Unpacking keeps the original requirement that the input is 2-D.
        dimension, _ = variates.shape
        covarianceData = numpy.zeros([dimension, dimension])
        # range (not xrange) so the method runs on both Python 2 and 3.
        for i in range(dimension):
            for j in range(i + 1):
                tau = kendalltau(variates[i], variates[j])[0]
                value = math.sin(math.pi/2.0 * tau)
                covarianceData[i, j] = value
                covarianceData[j, i] = value
        return covarianceData
Example 63
Project: psychrometric-chart-makeover   Author: buds-lab   File: test_nanops.py    MIT License 5 votes vote down vote up
def test_nancorr_kendall(self):
        """Check nanops.nancorr(method='kendall') against scipy for 2-D and 1-D fixtures."""
        from scipy.stats import kendalltau

        def verify(first, second, checker):
            # targets for the arrays as given and for their flattened views
            targ0 = kendalltau(first, second)[0]
            targ1 = kendalltau(first.flat, second.flat)[0]
            checker(nanops.nancorr, targ0, targ1, method='kendall')

        verify(self.arr_float_2d, self.arr_float1_2d,
               self.check_nancorr_nancov_2d)
        verify(self.arr_float_1d, self.arr_float1_1d,
               self.check_nancorr_nancov_1d)
Example 64
Project: psychrometric-chart-makeover   Author: buds-lab   File: test_analytics.py    MIT License 5 votes vote down vote up
def test_corr_rank(self):
        """Check ``Series.corr`` rank methods against scipy and R reference values."""
        import scipy
        import scipy.stats as stats

        # Random series: pandas must agree with the scipy implementations.
        left = tm.makeTimeSeries()
        right = tm.makeTimeSeries()
        left[-5:] = left[:5]
        for method, scipy_func in (('kendall', stats.kendalltau),
                                   ('spearman', stats.spearmanr)):
            result = left.corr(right, method=method)
            expected = scipy_func(left, right)[0]
            tm.assert_almost_equal(result, expected)

        # these methods got rewritten in 0.8
        scipy_version = scipy.__version__
        if LooseVersion(scipy_version) < LooseVersion('0.9'):
            pytest.skip("skipping corr rank because of scipy version "
                        "{0}".format(scipy_version))

        # Fixed data whose expected correlations were computed in R.
        first = Series([
            -0.89926396, 0.94209606, -1.03289164, -0.95445587, 0.76910310,
            -0.06430576, -2.09704447, 0.40660407, -0.89926396, 0.94209606,
        ])
        second = Series([
            -1.01270225, -0.62210117, -1.56895827, 0.59592943, -0.01680292,
            1.17258718, -1.06009347, -0.10222060, -0.89076239, 0.89372375,
        ])
        for method, r_value in (('kendall', 0.4319297),
                                ('spearman', 0.5853767)):
            tm.assert_almost_equal(first.corr(second, method=method), r_value)
Example 65
Project: bevel   Author: Shopify   File: linear_ordinal_regression.py    MIT License 5 votes vote down vote up
def _compute_score(self, X, y):
        """Return Kendall's tau between ``X.dot(self.beta_)`` and ``y``."""
        linear_predictor = X.dot(self.beta_)
        tau_result = kendalltau(linear_predictor, y)
        return tau_result.correlation
Example 66
Project: rejection-qa   Author: becxer   File: evaluator.py    Apache License 2.0 5 votes vote down vote up
def evaluate(self, data: List[ContextAndQuestion], true_len, **kwargs):
        """Score predicted spans and aggregate text EM/F1 per question.

        Reads ``kwargs["span"]`` (predicted spans) and ``kwargs["score"]``
        (one logit per data point). For each question (or question/document
        pair when ``self.per_doc`` is set) only the data point with the
        highest logit contributes to the "question-text-*" means. Optional
        Kendall's tau and paragraph-level means are added according to
        ``self.k_tau`` and ``self.paragraph_level``. All keys are prefixed
        with ``"b%d/" % self.bound``.

        Raises RuntimeError when ``self.eval`` is neither "triviaqa" nor
        "squad".
        """
        spans = kwargs["span"]
        logits = kwargs["score"]

        if self.eval == "triviaqa":
            scorer = trivia_span_scores
        elif self.eval == "squad":
            scorer = squad_span_scores
        else:
            raise RuntimeError()
        scores = scorer(data, spans)

        has_answer = np.array([len(x.answer.answer_spans) > 0 for x in data])

        # For every key keep the index of the data point with the largest
        # logit; on equal logits the earliest index wins.
        best_by_key = {}
        for idx, point in enumerate(data):
            key = (point.question_id, point.doc_id) if self.per_doc \
                else point.question_id
            incumbent = best_by_key.get(key)
            if incumbent is None or logits[idx] > logits[incumbent]:
                best_by_key[key] = idx
        chosen = list(best_by_key.values())

        out = {
            "question-text-em": scores[chosen, 2].mean(),
            "question-text-f1": scores[chosen, 3].mean(),
        }

        if self.k_tau:
            out["text-em-k-tau"] = kendalltau(logits, scores[:, 2])[0]
            out["text-f1-k-tau"] = kendalltau(logits, scores[:, 3])[0]

        if self.paragraph_level:
            out["paragraph-text-em"] = scores[has_answer, 2].mean()
            out["paragraph-text-f1"] = scores[has_answer, 3].mean()

        prefix = "b%d/" % self.bound
        return Evaluation({prefix+name: value for name, value in out.items()})
Example 67
Project: rejection-qa   Author: becxer   File: evaluator.py    Apache License 2.0 5 votes vote down vote up
def evaluate(self, data: List[ContextAndQuestion], true_len, **kargs):
        """Aggregate span/text scores and rank-correlate them with confidences.

        Scores ``kargs["spans"]`` with the scorer selected by
        ``self.text_eval``, averages the score columns over the data points
        that have an answer, and adds the rank correlation
        (``self.rank_metric``: "spr" or "k-tau") between ``kargs["conf"]``
        (and ``kargs["none_prob"]`` when given) and score columns 3 and 0.
        All keys are prefixed with ``"b%d/" % self.bound``.

        Raises RuntimeError for an unknown ``self.text_eval`` and ValueError
        for an unknown ``self.rank_metric``.
        """
        if self.text_eval == "triviaqa":
            scorer = trivia_span_scores
        elif self.text_eval == "squad":
            scorer = squad_span_scores
        else:
            raise RuntimeError()
        scores = scorer(data, kargs["spans"])

        has_answer = [len(x.answer.answer_spans) > 0 for x in data]
        aggregated_scores = scores[has_answer].mean(axis=0)
        prefix ="b%d/" % self.bound
        scalars = {}
        for position, name in enumerate(("accuracy", "f1",
                                         "text-accuracy", "text-f1")):
            scalars[prefix + name] = aggregated_scores[position]

        if self.rank_metric == "spr":
            metric = spearmanr
        elif self.rank_metric == "k-tau":
            metric = kendalltau
        else:
            raise ValueError()

        if "none_prob" in kargs:
            none_conf = kargs["none_prob"]
            for stem, column in (("none-text-f1-", 3),
                                 ("none-span-accuracy-", 0)):
                scalars[prefix + stem + self.rank_metric] = \
                    metric(none_conf, scores[:, column])[0]

        conf = kargs["conf"]
        for stem, column in (("score-text-f1-", 3),
                             ("score-span-accuracy-", 0)):
            scalars[prefix + stem + self.rank_metric] = \
                metric(conf, scores[:, column])[0]
        return Evaluation(scalars)
Example 68
Project: wine-ml-on-aws-lambda   Author: pierreant   File: test_nanops.py    Apache License 2.0 5 votes vote down vote up
def test_nancorr_kendall(self):
        """Check nanops.nancorr(method='kendall') against scipy for 2-D and 1-D fixtures."""
        tm.skip_if_no_package('scipy.stats')
        from scipy.stats import kendalltau

        def verify(first, second, checker):
            # targets for the arrays as given and for their flattened views
            targ0 = kendalltau(first, second)[0]
            targ1 = kendalltau(first.flat, second.flat)[0]
            checker(nanops.nancorr, targ0, targ1, method='kendall')

        verify(self.arr_float_2d, self.arr_float1_2d,
               self.check_nancorr_nancov_2d)
        verify(self.arr_float_1d, self.arr_float1_1d,
               self.check_nancorr_nancov_1d)
Example 69
Project: wine-ml-on-aws-lambda   Author: pierreant   File: test_analytics.py    Apache License 2.0 5 votes vote down vote up
def test_corr_rank(self):
        """Check ``Series.corr`` rank methods against scipy and R reference values."""
        tm._skip_if_no_scipy()

        import scipy
        import scipy.stats as stats

        # Random series: pandas must agree with the scipy implementations.
        left = tm.makeTimeSeries()
        right = tm.makeTimeSeries()
        left[-5:] = left[:5]
        for method, scipy_func in (('kendall', stats.kendalltau),
                                   ('spearman', stats.spearmanr)):
            result = left.corr(right, method=method)
            expected = scipy_func(left, right)[0]
            tm.assert_almost_equal(result, expected)

        # these methods got rewritten in 0.8
        scipy_version = scipy.__version__
        if scipy_version < LooseVersion('0.9'):
            pytest.skip("skipping corr rank because of scipy version "
                        "{0}".format(scipy_version))

        # Fixed data whose expected correlations were computed in R.
        first = Series([
            -0.89926396, 0.94209606, -1.03289164, -0.95445587, 0.76910310,
            -0.06430576, -2.09704447, 0.40660407, -0.89926396, 0.94209606,
        ])
        second = Series([
            -1.01270225, -0.62210117, -1.56895827, 0.59592943, -0.01680292,
            1.17258718, -1.06009347, -0.10222060, -0.89076239, 0.89372375,
        ])
        for method, r_value in (('kendall', 0.4319297),
                                ('spearman', 0.5853767)):
            tm.assert_almost_equal(first.corr(second, method=method), r_value)
Example 70
Project: wine-ml-on-aws-lambda   Author: pierreant   File: test_mstats_basic.py    Apache License 2.0 5 votes vote down vote up
def test_kendalltau(self):
        """Compare stats.kendalltau with stats.mstats.kendalltau for every sample size."""
        for size in self.get_n():
            x, y, xm, ym = self.generate_xy_sample(size)
            plain = stats.kendalltau(x, y)
            masked = stats.mstats.kendalltau(xm, ym)
            # index 0 (statistic) is compared to 10 decimals, index 1 (p-value) to 7
            for position, places in ((0, 10), (1, 7)):
                assert_almost_equal(plain[position], masked[position],
                                    decimal=places)
Example 71
Project: wine-ml-on-aws-lambda   Author: pierreant   File: test_stats.py    Apache License 2.0 5 votes vote down vote up
def test_kendalltau_vs_mstats_basic():
    """Check that stats.kendalltau matches mstats_basic.kendalltau on tied rankings."""
    np.random.seed(42)
    for s in range(2, 10):
        # Rankings with ties: every value i below s appears i times.
        a = [value for value in range(s) for _ in range(value)]
        b = list(a)
        np.random.shuffle(a)
        np.random.shuffle(b)
        expected = mstats_basic.kendalltau(a, b)
        actual = stats.kendalltau(a, b)
        # position 0 is the statistic, position 1 the p-value
        for position in (0, 1):
            assert_approx_equal(actual[position], expected[position])
Example 72
Project: wine-ml-on-aws-lambda   Author: pierreant   File: test_stats.py    Apache License 2.0 5 votes vote down vote up
def test_kendalltau_nan_2nd_arg():
    """Regression test for gh-6134: NaNs in the second argument were not handled."""
    x = [1., 2., 3., 4.]
    y = [np.nan, 2.4, 3.4, 3.4]

    # Omitting the NaN pair must match dropping that pair by hand.
    with_omit = stats.kendalltau(x, y, nan_policy='omit')
    hand_trimmed = stats.kendalltau(x[1:], y[1:])
    assert_allclose(with_omit.correlation, hand_trimmed.correlation,
                    atol=1e-15)
Example 73
Project: LaserTOF   Author: kyleuckert   File: test_stats.py    MIT License 4 votes vote down vote up
def test_kendalltau():
    """Check stats.kendalltau on tied, degenerate, random, NaN and mismatched inputs."""
    # with some ties
    # Cross-check with R:
    # cor.test(c(12,2,1,12,2),c(1,4,7,1,0),method="kendall",exact=FALSE)
    x1 = [12, 2, 1, 12, 2]
    x2 = [1, 4, 7, 1, 0]
    expected = (-0.47140452079103173, 0.28274545993277478)
    res = stats.kendalltau(x1, x2)
    assert_approx_equal(res[0], expected[0])
    assert_approx_equal(res[1], expected[1])

    # test for namedtuple attribute results
    attributes = ('correlation', 'pvalue')
    res = stats.kendalltau(x1, x2)
    check_named_results(res, attributes)

    # with only ties in one or both inputs
    assert_equal(stats.kendalltau([2,2,2], [2,2,2]), (np.nan, np.nan))
    assert_equal(stats.kendalltau([2,0,2], [2,2,2]), (np.nan, np.nan))
    assert_equal(stats.kendalltau([2,2,2], [2,0,2]), (np.nan, np.nan))

    # empty arrays provided as input
    assert_equal(stats.kendalltau([], []), (np.nan, np.nan))

    # check with larger arrays; the seed fixes the draws, so the reference
    # values below are only valid for this exact sequence of RNG calls
    np.random.seed(7546)
    x = np.array([np.random.normal(loc=1, scale=1, size=500),
                np.random.normal(loc=1, scale=1, size=500)])
    corr = [[1.0, 0.3],
            [0.3, 1.0]]
    # mix the two independent rows with the Cholesky factor of corr
    x = np.dot(np.linalg.cholesky(corr), x)
    expected = (0.19291382765531062, 1.1337095377742629e-10)
    res = stats.kendalltau(x[0], x[1])
    assert_approx_equal(res[0], expected[0])
    assert_approx_equal(res[1], expected[1])

    # and do we get a tau of 1 for identical inputs?
    assert_approx_equal(stats.kendalltau([1,1,2], [1,1,2])[0], 1.0)

    # test nan_policy: the default call yields NaNs, 'omit' drops the NaN
    # entry, 'raise' and an unknown policy both raise ValueError
    x = np.arange(10.)
    x[9] = np.nan
    assert_array_equal(stats.kendalltau(x, x), (np.nan, np.nan))
    assert_allclose(stats.kendalltau(x, x, nan_policy='omit'),
                    (1.0, 0.00017455009626808976), rtol=1e-06)
    assert_raises(ValueError, stats.kendalltau, x, x, nan_policy='raise')
    assert_raises(ValueError, stats.kendalltau, x, x, nan_policy='foobar')

    # test unequal length inputs
    x = np.arange(10.)
    y = np.arange(20.)
    assert_raises(ValueError, stats.kendalltau, x, y)

    # test degenerate sizes: empty and single-element inputs give NaNs
    tau, p_value = stats.kendalltau([], [])
    assert_equal(np.nan, tau)
    assert_equal(np.nan, p_value)
    tau, p_value = stats.kendalltau([0], [0])
    assert_equal(np.nan, tau)
    assert_equal(np.nan, p_value) 
Example 74
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_analytics.py    MIT License 4 votes vote down vote up
def test_corr_rank(self):
        """Check ``Series.corr`` rank methods against scipy and R reference values."""
        import scipy.stats as stats

        # Random series: pandas must agree with the scipy implementations.
        left = tm.makeTimeSeries()
        right = tm.makeTimeSeries()
        left[-5:] = left[:5]
        for method, scipy_func in (
            ("kendall", stats.kendalltau),
            ("spearman", stats.spearmanr),
        ):
            result = left.corr(right, method=method)
            expected = scipy_func(left, right)[0]
            tm.assert_almost_equal(result, expected)

        # Fixed data whose expected correlations were computed in R.
        first_values = [
            -0.89926396, 0.94209606, -1.03289164, -0.95445587, 0.76910310,
            -0.06430576, -2.09704447, 0.40660407, -0.89926396, 0.94209606,
        ]
        second_values = [
            -1.01270225, -0.62210117, -1.56895827, 0.59592943, -0.01680292,
            1.17258718, -1.06009347, -0.10222060, -0.89076239, 0.89372375,
        ]
        first = Series(first_values)
        second = Series(second_values)
        for method, r_value in (("kendall", 0.4319297), ("spearman", 0.5853767)):
            tm.assert_almost_equal(first.corr(second, method=method), r_value)
Example 75
Project: news-popularity-prediction   Author: MKLab-ITI   File: ranking.py    Apache License 2.0 4 votes vote down vote up
def learning_module(file_path,
                    X_train,
                    X_test,
                    y_train,
                    y_test,
                    train_test,
                    baseline=None):
    """Predict ``y_test`` with a fitted regressor or a constant baseline and score it.

    With ``baseline=None`` a regressor obtained from ``get_regressor_fitted``
    predicts ``X_test``. With ``baseline`` set to "mean" or "median" the
    prediction is a constant array built from ``y_train``; any other value
    prints a message and raises RuntimeError.

    Returns a list ``[kendall_tau, p_value, mse, top_k_jaccard,
    feature_importances]``; for baselines ``feature_importances`` is an empty
    ``(1, 0)`` array.
    """
    if baseline is not None:
        if baseline == "mean":
            summarize = baseline_mean
        elif baseline == "median":
            summarize = baseline_median
        else:
            print("Invalid baseline method.")
            raise RuntimeError
        y_pred = np.ones_like(y_test) * summarize(y_train)

        feature_importances = np.empty((1, 0))
    else:
        regressor_fitted = get_regressor_fitted(file_path,
                                                X_train,
                                                X_test,
                                                y_train,
                                                y_test)
        y_pred = regressor_fitted.predict(X_test)

        # Original index of the top prediction; computed as in the original
        # code but not part of the returned values.
        test_indices = train_test[1]
        top_position = np.argmax(y_pred)
        top_position = test_indices[top_position]

        feature_importances = regressor_fitted.feature_importances_

    # Test.
    kendall_tau_score, p_value = kendalltau(y_test, y_pred)

    squared_errors = np.power(y_test - y_pred, 2)
    mse = np.mean(squared_errors)
    top_k_jaccard = top_k_jaccard_score(y_test, y_pred, top_k=100)

    return [kendall_tau_score, p_value,
            mse, top_k_jaccard,
            feature_importances]
Example 76
Project: ijcai2019-relis   Author: UKPLab   File: evaluator.py    MIT License 4 votes vote down vote up
def _min_ranks(values):
    """Return each value's 0-based position in sorted order; ties share the lowest position."""
    first_position = {}
    for position, value in enumerate(sorted(values)):
        first_position.setdefault(value, position)
    return [first_position[value] for value in values]


def evaluateReward(learnt_values, ref_values,short=False):
    """Compare learnt reward values against reference values.

    :param learnt_values: sequence of learnt rewards, one per item
    :param ref_values: sequence of reference rewards, aligned with
        ``learnt_values``
    :param short: when True, skip the Jensen-Shannon divergence computation
    :return: OrderedDict with the keys ``mse``, ``jsd-softmax`` (only when
        ``short`` is False), ``tau``, ``rho``, ``pcc`` and ``ndcg_at_*``
    """
    metrics_dic = OrderedDict()

    ### compute the squared error (reported in both short and full mode)
    metrics_dic['mse'] = mean_squared_error(ref_values,learnt_values)

    if not short:
        ### compute the JS divergence between the softmax-ed value lists
        prob_optimal = getSoftmaxList(ref_values, 1.0)
        prob_learnt = getSoftmaxList(learnt_values, 1.0)
        metrics_dic['jsd-softmax'] = jsd(prob_optimal,prob_learnt)

    ### compute Kendall's tau, Spearman's rho and Pearson correlation coefficient
    # rho is obtained as the Pearson correlation of the rank vectors; tied
    # values share the lowest rank of their group.
    new_reward_ranking = _min_ranks(learnt_values)
    true_reward_ranking = _min_ranks(ref_values)
    tau, _ = stats.kendalltau(new_reward_ranking, true_reward_ranking)
    rho, _ = stats.pearsonr(new_reward_ranking, true_reward_ranking)
    pcc, _ = stats.pearsonr(learnt_values, ref_values)
    metrics_dic['tau'] = tau
    metrics_dic['rho'] = rho
    metrics_dic['pcc'] = pcc

    ### compute nDCG
    # Order the reference values by decreasing learnt value. Sorting the
    # indices (rather than looking each value up again) keeps every item
    # exactly once even when several learnt values are equal.
    order = sorted(range(len(learnt_values)),
                   key=lambda idx: learnt_values[idx],
                   reverse=True)
    ll = [ref_values[idx] for idx in order]

    for label, fraction in (('1%', 0.01), ('5%', 0.05), ('10%', 0.1),
                            ('20%', 0.2), ('50%', 0.5)):
        metrics_dic['ndcg_at_' + label] = ndcg_at_k(ll,int(fraction*len(ll)))
    metrics_dic['ndcg_at_all'] = ndcg_at_k(ll,len(ll))

    return metrics_dic
Example 77
Project: poker   Author: surgebiswas   File: test_stats.py    MIT License 4 votes vote down vote up
def test_kendalltau():
    """Exercise stats.kendalltau on ties, degenerate inputs, NaNs and bad shapes."""
    # small sample containing ties
    tied_a = [12, 2, 1, 12, 2]
    tied_b = [1, 4, 7, 1, 0]
    want_tau, want_p = -0.47140452079103173, 0.24821309157521476
    got = stats.kendalltau(tied_a, tied_b)
    assert_approx_equal(got[0], want_tau)
    assert_approx_equal(got[1], want_p)

    # the result must expose namedtuple-style attributes
    named = stats.kendalltau(tied_a, tied_b)
    check_named_results(named, ('correlation', 'pvalue'))

    # only ties in one or both inputs -> both outputs are NaN
    nan_pair = (np.nan, np.nan)
    degenerate_cases = (
        ([2, 2, 2], [2, 2, 2]),
        ([2, 0, 2], [2, 2, 2]),
        ([2, 2, 2], [2, 0, 2]),
    )
    for left, right in degenerate_cases:
        assert_equal(stats.kendalltau(left, right), nan_pair)

    # empty inputs behave the same way
    assert_equal(stats.kendalltau([], []), nan_pair)

    # both sort strategies must agree on the p-value
    with warnings.catch_warnings():
        warnings.simplefilter('ignore', UserWarning)
        p_plain = stats.kendalltau(tied_a, tied_b, initial_lexsort=False)[1]
        p_lexsort = stats.kendalltau(tied_a, tied_b, initial_lexsort=True)[1]
        assert_approx_equal(p_plain, p_lexsort)

    # larger, correlated sample (the two draws must stay in this order so
    # the seeded stream produces the expected values)
    np.random.seed(7546)
    first_draw = np.random.normal(loc=1, scale=1, size=500)
    second_draw = np.random.normal(loc=1, scale=1, size=500)
    samples = np.array([first_draw, second_draw])
    target_corr = [[1.0, 0.3],
                   [0.3, 1.0]]
    samples = np.dot(np.linalg.cholesky(target_corr), samples)
    want_tau, want_p = 0.19291382765531062, 1.1337108207276285e-10
    got = stats.kendalltau(samples[0], samples[1])
    assert_approx_equal(got[0], want_tau)
    assert_approx_equal(got[1], want_p)

    # identical inputs give tau == 1
    same = [1, 1, 2]
    assert_approx_equal(stats.kendalltau(same, [1, 1, 2])[0], 1.0)

    # nan_policy handling
    with_nan = np.arange(10.)
    with_nan[9] = np.nan
    assert_array_equal(stats.kendalltau(with_nan, with_nan), (np.nan, np.nan))
    assert_allclose(stats.kendalltau(with_nan, with_nan, nan_policy='omit'),
                    (1.0, 0.00017455009626808976), rtol=1e-06)
    for policy in ('raise', 'foobar'):
        assert_raises(ValueError, stats.kendalltau, with_nan, with_nan,
                      nan_policy=policy)

    # inputs of different lengths are rejected
    short_input = np.arange(10.)
    long_input = np.arange(20.)
    assert_raises(ValueError, stats.kendalltau, short_input, long_input)
Example 78
Project: SumQE   Author: nlpaueb   File: rouge_bleu_experiments.py    MIT License 4 votes vote down vote up
def mean_aggregated_correlations(data, year):
    """
    Computes the correlation between auto_metrics and human_metrics after
    averaging every metric per system over all documents.
    :param data: The actual data of the year stored on dictionary
    :param year: The corresponding year of the data (a string). It is used in the names of the output files
    :return: None. One CSV per human metric is written into OUTPUT_DIR, holding the
             Spearman, Kendall and Pearson correlations against every automatic metric
    """
    system_ids = {peer_id for doc in data.values() for peer_id in doc['peer_summarizers']}

    auto_aggregation_table = np.zeros([len(system_ids), len(auto_metrics_names)])
    human_aggregation_table = np.zeros([len(system_ids), len(human_metrics_names)])

    for i, sid in enumerate(system_ids):
        auto_score_list = []  # A list of lists for each auto metric (e.g ROUGE, BLEU)
        human_score_list = []  # A list of lists for each human metric

        for doc in data.values():
            peer = doc['peer_summarizers'][sid]
            auto_score_list.append(short_automatic_metrics(peer['rouge_scores']))
            human_score_list.append(short_human_metrics(peer['human_scores']))

        # Row i holds this system's scores averaged over all documents.
        auto_aggregation_table[i, :] = np.mean(np.array(auto_score_list), axis=0)
        human_aggregation_table[i, :] = np.mean(np.array(human_score_list), axis=0)

    for i, metric in enumerate(human_metrics_names):

        path_to_save = os.path.join(OUTPUT_DIR, 'ROUGE_BLEU-{0:s} {1:s}.csv'.format(metric, year))
        human_column = human_aggregation_table[:, i]

        # newline='' lets the csv module control line endings itself;
        # without it extra blank rows appear on platforms that translate '\n'.
        with open(path_to_save, 'w', newline='') as file:
            the_writer = csv.writer(file, delimiter=',')
            the_writer.writerow([' ', 'Spearman', 'Kendall', 'Pearson'])

            for j, name in enumerate(auto_metrics_names):
                auto_column = auto_aggregation_table[:, j]
                the_writer.writerow([
                    str(name),
                    np.round(spearmanr(human_column, auto_column)[0], 3),
                    np.round(kendalltau(human_column, auto_column)[0], 3),
                    np.round(pearsonr(human_column, auto_column)[0], 3)
                ])
Example 79
Project: SumQE   Author: nlpaueb   File: BERT_GPT2.py    MIT License 4 votes vote down vote up
def compute_correlations_of_each_k(data, predictions, model_name, year):
    """
    Computes the correlations between the BERT or GPT2 Language Model with Q1
    :param data: The actual data of the year stored on dictionary
    :param predictions: A collection indexed by 0..len(predictions)-1; entry k maps doc_id -> system id -> score
    :param model_name: Name of LM_experiments we used (BERT or GPT2). It is used on the output file name
    :param year: The corresponding year of the data
    :return: The best k (combination of k-worst bpes) which achieved the best spearman correlations
    """
    system_ids = {peer_id for doc in data.values() for peer_id in doc['peer_summarizers']}

    # len(predictions) = MAX_BPES_TO_SEARCH
    n_settings = len(predictions)

    q1_aggregation_table = np.zeros(len(system_ids))
    model_predictions_aggregation_table = [np.zeros(len(system_ids)) for _ in range(n_settings)]

    for i, sid in enumerate(system_ids):
        q1_scores = []
        model_scores = [[] for _ in range(n_settings)]

        for doc_id, doc in data.items():
            q1_scores.append(doc['peer_summarizers'][sid]['human_scores']['Q1'])

            for j in range(n_settings):
                model_scores[j].append(predictions[j][doc_id][sid])

        # Average once per system, after every document has been collected
        # (recomputing the mean for each document gives the same final value).
        q1_aggregation_table[i] = np.mean(np.array(q1_scores))
        for k in range(n_settings):
            model_predictions_aggregation_table[k][i] = np.mean(np.array(model_scores[k]))

    spearman, kendall, pearson, lines_2_write = [], [], [], []
    for k in range(n_settings):
        # The model scores are negated before being correlated with Q1.
        negated_scores = -model_predictions_aggregation_table[k]

        spearman_corr = spearmanr(q1_aggregation_table, negated_scores)[0]
        spearman.append(spearman_corr)
        kendall_corr = kendalltau(q1_aggregation_table, negated_scores)[0]
        kendall.append(kendall_corr)
        pearson_corr = pearsonr(q1_aggregation_table, negated_scores)[0]
        pearson.append(pearson_corr)

        lines_2_write.append([str(k+1) + ' bpes', spearman_corr, kendall_corr, pearson_corr])

    path_to_save = os.path.join(OUTPUT_DIR, 'Q1 - {0:s}  {1:s}.csv'.format(model_name, year))

    # newline='' lets the csv module control line endings itself;
    # without it extra blank rows appear on platforms that translate '\n'.
    with open(path_to_save, 'w', newline='') as file:
        the_writer = csv.writer(file, delimiter=',')
        the_writer.writerow(['# Bpes', 'Spearman', 'Kendall', 'Pearson'])
        the_writer.writerows(lines_2_write)

    # Visualize the correlations of each k-worst bpes perplexity and actual-scores
    visualize_correlation_metrics(spearman, kendall, pearson, model_name, year)

    # Compute and return the best k by spearman (k is 1-based)
    spearman_max = max(spearman)
    best_k = spearman.index(spearman_max) + 1

    return best_k
Example 80
Project: choix   Author: lucasmaystre   File: utils.py    MIT License 4 votes vote down vote up
def kendalltau_dist(params1, params2=None):
    r"""Compute the Kendall tau distance between two models.

    This function computes the Kendall tau distance between the rankings
    induced by two parameter vectors. Let :math:`\sigma_i` be the rank of item
    ``i`` in the model described by ``params1``, and :math:`\tau_i` be its rank
    in the model described by ``params2``. The Kendall tau distance is defined
    as the number of pairwise disagreements between the two rankings, i.e.,

    .. math::

      \sum_{i=1}^N \sum_{j=1}^N
        \mathbf{1} \{ \sigma_i > \sigma_j \wedge \tau_i < \tau_j \}

    By convention, items with the lowest parameters are ranked first (i.e.,
    sorted using the natural order).

    If the argument ``params2`` is ``None``, the second model is assumed to
    rank the items by their index: item ``0`` has rank 1, item ``1`` has rank
    2, etc.

    If some values are equal within a parameter vector, all items are given a
    distinct rank, corresponding to the order in which the values occur.

    With fewer than two items there are no pairs to disagree on, so the
    distance is ``0``.

    Parameters
    ----------
    params1 : array_like
        Parameters of the first model.
    params2 : array_like, optional
        Parameters of the second model. Must have the same length as
        ``params1``.

    Returns
    -------
    dist : int
        Kendall tau distance, rounded to the nearest whole number of
        discordant pairs.
    """
    assert params2 is None or len(params1) == len(params2)
    n_items = len(params1)
    if n_items < 2:
        # kendalltau is undefined (NaN) here, and round(NaN) would raise.
        return 0
    ranks1 = rankdata(params1, method="ordinal")
    if params2 is None:
        ranks2 = np.arange(1, n_items + 1, dtype=float)
    else:
        ranks2 = rankdata(params2, method="ordinal")
    tau, _ = kendalltau(ranks1, ranks2)
    n_pairs = n_items * (n_items - 1) / 2
    # Ordinal ranks have no ties, so tau = (concordant - discordant) / n_pairs
    # and the discordant count is n_pairs * (1 - tau) / 2.
    return round((n_pairs - n_pairs * tau) / 2)