Python scipy.stats.kendalltau() Examples

The following are 30 code examples of scipy.stats.kendalltau(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module scipy.stats, or try the search function.

Example #1

Source File: nanops.py From recruit with Apache License 2.0

7 votes

def get_corr_func(method):
    """Resolve ``method`` to a function computing a correlation of two arrays.

    Parameters
    ----------
    method : {'pearson', 'kendall', 'spearman'} or callable
        Name of a built-in correlation, or a callable, which is handed back
        unchanged.

    Returns
    -------
    callable
        Function taking ``(a, b)`` and returning the coefficient.

    Raises
    ------
    KeyError
        If ``method`` is a non-callable, hashable value that is not one of
        the three names above.
    """
    # scipy is only required by the rank-based methods, so import it lazily.
    if method in ['kendall', 'spearman']:
        from scipy.stats import kendalltau, spearmanr
    elif callable(method):
        return method

    def _pearson(a, b):
        return np.corrcoef(a, b)[0, 1]

    def _kendall(a, b):
        # kendalltau may hand back a tuple-like result or a bare value;
        # in the former case the coefficient is the first element.
        rs = kendalltau(a, b)
        return rs[0] if isinstance(rs, tuple) else rs

    def _spearman(a, b):
        return spearmanr(a, b)[0]

    return dict(pearson=_pearson, kendall=_kendall, spearman=_spearman)[method]

Example #2

Source File: util.py From eval-nas with MIT License

6 votes

def compute_sparse_kendalltau(model_ids, model_perfs, gt_perfs, threshold=1e-4,
                              fn_map_perf_to_new_rank=sparse_rank_by_simple_bin):
    """
    Compute Kendall's tau on "sparse" (compressed) ranks.

    The ground-truth performances are mapped to sparse ranks by
    ``fn_map_perf_to_new_rank``; those ranks are then compared, through
    ``kendalltau``, with the same ranks looked up in ``model_ids`` order.

    :param model_ids: model identifiers; each one is looked up in the sorted id list.
    :param model_perfs: per-model performances; multiplied by 1e-2 when their
        average lies strictly between 1 and 100 (presumably percentages -- confirm).
    :param gt_perfs: ground-truth performances, passed through ``sort_hash_perfs``.
    :param threshold: forwarded to ``fn_map_perf_to_new_rank``.
    :param fn_map_perf_to_new_rank: callable ``(ids, perfs, threshold=...)``
        returning one sparse rank per id.
    :return: the result of ``kendalltau(sparse_ranks, looked_up_ranks)``.
    """
    scale = 1e-2 if 1 < np.average(model_perfs) < 100 else 1.
    scaled_perfs = [perf * scale for perf in model_perfs]
    gt_perfs, _ = sort_hash_perfs(gt_perfs, scaled_perfs)
    sorted_ids, sorted_perfs = sort_hash_perfs(model_ids, gt_perfs, verbose=False)
    sparse_ranks = fn_map_perf_to_new_rank(sorted_ids, sorted_perfs, threshold=threshold)
    # Re-order the sparse ranks so they follow the caller's model_ids order.
    position_of = sorted_ids.index
    looked_up_ranks = [sparse_ranks[position_of(model_id)] for model_id in model_ids]
    n_before = len(set(model_ids))
    n_after = len(set(sparse_ranks))
    print("Reduced ranks from {} to {}".format(n_before, n_after))
    return kendalltau(sparse_ranks, looked_up_ranks)

Example #3

Source File: sorting_task.py From sinkhorn-policy-gradient.pytorch with BSD 3-Clause "New" or "Revised" License

6 votes

def reward_ddpg_D(solution, use_cuda):
    """
    Kendall-Tau correlation coefficient

    ``solution.size()`` is unpacked as ``(batch_size, n, m)``. Each
    ``solution[i]`` is correlated with the target ``0..m-1`` and the
    coefficients are stacked, one single-element FloatTensor per batch item.
    The stacked tensor is moved to the GPU when ``use_cuda`` is true and is
    returned wrapped in a ``Variable`` with ``requires_grad=False``.
    """
    batch_size, _, m = solution.size()
    raw = solution.data
    if use_cuda:
        # numpy conversion needs host memory
        raw = raw.cpu()
    values = raw.numpy()

    target = np.arange(m)
    rewards = torch.stack([
        torch.FloatTensor([stats.kendalltau(values[i], target).correlation])
        for i in range(batch_size)
    ])
    if use_cuda:
        rewards = rewards.cuda()
    return Variable(rewards, requires_grad=False)

Example #4

Source File: nanops.py From predictive-maintenance-using-machine-learning with Apache License 2.0

6 votes

def get_corr_func(method):
    """Return the correlation function selected by ``method``.

    ``method`` is either one of the names ``'pearson'``, ``'kendall'`` or
    ``'spearman'``, or a callable. A callable is returned as-is; a name is
    mapped to a two-argument function ``(a, b)`` returning the coefficient.
    Any other hashable value raises ``KeyError`` from the final lookup.
    """
    wants_scipy = method in ['kendall', 'spearman']
    if wants_scipy:
        # Imported here so that scipy is only needed for rank correlations.
        from scipy.stats import kendalltau, spearmanr
    if not wants_scipy and callable(method):
        return method

    def _pearson(a, b):
        coefficients = np.corrcoef(a, b)
        return coefficients[0, 1]

    def _kendall(a, b):
        rs = kendalltau(a, b)
        # Tuple-like results carry the coefficient first.
        if not isinstance(rs, tuple):
            return rs
        return rs[0]

    def _spearman(a, b):
        return spearmanr(a, b)[0]

    dispatch = {
        'pearson': _pearson,
        'kendall': _kendall,
        'spearman': _spearman,
    }
    return dispatch[method]

Example #5

Source File: copula.py From pycopula with Apache License 2.0

6 votes

def correlations(self, X):
    """
    Compute and store three correlation coefficients between the two
    columns of ``X``. Only available when dimension of copula is 2.

    Parameters
    ----------
    X : numpy array (of size n * 2)
        Values to compute correlations; columns 0 and 1 are used.

    Returns
    -------
    kendall : float
        The Kendall tau.
    pearson : float
        The Pearson's R
    spearman : float
        The Spearman's R

    Raises
    ------
    Exception
        If ``self.dim`` is not 2.

    The three values are also stored on ``self.kendall``, ``self.pearson``
    and ``self.spearman``.
    """
    if self.dim != 2:
        raise Exception("Correlations can not be computed when dimension is greater than 2.")
    # Each estimator returns (coefficient, p-value); keep the coefficient.
    for attr_name, estimator in (("kendall", kendalltau),
                                 ("pearson", pearsonr),
                                 ("spearman", spearmanr)):
        setattr(self, attr_name, estimator(X[:, 0], X[:, 1])[0])
    return self.kendall, self.pearson, self.spearman

Example #6

Source File: nanops.py From elasticintel with GNU General Public License v3.0

6 votes

def get_corr_func(method):
    """Look up the correlation routine registered under ``method``.

    Parameters
    ----------
    method : {'pearson', 'kendall', 'spearman'}
        Name of the correlation to compute.

    Returns
    -------
    callable
        Function taking ``(a, b)`` and returning the coefficient.

    Raises
    ------
    KeyError
        If ``method`` is a hashable value other than the three names above.
    """
    # Only the rank-based methods depend on scipy; import it on demand.
    if method in ['kendall', 'spearman']:
        from scipy.stats import kendalltau, spearmanr

    def _pearson(a, b):
        coef_matrix = np.corrcoef(a, b)
        return coef_matrix[0, 1]

    def _kendall(a, b):
        rs = kendalltau(a, b)
        # A tuple-like result holds the coefficient in first position.
        return rs[0] if isinstance(rs, tuple) else rs

    def _spearman(a, b):
        rho_and_p = spearmanr(a, b)
        return rho_and_p[0]

    return {'pearson': _pearson, 'kendall': _kendall, 'spearman': _spearman}[method]

Example #7

Source File: nanops.py From Splunking-Crime with GNU Affero General Public License v3.0

6 votes

def get_corr_func(method):
    """Map a correlation name to the function that computes it.

    ``method`` must be ``'pearson'``, ``'kendall'`` or ``'spearman'``; the
    returned function takes two array-likes ``(a, b)`` and returns the
    coefficient. The final dictionary lookup raises ``KeyError`` for any
    other hashable value.
    """
    rank_based = ['kendall', 'spearman']
    if method in rank_based:
        # scipy is imported only when one of its estimators is requested.
        from scipy.stats import kendalltau, spearmanr

    def _pearson(a, b):
        return np.corrcoef(a, b)[0, 1]

    def _kendall(a, b):
        rs = kendalltau(a, b)
        if not isinstance(rs, tuple):
            return rs
        # tuple-like result: coefficient comes first
        return rs[0]

    def _spearman(a, b):
        return spearmanr(a, b)[0]

    _cor_methods = dict(
        pearson=_pearson,
        kendall=_kendall,
        spearman=_spearman,
    )
    return _cor_methods[method]

Example #8

Source File: copulafit.py From copula-py with GNU General Public License v3.0

6 votes

def _gaussian_PKTE(X):
    """Build a correlation-matrix estimate from pairwise Kendall's tau.

    For every pair of columns of ``X`` the entry is ``sin(pi/2 * tau)``,
    where ``tau`` is Kendall's tau of the two columns; the diagonal is 1.
    The matrix is finally passed through ``_nearPD``.

    Parameters
    ----------
    X : array indexed as ``X[:, dim]``; ``X.shape[1]`` is the number of
        dimensions.

    Returns
    -------
    The ``N x N`` matrix returned by ``_nearPD``.
    """
    # the algorithm for this comes from the paper:
    # "Gaussian Copula Precision Estimation with Missing Values"
    # by Huahua Wang, Faridel Fazayeli, Soumyadeep Chatterjee, Arindam Banerjee
    N = X.shape[1]
    sigma_hat = np.ones((N, N))
    for dim1 in range(0, N-1):
        for dim2 in range(dim1+1, N):
            # kendalltau returns (tau, p-value); only tau enters the
            # transform -- multiplying the whole result by a float fails.
            tau = kendalltau(X[:, dim1], X[:, dim2])[0]
            rho = np.sin(math.pi/2 * tau)
            # correlation matrix is symmetric
            sigma_hat[dim1][dim2] = rho
            sigma_hat[dim2][dim1] = rho

    # ensure that sigma_hat is positive semidefinite
    sigma_hat = _nearPD(sigma_hat)

    return sigma_hat

# TODO: T copula stuff

Example #9

Source File: summary.py From neleval with Apache License 2.0

6 votes

def __call__(self):
    """Score every system on every measure and summarise the scores.

    Fills ``self.all_results`` with one row per system and one column per
    measure (the ``'fscore'`` of each measure), then computes, for every
    pair of measures produced by ``_pairs``, the Pearson, Spearman and
    Kendall correlations of their score columns, and for every measure the
    0/25/50/75/100 percentiles of its scores.

    Returns whatever ``self.format`` returns for
    ``{'quartiles': ..., 'correlations': ...}``.
    """
    all_results = np.empty((len(self.systems), len(self.measures)))
    # TODO: parallelise?
    for system, sys_results in zip(self.systems, all_results):
        if self.gold is None:
            result_dict = Evaluate.read_tab_format(utf8_open(system))
        else:
            result_dict = Evaluate(system, self.gold, measures=self.measures, fmt='none')()
        # write in place: sys_results is a row view of all_results
        sys_results[...] = [result_dict[measure]['fscore'] for measure in self.measures]

    self.all_results = all_results

    correlations = {}
    # Materialise the pairs: on Python 3 ``zip`` is a one-shot iterator, so
    # it would be exhausted by ``_pairs`` and the quartile loop below would
    # never run.
    scores_by_measure = list(zip(self.measures, all_results.T))
    for (measure_i, scores_i), (measure_j, scores_j) in _pairs(scores_by_measure):
        correlations[measure_i, measure_j] = {'pearson': stats.pearsonr(scores_i, scores_j),
                                              'spearman': stats.spearmanr(scores_i, scores_j),
                                              'kendall': stats.kendalltau(scores_i, scores_j)}

    quartiles = {}
    for measure_i, scores_i in scores_by_measure:
        quartiles[measure_i] = np.percentile(scores_i, [0, 25, 50, 75, 100])

    return self.format(self, {'quartiles': quartiles, 'correlations': correlations})

Example #10

Source File: multivariate_stats.py From copula-py with GNU General Public License v3.0

6 votes

def kendalls_tau(X):
    """
    Calculates a generalized Kendall's tau for a data set given by X, as
    described by "Multivariate Extensions of Spearman's Rho and Related Statistics"

    The value is the average of the pairwise Kendall's tau over all pairs
    of columns of X.

    Inputs:
      X - the input data, should be a numpy array of shape = M x N, where
          M is the number of samples, and N is the dimensionality of the data

    Raises:
      ValueError - if N < 2
    """
    n_dims = X.shape[1]
    if n_dims < 2:
        raise ValueError("To calculate Kendall's Tau, need data of dimensionality >= 2")

    tau_sum = 0.0
    for first in range(n_dims - 1):
        for second in range(first + 1, n_dims):
            (tau, _pvalue) = kendalltau(X[:, first], X[:, second])
            tau_sum += tau
    # normalize by the number of column pairs
    return tau_sum / comb(n_dims, 2)

Example #11

Source File: nanops.py From Computable with MIT License

6 votes

def get_corr_func(method):
    """Select a correlation function by name.

    Accepted names are ``'pearson'``, ``'kendall'`` and ``'spearman'``. The
    result is a function of two array-likes ``(a, b)`` that returns the
    corresponding coefficient. Another hashable ``method`` ends in a
    ``KeyError`` raised by the lookup table.
    """
    if method in ['kendall', 'spearman']:
        # deferred import: pearson does not need scipy
        from scipy.stats import kendalltau, spearmanr

    def _pearson(a, b):
        matrix = np.corrcoef(a, b)
        return matrix[0, 1]

    def _kendall(a, b):
        rs = kendalltau(a, b)
        is_tuple_like = isinstance(rs, tuple)
        return rs[0] if is_tuple_like else rs

    def _spearman(a, b):
        return spearmanr(a, b)[0]

    table = {}
    table['pearson'] = _pearson
    table['kendall'] = _kendall
    table['spearman'] = _spearman
    return table[method]

Example #12

Source File: nanops.py From vnpy_crypto with MIT License

6 votes

def get_corr_func(method):
    """Return the two-argument correlation function named by ``method``.

    :param method: ``'pearson'``, ``'kendall'`` or ``'spearman'``.
    :return: function ``(a, b) -> coefficient``.
    :raises KeyError: for any other hashable ``method``.
    """
    needs_scipy = method in ['kendall', 'spearman']
    if needs_scipy:
        # Keep scipy optional for the Pearson-only case.
        from scipy.stats import kendalltau, spearmanr

    def _pearson(a, b):
        return np.corrcoef(a, b)[0, 1]

    def _kendall(a, b):
        rs = kendalltau(a, b)
        # Accept both a tuple-like (coefficient, ...) and a bare value.
        if isinstance(rs, tuple):
            rs = rs[0]
        return rs

    def _spearman(a, b):
        result = spearmanr(a, b)
        return result[0]

    choices = (('pearson', _pearson), ('kendall', _kendall), ('spearman', _spearman))
    return dict(choices)[method]

Example #13

Source File: select_allele_specific_models_command.py From mhcflurry with Apache License 2.0

6 votes

def score_function(self, allele):
    """Build a ``ScoreFunction`` rating predictors by agreement with ``self.predictor``.

    ``self.predictor`` is first asked for predictions on ``self.peptides``
    for ``allele``. The wrapped scorer computes Kendall's tau between a
    candidate predictor's predictions and those reference predictions,
    optionally records it under ``"score_consensus_tau"`` in
    ``additional_metadata_out``, and returns tau multiplied by
    ``self.multiply_score_by_value``.
    """
    reference_predictions = self.predictor.predict(
        allele=allele, peptides=self.peptides)

    def score(predictor, additional_metadata_out=None):
        candidate_predictions = predictor.predict(
            allele=allele, peptides=self.peptides)
        result = kendalltau(candidate_predictions, reference_predictions)
        tau = result.correlation
        if additional_metadata_out is not None:
            additional_metadata_out["score_consensus_tau"] = tau
        return tau * self.multiply_score_by_value

    summary = self.plan_summary(allele)
    return ScoreFunction(score, summary=summary)

Example #14

Source File: metrics.py From reco with MIT License

6 votes

def kendalltau(rankA, rankB):
    """Kendall's tau between two rankings of the same items.

    Each ranking is a sequence of items, or of tuples whose first element
    is the item. The position of every item of ``rankA`` is looked up in
    ``rankB`` and the two position lists are compared with scipy's
    ``kendalltau``.

    :param rankA: first ranking.
    :param rankB: second ranking, containing the same items as ``rankA``.
    :return: the correlation coefficient (first element of scipy's result).
    :raises TypeError: if the rankings differ in length.
    :raises ValueError: if an item of ``rankA`` does not occur in ``rankB``.
    """
    # This function shadows the name ``kendalltau``: an unqualified call
    # would recurse into itself forever instead of reaching scipy, so the
    # scipy implementation is imported under an alias.
    from scipy.stats import kendalltau as _scipy_kendalltau

    if len(rankA) != len(rankB):
        raise TypeError("The two rank lists must be of the same length.")

    N = len(rankA)

    # Strip (item, score)-style tuples down to the item itself.
    if N and isinstance(rankA[0], tuple):
        rankA = [entry[0] for entry in rankA]

    if N and isinstance(rankB[0], tuple):
        rankB = [entry[0] for entry in rankB]

    items_b = list(rankB)
    listA = list(range(N))
    listB = [items_b.index(item) for item in rankA]

    return _scipy_kendalltau(listA, listB)[0]

Example #15

Source File: asap_evaluator.py From nea with GNU General Public License v3.0

5 votes

def calc_correl(self, dev_pred, test_pred):
    """Correlate dev/test predictions with ``self.dev_y_org`` / ``self.test_y_org``.

    Returns the coefficients (p-values are discarded) in the order
    ``dev_pearson, test_pearson, dev_spearman, test_spearman, dev_tau, test_tau``.
    """
    coefficients = []
    for estimator in (pearsonr, spearmanr, kendalltau):
        # each estimator yields (coefficient, p-value)
        dev_coef, _ = estimator(dev_pred, self.dev_y_org)
        test_coef, _ = estimator(test_pred, self.test_y_org)
        coefficients.append(dev_coef)
        coefficients.append(test_coef)
    return tuple(coefficients)

Example #16

Source File: cnn_general_search_policies.py From eval-nas with MIT License

5 votes

def _compute_kendall_tau(ranking_per_epoch, compute_across_time=False):
    """
    Compute Kendall tau given the ranking per epochs.

    Each ``ranking_per_epoch[epoch]`` is iterated as entries whose element
    ``[1]`` exposes ``geno_id`` and is sortable by ``itemgetter(3)``; the
    sorted order is treated as the ground-truth ranking. Epoch keys are
    visited in reversed order and the key ``10000000`` is used for the
    ground-truth entry.

    :param ranking_per_epoch: mapping epoch_key -> list of ranking entries.
    :param compute_across_time: True for ranking-per-epoch always fixed, False for dynamic list of models.
    :return: ``(kd_tau, latest_tau)``. With ``compute_across_time`` the dict
        maps each key to a list of kendalltau results against every later key
        in the reversed order; otherwise it maps each key to one kendalltau
        result (ground truth vs. that epoch's own ranking).
    """
    def _ids_as_given(entries):
        return np.array([entry[1].geno_id for entry in entries], dtype=np.uint)

    def _ids_ground_truth(entries):
        models = sorted([entry[1] for entry in entries], key=itemgetter(3))
        return np.array([model.geno_id for model in models], dtype=np.uint)

    epoch_keys = list(reversed(ranking_per_epoch.keys()))
    kd_tau = {}

    if not compute_across_time:
        # Dynamic ranking per epoch size, thus only compute the KDT against
        # the ground-truth ordering of that same epoch.
        for k in epoch_keys:
            entries = ranking_per_epoch[k]
            kd_tau[k] = kendalltau(_ids_ground_truth(entries), _ids_as_given(entries))

        latest = epoch_keys[0]
        kd_tau[10000000] = kd_tau[latest]
        logging.info("Latest Kendall Tau (ground-truth vs {}): {}".format(latest, kd_tau[latest][0]))
        return kd_tau, kd_tau[latest][0]

    # Compute Kendall tau for every epochs and save them into result.
    epoch_keys.insert(0, 10000000)
    for ind, k in enumerate(epoch_keys[:-1]):
        if ind == 0:
            # The sentinel key stands for the sorted (ground-truth) ranking
            # of the first real epoch key.
            rank_1 = _ids_ground_truth(ranking_per_epoch[epoch_keys[1]])
        else:
            rank_1 = _ids_as_given(ranking_per_epoch[k])
        kd_tau[k] = [
            kendalltau(rank_1, _ids_as_given(ranking_per_epoch[j]))
            for j in epoch_keys[ind + 1:]
        ]
    logging.info("Latest Kendall Tau (ground-truth vs {}): {}".format(epoch_keys[1], kd_tau[10000000][0]))
    return kd_tau, kd_tau[10000000][0].correlation

Example #17

Source File: test_analytics.py From elasticintel with GNU General Public License v3.0

5 votes

def test_corr_rank(self):
    """Check ``corr`` with rank methods against scipy and against values from R."""
    tm._skip_if_no_scipy()

    import scipy
    import scipy.stats as stats

    # kendall and spearman, compared with scipy on generated series
    A = tm.makeTimeSeries()
    B = tm.makeTimeSeries()
    A[-5:] = A[:5]
    for method, scipy_name in (('kendall', 'kendalltau'), ('spearman', 'spearmanr')):
        result = A.corr(B, method=method)
        expected = getattr(stats, scipy_name)(A, B)[0]
        tm.assert_almost_equal(result, expected)

    # these methods got rewritten in 0.8
    if scipy.__version__ < LooseVersion('0.9'):
        pytest.skip("skipping corr rank because of scipy version "
                    "{0}".format(scipy.__version__))

    # results from R
    A = Series(
        [-0.89926396, 0.94209606, -1.03289164, -0.95445587, 0.76910310,
         -0.06430576, -2.09704447, 0.40660407, -0.89926396, 0.94209606])
    B = Series(
        [-1.01270225, -0.62210117, -1.56895827, 0.59592943, -0.01680292,
         1.17258718, -1.06009347, -0.10222060, -0.89076239, 0.89372375])
    for method, r_value in (('kendall', 0.4319297), ('spearman', 0.5853767)):
        tm.assert_almost_equal(A.corr(B, method=method), r_value)

Example #18

Source File: eval_metrics.py From kopt with MIT License

5 votes

def kendall(y_true, y_pred, nb_sample=100000):
    """Kendall's tau coefficient, Kendall rank correlation coefficient

    Both inputs first go through ``_mask_nan``. When more than ``nb_sample``
    values remain, a random subset of ``nb_sample`` positions (drawn with
    ``np.random.shuffle``) is used instead of the full data.
    """
    y_true, y_pred = _mask_nan(y_true, y_pred)
    n_total = len(y_true)
    if n_total > nb_sample:
        # subsample without replacement: shuffle all positions, keep a prefix
        positions = np.arange(n_total)
        np.random.shuffle(positions)
        keep = positions[:nb_sample]
        y_true, y_pred = y_true[keep], y_pred[keep]
    tau = kendalltau(y_true, y_pred)[0]
    return tau

Example #19

Source File: numerical.py From CausalDiscoveryToolbox with MIT License

5 votes

def predict(self, a, b):
    """ Compute the test statistic

    Both inputs are converted to arrays and reshaped to a single column
    before being passed to ``sp.kendalltau``; the first element of its
    result is returned.

    Args:
        a (array-like): Variable 1
        b (array-like): Variable 2

    Returns:
        float: test statistic
    """
    column_a, column_b = (np.array(values).reshape((-1, 1)) for values in (a, b))
    result = sp.kendalltau(column_a, column_b)
    return result[0]

Example #20

Source File: evaluation.py From deepcpg with MIT License

5 votes

def kendall(y, z, nb_sample=100000):
    """Compute Kendall's correlation coefficient.

    When ``y`` holds more than ``nb_sample`` values, both inputs are reduced
    to the same random subset of ``nb_sample`` positions (drawn with
    ``np.random.shuffle``) before the coefficient is computed.
    """
    n_total = len(y)
    if n_total > nb_sample:
        # shuffle all positions and keep a prefix: sampling without replacement
        picks = np.arange(n_total)
        np.random.shuffle(picks)
        picks = picks[:nb_sample]
        y, z = y[picks], z[picks]
    return kendalltau(y, z)[0]

Example #21

Source File: test_nanops.py From elasticintel with GNU General Public License v3.0

5 votes

def test_nancorr_kendall(self):
    """Compare ``nanops.nancorr(method='kendall')`` with scipy's kendalltau on 2-d and 1-d fixtures."""
    tm.skip_if_no_package('scipy.stats')
    from scipy.stats import kendalltau
    for suffix in ('2d', '1d'):
        # fixtures and checker are looked up lazily, in the original order
        first = getattr(self, 'arr_float_' + suffix)
        second = getattr(self, 'arr_float1_' + suffix)
        targ0 = kendalltau(first, second)[0]
        targ1 = kendalltau(first.flat, second.flat)[0]
        check = getattr(self, 'check_nancorr_nancov_' + suffix)
        check(nanops.nancorr, targ0, targ1, method='kendall')

Example #22

Source File: test_metric.py From pt-ranking.github.io with MIT License

5 votes

def test_kendall_tau():
    """Print ``torch_kendall_tau`` next to scipy's kendalltau for two system rankings.

    The comparison is printed twice: first with the ascending reference,
    then with the reference sorted in descending order.
    """
    reference = torch.Tensor([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0])
    sys_1     = torch.Tensor([2.0, 1.0, 5.0, 3.0, 4.0, 6.0, 7.0, 9.0, 8.0, 10.0])
    sys_2     = torch.Tensor([10.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 1.0])
    res_reference, _ = torch.sort(reference, dim=0, descending=True)

    sections = ((True, reference), (False, res_reference))
    for section_index, (ascending, section_reference) in enumerate(sections):
        if section_index > 0:
            # visual separator between the two sections
            print()
            print('-----------------------')

        for label, system in (('tau_1', sys_1), ('tau_2', sys_2)):
            value = torch_kendall_tau(system, natural_ascending_as_reference=ascending)
            print(label, value)

        reference_values = section_reference.data.numpy()
        for label, system in (('scipy-1', sys_1), ('scipy-2', sys_2)):
            tau, p = stats.kendalltau(reference_values, system)
            print(label, tau, p)

Example #23

Source File: run_summarization.py From emnlp19-moverscore with MIT License

5 votes

def micro_averaging(dataset, target, device='cuda:0'):
    """Per-topic correlations between ``word_mover_score`` and a human score.

    ``dataset`` is iterated as ``(key, value)`` pairs where ``value`` holds
    ``'references'`` and ``'annotations'`` lists whose items carry a
    tokenised ``'text'``. IDF dictionaries are built over all references
    and all annotation texts. For each topic, every annotation with more
    than one token is scored against the topic's references and the mean
    score is paired with ``float(annot[target])``.

    ``device`` is accepted but not used by this function.

    Returns an array with one row per topic:
    ``[kendall tau, pearson r, spearman rho]``.
    """
    all_references, all_summaries = [], []
    for _, topic_data in dataset:
        all_references += [' '.join(ref['text']) for ref in topic_data['references']]
        all_summaries += [' '.join(annot['text']) for annot in topic_data['annotations']]

    idf_dict_ref = get_idf_dict(all_references)
    idf_dict_hyp = get_idf_dict(all_summaries)

    per_topic = []
    for _, topic_data in tqdm(dataset):
        topic_refs = [' '.join(ref['text']) for ref in topic_data['references']]
        num_refs = len(topic_refs)
        gold_scores, metric_scores = [], []

        for annot in topic_data['annotations']:
            if len(annot['text']) <= 1:
                continue
            gold_scores.append(float(annot[target]))

            # the hypothesis is repeated so it is paired with every reference
            hypothesis = ' '.join(annot['text'])
            scores = word_mover_score(topic_refs, [hypothesis] * num_refs, idf_dict_ref, idf_dict_hyp, stop_words,
                                      n_gram=1, remove_subwords=True, batch_size=48)
            metric_scores.append(np.mean(scores))

        tau = stats.kendalltau(gold_scores, metric_scores)[0]
        pearson = stats.pearsonr(gold_scores, metric_scores)[0]
        spearman = stats.spearmanr(gold_scores, metric_scores)[0]
        per_topic.append([tau, pearson, spearman])
    return np.array(per_topic)

Example #24

Source File: evaluate_methods.py From socialsent with Apache License 2.0

5 votes

def ternary_metrics(polarities, lexicon, eval_words, tau_lexicon=None):
    """Score continuous polarities against a ternary (-1 / 0 / 1) lexicon.

    The polarities of ``eval_words`` are rescaled to [-1, 1] and turned into
    labels with two thresholds chosen so that the share of predicted
    negatives / positives matches the share of -1 / 1 labels in ``lexicon``.

    :param polarities: mapping word -> score.
    :param lexicon: mapping word -> label; labels are compared with -1 and 1.
    :param eval_words: words to evaluate; each must be a key of both mappings.
    :param tau_lexicon: optional mapping word -> value; when given, Kendall's
        tau is computed between polarities and these values over the words
        shared with ``eval_words``.
    :return: ``(tau, cmn_f1, maj_f1, conf_mat)``; ``tau`` is None when
        ``tau_lexicon`` is None, ``maj_f1`` is the macro F1 of always
        predicting the most frequent true label.
    """
    if tau_lexicon is not None:
        kendall_words = list(set(eval_words).intersection(tau_lexicon))
    y_prob, y_true = [], []
    polarities = {word: polarities[word] for word in eval_words}
    for w in polarities:
        y_prob.append(polarities[w])
        y_true.append(lexicon[w])
    y_prob = np.array(y_prob)
    y_true = np.array(y_true)
    # min-max rescale to [-1, 1]
    y_prob = 2*(y_prob - np.min(y_prob)) / (np.max(y_prob) - np.min(y_prob)) - 1
    # list(...) is required on Python 3: np.array(dict.values()) would wrap
    # the view in a 0-d object array and both proportions would always be 0.
    lexicon_labels = np.array(list(lexicon.values()))
    neg_prop = np.sum(lexicon_labels == -1) / float(len(lexicon))
    pos_prop = np.sum(lexicon_labels == 1) / float(len(lexicon))
    sorted_probs = sorted(y_prob)
    neg_thresh = sorted_probs[int(np.round(neg_prop*len(sorted_probs)))]
    pos_thresh = sorted_probs[-int(np.round(pos_prop*len(sorted_probs)))]
    cmn_labels = [1 if val >= pos_thresh else -1 if val <= neg_thresh else 0 for val in y_prob]
    if tau_lexicon is not None:
        tau = kendalltau(*zip(*[(polarities[word], tau_lexicon[word]) for word in kendall_words]))[0]
    else:
        tau = None
    # np.ravel copes with scipy versions returning the mode either as a
    # length-1 array or as a scalar.
    majority_label = np.ravel(sp.stats.mode(y_true)[0])[0]
    maj_f1 = f1_score(y_true, np.repeat(majority_label, len(y_true)), average="macro")
    cmn_f1 = f1_score(y_true, cmn_labels, average="macro")
    # NOTE: the confusion matrix uses strict comparisons, unlike cmn_labels.
    label_func = lambda entry : 1 if entry > pos_thresh else -1 if entry < neg_thresh else 0
    conf_mat = confusion_matrix(y_true, [label_func(entry) for entry in y_prob])
    return tau, cmn_f1, maj_f1, conf_mat

Example #25

Source File: mixedvine.py From mixedvines with GNU General Public License v3.0

5 votes

def _heuristic_element_order(samples):
    '''
    Finds an order of elements that heuristically facilitates vine
    modelling.  Each element is scored by the sum of the absolute Kendall's
    taus between its samples and the samples of every other element, and
    the elements are returned by descending score.

    Parameters
    ----------
    samples : array_like
        n-by-d matrix of samples where n is the number of samples and d is
        the number of marginals.

    Returns
    -------
    order : array_like
        Permutation of all element indices reflecting descending scores.
    '''
    n_elements = samples.shape[1]
    # Accumulate, for every element, the absolute tau of each pair it is in
    score = np.zeros(n_elements)
    for upper in range(1, n_elements):
        for lower in range(upper):
            tau, _ = kendalltau(samples[:, upper], samples[:, lower])
            strength = np.abs(tau)
            score[upper] += strength
            score[lower] += strength
    # Ascending argsort, reversed to obtain descending score
    ascending = score.argsort()
    return ascending[::-1]

Example #26

Source File: evaluator.py From document-qa with Apache License 2.0

5 votes

def evaluate(self, data: List[ContextAndQuestion], true_len, **kwargs):
    """Aggregate span scores per question (or per question/document pair).

    Reads ``kwargs["span"]`` (predicted spans) and ``kwargs["score"]`` (one
    confidence per data point). For every key the data point with the
    highest confidence is selected; columns 2 and 3 of the span scores are
    averaged over the selected points and reported as text EM / F1.
    Optional outputs: Kendall's tau between confidences and scores
    (``self.k_tau``) and means over points having an answer
    (``self.paragraph_level``). All names are prefixed with ``"b<bound>/"``.

    Raises RuntimeError when ``self.eval`` is neither "triviaqa" nor "squad".
    """
    best_spans, span_logits = kwargs["span"], kwargs["score"]
    if self.eval == "triviaqa":
        scorer = trivia_span_scores
    elif self.eval == "squad":
        scorer = squad_span_scores
    else:
        raise RuntimeError()
    scores = scorer(data, best_spans)

    has_answer = np.array([len(x.answer.answer_spans) > 0 for x in data])

    # key -> index of the most confident data point seen so far for that key
    best_index = {}
    for i, point in enumerate(data):
        key = (point.question_id, point.doc_id) if self.per_doc else point.question_id
        current = best_index.get(key)
        if current is None or span_logits[i] > span_logits[current]:
            best_index[key] = i
    selected_paragraphs = list(best_index.values())

    out = {}
    out["question-text-em"] = scores[selected_paragraphs, 2].mean()
    out["question-text-f1"] = scores[selected_paragraphs, 3].mean()

    if self.k_tau:
        out["text-em-k-tau"] = kendalltau(span_logits, scores[:, 2])[0]
        out["text-f1-k-tau"] = kendalltau(span_logits, scores[:, 3])[0]

    if self.paragraph_level:
        out["paragraph-text-em"] = scores[has_answer, 2].mean()
        out["paragraph-text-f1"] = scores[has_answer, 3].mean()

    prefix = "b%d/" % self.bound
    prefixed = {prefix + name: value for name, value in out.items()}
    return Evaluation(prefixed)

Example #27

Source File: evaluator.py From document-qa with Apache License 2.0

5 votes

def evaluate(self, data: List[ContextAndQuestion], true_len, **kargs):
    """Report mean span scores plus rank correlations of confidences.

    Span scores are computed from ``kargs["spans"]``; their column means
    over data points that have an answer are reported as accuracy, f1,
    text-accuracy and text-f1. ``kargs["conf"]`` (and ``kargs["none_prob"]``
    when present) is then rank-correlated, with the metric selected by
    ``self.rank_metric``, against score columns 3 (text f1) and 0 (span
    accuracy). All names are prefixed with ``"b<bound>/"``.

    Raises RuntimeError for an unknown ``self.text_eval`` and ValueError for
    an unknown ``self.rank_metric``.
    """
    if self.text_eval == "triviaqa":
        scorer = trivia_span_scores
    elif self.text_eval == "squad":
        scorer = squad_span_scores
    else:
        raise RuntimeError()
    scores = scorer(data, kargs["spans"])

    has_answer = [len(x.answer.answer_spans) > 0 for x in data]
    aggregated_scores = scores[has_answer].mean(axis=0)
    prefix = "b%d/" % self.bound
    column_names = ("accuracy", "f1", "text-accuracy", "text-f1")
    scalars = {prefix + name: aggregated_scores[column]
               for column, name in enumerate(column_names)}

    if self.rank_metric == "spr":
        metric = spearmanr
    elif self.rank_metric == "k-tau":
        metric = kendalltau
    else:
        raise ValueError()

    def record_rank_scores(label, confidence):
        # first element of the metric result is the correlation coefficient
        scalars[prefix + label + "-text-f1-" + self.rank_metric] = metric(confidence, scores[:, 3])[0]
        scalars[prefix + label + "-span-accuracy-" + self.rank_metric] = metric(confidence, scores[:, 0])[0]

    if "none_prob" in kargs:
        record_rank_scores("none", kargs["none_prob"])

    record_rank_scores("score", kargs["conf"])
    return Evaluation(scalars)

Example #28

Source File: test_nanops.py From twitter-stock-recommendation with MIT License

5 votes

def test_nancorr_kendall(self):
    """Check ``nanops.nancorr`` with ``method='kendall'`` against scipy on 2-d and 1-d fixtures."""
    from scipy.stats import kendalltau

    def _targets(first, second):
        # expected values for the arrays as given and for their flat views
        return (kendalltau(first, second)[0],
                kendalltau(first.flat, second.flat)[0])

    targ0, targ1 = _targets(self.arr_float_2d, self.arr_float1_2d)
    self.check_nancorr_nancov_2d(nanops.nancorr, targ0, targ1,
                                 method='kendall')

    targ0, targ1 = _targets(self.arr_float_1d, self.arr_float1_1d)
    self.check_nancorr_nancov_1d(nanops.nancorr, targ0, targ1,
                                 method='kendall')

Example #29

Source File: test_analytics.py From twitter-stock-recommendation with MIT License

5 votes

def test_corr_rank(self):
    """Verify ``corr`` with the kendall/spearman methods against scipy, then against values from R."""
    import scipy
    import scipy.stats as stats

    # kendall and spearman
    A = tm.makeTimeSeries()
    B = tm.makeTimeSeries()
    A[-5:] = A[:5]

    kendall_result = A.corr(B, method='kendall')
    kendall_expected = stats.kendalltau(A, B)[0]
    tm.assert_almost_equal(kendall_result, kendall_expected)

    spearman_result = A.corr(B, method='spearman')
    spearman_expected = stats.spearmanr(A, B)[0]
    tm.assert_almost_equal(spearman_result, spearman_expected)

    # these methods got rewritten in 0.8
    installed = LooseVersion(scipy.__version__)
    if installed < LooseVersion('0.9'):
        pytest.skip("skipping corr rank because of scipy version "
                    "{0}".format(scipy.__version__))

    # results from R
    A = Series(
        [-0.89926396, 0.94209606, -1.03289164, -0.95445587, 0.76910310,
         -0.06430576, -2.09704447, 0.40660407, -0.89926396, 0.94209606])
    B = Series(
        [-1.01270225, -0.62210117, -1.56895827, 0.59592943, -0.01680292,
         1.17258718, -1.06009347, -0.10222060, -0.89076239, 0.89372375])
    expected_from_r = (('kendall', 0.4319297), ('spearman', 0.5853767))
    for method, r_value in expected_from_r:
        tm.assert_almost_equal(A.corr(B, method=method), r_value)

Example #30

Source File: test_stats.py From GraphicDesignPatternByPython with MIT License

5 votes

def test_kendalltau_nan_2nd_arg():
    """Omitting a NaN in the second argument must equal dropping that pair by hand."""
    # regression test for gh-6134: nans in the second arg were not handled
    first = [1., 2., 3., 4.]
    second = [np.nan, 2.4, 3.4, 3.4]

    with_omit = stats.kendalltau(first, second, nan_policy='omit')
    # the NaN sits at position 0, so slicing it off gives the reference
    without_nan = stats.kendalltau(first[1:], second[1:])
    assert_allclose(with_omit.correlation, without_nan.correlation, atol=1e-15)