Python sklearn.exceptions.UndefinedMetricWarning() Examples

The following are 20 code examples of sklearn.exceptions.UndefinedMetricWarning, drawn from open-source projects; the originating project, source file, and license are noted above each example.
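Before diving into the examples, here is a minimal sketch (not taken from any of the projects below) of how scikit-learn emits this warning and how to catch or silence it; the toy labels are illustrative only:

import warnings

from sklearn.exceptions import UndefinedMetricWarning
from sklearn.metrics import precision_score

y_true = [0, 0, 0]
y_pred = [0, 0, 0]  # no predicted positives -> precision is ill-defined

# Catch the warning explicitly and inspect it.
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always", UndefinedMetricWarning)
    p = precision_score(y_true, y_pred)  # falls back to 0.0 and warns
assert p == 0.0
assert any(issubclass(w.category, UndefinedMetricWarning) for w in caught)

# Silence it when the 0.0 fallback is acceptable (recent scikit-learn
# releases also expose a zero_division argument for the same purpose).
with warnings.catch_warnings():
    warnings.simplefilter("ignore", UndefinedMetricWarning)
    p = precision_score(y_true, y_pred)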
Example #1
Source File: test_classification.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_precision_recall_f1_no_labels(beta, average):
    y_true = np.zeros((20, 3))
    y_pred = np.zeros_like(y_true)

    p, r, f, s = assert_warns(UndefinedMetricWarning,
                              precision_recall_fscore_support,
                              y_true, y_pred, average=average,
                              beta=beta)
    assert_almost_equal(p, 0)
    assert_almost_equal(r, 0)
    assert_almost_equal(f, 0)
    assert_equal(s, None)

    fbeta = assert_warns(UndefinedMetricWarning, fbeta_score,
                         y_true, y_pred,
                         beta=beta, average=average)
    assert_almost_equal(fbeta, 0) 
Example #2
Source File: test_ranking.py    From twitter-stock-recommendation with MIT License
def test_roc_curve_one_label():
    y_true = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
    y_pred = [0, 1, 0, 1, 0, 1, 0, 1, 0, 1]
    # assert there are warnings
    w = UndefinedMetricWarning
    fpr, tpr, thresholds = assert_warns(w, roc_curve, y_true, y_pred)
    # all true labels, all fpr should be nan
    assert_array_equal(fpr,
                       np.nan * np.ones(len(thresholds)))
    assert_equal(fpr.shape, tpr.shape)
    assert_equal(fpr.shape, thresholds.shape)

    # assert there are warnings
    fpr, tpr, thresholds = assert_warns(w, roc_curve,
                                        [1 - x for x in y_true],
                                        y_pred)
    # all negative labels, all tpr should be nan
    assert_array_equal(tpr,
                       np.nan * np.ones(len(thresholds)))
    assert_equal(fpr.shape, tpr.shape)
    assert_equal(fpr.shape, thresholds.shape) 
Example #3
Source File: metrics.py    From AIF360 with Apache License 2.0
def specificity_score(y_true, y_pred, pos_label=1, sample_weight=None):
    """Compute the specificity or true negative rate.

    Args:
        y_true (array-like): Ground truth (correct) target values.
        y_pred (array-like): Estimated targets as returned by a classifier.
        pos_label (scalar, optional): The label of the positive class.
        sample_weight (array-like, optional): Sample weights.
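
    Returns:
        float: Specificity (true negative rate). If there are no negative
        samples in y_true, this will raise an
        :class:`~sklearn.exceptions.UndefinedMetricWarning` and return 0.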
    """
    MCM = multilabel_confusion_matrix(y_true, y_pred, labels=[pos_label],
                                      sample_weight=sample_weight)
    tn, fp, fn, tp = MCM.ravel()
    negs = tn + fp
    if negs == 0:
        warnings.warn('specificity_score is ill-defined and being set to 0.0 '
                      'due to no negative samples.', UndefinedMetricWarning)
        return 0.
    return tn / negs 
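As a quick, hedged usage sketch for the helper above (the aif360.sklearn.metrics import path is an assumption; adjust it to wherever this metrics.py lives in your AIF360 version):

import warnings

from aif360.sklearn.metrics import specificity_score  # assumed import path

# tn=1, fp=1 for pos_label=1 -> specificity 0.5
print(specificity_score([1, 0, 0, 1], [1, 0, 1, 1]))

# No negative samples: the metric is ill-defined, warns, and returns 0.0.
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    print(specificity_score([1, 1], [1, 0]))  # 0.0 + UndefinedMetricWarning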
Example #4
Source File: test_ranking.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_roc_curve_one_label():
    y_true = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
    y_pred = [0, 1, 0, 1, 0, 1, 0, 1, 0, 1]
    # assert there are warnings
    w = UndefinedMetricWarning
    fpr, tpr, thresholds = assert_warns(w, roc_curve, y_true, y_pred)
    # all true labels, all fpr should be nan
    assert_array_equal(fpr, np.full(len(thresholds), np.nan))
    assert_equal(fpr.shape, tpr.shape)
    assert_equal(fpr.shape, thresholds.shape)

    # assert there are warnings
    fpr, tpr, thresholds = assert_warns(w, roc_curve,
                                        [1 - x for x in y_true],
                                        y_pred)
    # all negative labels, all tpr should be nan
    assert_array_equal(tpr, np.full(len(thresholds), np.nan))
    assert_equal(fpr.shape, tpr.shape)
    assert_equal(fpr.shape, thresholds.shape) 
Example #5
Source File: test_classification.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_precision_recall_f1_no_labels_average_none():
    y_true = np.zeros((20, 3))
    y_pred = np.zeros_like(y_true)

    beta = 1

    # tp = [0, 0, 0]
    # fn = [0, 0, 0]
    # fp = [0, 0, 0]
    # support = [0, 0, 0]
    # |y_hat_i inter y_i | = [0, 0, 0]
    # |y_i| = [0, 0, 0]
    # |y_hat_i| = [0, 0, 0]

    p, r, f, s = assert_warns(UndefinedMetricWarning,
                              precision_recall_fscore_support,
                              y_true, y_pred, average=None, beta=beta)
    assert_array_almost_equal(p, [0, 0, 0], 2)
    assert_array_almost_equal(r, [0, 0, 0], 2)
    assert_array_almost_equal(f, [0, 0, 0], 2)
    assert_array_almost_equal(s, [0, 0, 0], 2)

    fbeta = assert_warns(UndefinedMetricWarning, fbeta_score,
                         y_true, y_pred, beta=beta, average=None)
    assert_array_almost_equal(fbeta, [0, 0, 0], 2) 
Example #6
Source File: test_classification.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_average_binary_jaccard_score(recwarn):
    # tp=0, fp=0, fn=1, tn=0
    assert jaccard_score([1], [0], average='binary') == 0.
    # tp=0, fp=0, fn=0, tn=1
    msg = ('Jaccard is ill-defined and being set to 0.0 due to '
           'no true or predicted samples')
    assert assert_warns_message(UndefinedMetricWarning,
                                msg,
                                jaccard_score,
                                [0, 0], [0, 0],
                                average='binary') == 0.
    # tp=1, fp=0, fn=0, tn=0 (pos_label=0)
    assert jaccard_score([0], [0], pos_label=0,
                         average='binary') == 1.
    y_true = np.array([1, 0, 1, 1, 0])
    y_pred = np.array([1, 0, 1, 1, 1])
    assert_almost_equal(jaccard_score(y_true, y_pred,
                                      average='binary'), 3. / 4)
    assert_almost_equal(jaccard_score(y_true, y_pred,
                                      average='binary',
                                      pos_label=0), 1. / 2)

    assert not list(recwarn) 
Example #7
Source File: metrics.py    From AIF360 with Apache License 2.0
def generalized_fpr(y_true, probas_pred, pos_label=1, sample_weight=None):
    r"""Return the ratio of generalized false positives to negative examples in
    the dataset, :math:`GFPR = \tfrac{GFP}{N}`.

    Generalized confusion matrix measures such as this are calculated by summing
    the probabilities of the positive class instead of the hard predictions.

    Args:
        y_true (array-like): Ground-truth (correct) target values.
        probas_pred (array-like): Probability estimates of the positive class.
        pos_label (scalar, optional): The label of the positive class.
        sample_weight (array-like, optional): Sample weights.

    Returns:
        float: Generalized false positive rate. If there are no negative samples
        in y_true, this will raise an
        :class:`~sklearn.exceptions.UndefinedMetricWarning` and return 0.
    """
    idx = (y_true != pos_label)
    if not np.any(idx):
        warnings.warn("generalized_fpr is ill-defined because there are no "
                      "negative samples in y_true.", UndefinedMetricWarning)
        return 0.
    if sample_weight is None:
        return probas_pred[idx].mean()
    return np.average(probas_pred[idx], weights=sample_weight[idx]) 
Example #8
Source File: metrics.py    From AIF360 with Apache License 2.0
def generalized_fnr(y_true, probas_pred, pos_label=1, sample_weight=None):
    r"""Return the ratio of generalized false negatives to positive examples in
    the dataset, :math:`GFNR = \tfrac{GFN}{P}`.

    Generalized confusion matrix measures such as this are calculated by summing
    the probabilities of the positive class instead of the hard predictions.

    Args:
        y_true (array-like): Ground-truth (correct) target values.
        probas_pred (array-like): Probability estimates of the positive class.
        pos_label (scalar, optional): The label of the positive class.
        sample_weight (array-like, optional): Sample weights.

    Returns:
        float: Generalized false negative rate. If there are no positive samples
        in y_true, this will raise an
        :class:`~sklearn.exceptions.UndefinedMetricWarning` and return 0.
    """
    idx = (y_true == pos_label)
    if not np.any(idx):
        warnings.warn("generalized_fnr is ill-defined because there are no "
                      "positive samples in y_true.", UndefinedMetricWarning)
        return 0.
    if sample_weight is None:
        return 1 - probas_pred[idx].mean()
    return 1 - np.average(probas_pred[idx], weights=sample_weight[idx])


# ============================ GROUP FAIRNESS ================================== 
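A small worked sketch of the two generalized rates above (again assuming they are importable from aif360.sklearn.metrics; the toy scores are made up):

import numpy as np

from aif360.sklearn.metrics import generalized_fnr, generalized_fpr  # assumed path

y_true = np.array([1, 1, 0, 0])
probas_pred = np.array([0.9, 0.6, 0.2, 0.4])  # P(y=1) for each sample

# GFPR: mean positive-class probability over the true negatives -> (0.2 + 0.4) / 2
print(generalized_fpr(y_true, probas_pred))  # 0.3
# GFNR: mean negative-class probability over the true positives -> 1 - (0.9 + 0.6) / 2
print(generalized_fnr(y_true, probas_pred))  # 0.25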
Example #9
Source File: test_classification.py    From twitter-stock-recommendation with MIT License
def test_precision_recall_f1_no_labels():
    y_true = np.zeros((20, 3))
    y_pred = np.zeros_like(y_true)

    # tp = [0, 0, 0]
    # fn = [0, 0, 0]
    # fp = [0, 0, 0]
    # support = [0, 0, 0]
    # |y_hat_i inter y_i | = [0, 0, 0]
    # |y_i| = [0, 0, 0]
    # |y_hat_i| = [0, 0, 0]

    for beta in [1]:
        p, r, f, s = assert_warns(UndefinedMetricWarning,
                                  precision_recall_fscore_support,
                                  y_true, y_pred, average=None, beta=beta)
        assert_array_almost_equal(p, [0, 0, 0], 2)
        assert_array_almost_equal(r, [0, 0, 0], 2)
        assert_array_almost_equal(f, [0, 0, 0], 2)
        assert_array_almost_equal(s, [0, 0, 0], 2)

        fbeta = assert_warns(UndefinedMetricWarning, fbeta_score,
                             y_true, y_pred, beta=beta, average=None)
        assert_array_almost_equal(fbeta, [0, 0, 0], 2)

        for average in ["macro", "micro", "weighted", "samples"]:
            p, r, f, s = assert_warns(UndefinedMetricWarning,
                                      precision_recall_fscore_support,
                                      y_true, y_pred, average=average,
                                      beta=beta)
            assert_almost_equal(p, 0)
            assert_almost_equal(r, 0)
            assert_almost_equal(f, 0)
            assert_equal(s, None)

            fbeta = assert_warns(UndefinedMetricWarning, fbeta_score,
                                 y_true, y_pred,
                                 beta=beta, average=average)
            assert_almost_equal(fbeta, 0) 
Example #10
Source File: test_classification.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_prf_warnings():
    # average of per-label scores
    f, w = precision_recall_fscore_support, UndefinedMetricWarning
    my_assert = assert_warns_message
    for average in [None, 'weighted', 'macro']:
        msg = ('Precision and F-score are ill-defined and '
               'being set to 0.0 in labels with no predicted samples.')
        my_assert(w, msg, f, [0, 1, 2], [1, 1, 2], average=average)

        msg = ('Recall and F-score are ill-defined and '
               'being set to 0.0 in labels with no true samples.')
        my_assert(w, msg, f, [1, 1, 2], [0, 1, 2], average=average)

    # average of per-sample scores
    msg = ('Precision and F-score are ill-defined and '
           'being set to 0.0 in samples with no predicted labels.')
    my_assert(w, msg, f, np.array([[1, 0], [1, 0]]),
              np.array([[1, 0], [0, 0]]), average='samples')

    msg = ('Recall and F-score are ill-defined and '
           'being set to 0.0 in samples with no true labels.')
    my_assert(w, msg, f, np.array([[1, 0], [0, 0]]),
              np.array([[1, 0], [1, 0]]),
              average='samples')

    # single score: micro-average
    msg = ('Precision and F-score are ill-defined and '
           'being set to 0.0 due to no predicted samples.')
    my_assert(w, msg, f, np.array([[1, 1], [1, 1]]),
              np.array([[0, 0], [0, 0]]), average='micro')

    msg = ('Recall and F-score are ill-defined and '
           'being set to 0.0 due to no true samples.')
    my_assert(w, msg, f, np.array([[0, 0], [0, 0]]),
              np.array([[1, 1], [1, 1]]), average='micro')

    # single positive label
    msg = ('Precision and F-score are ill-defined and '
           'being set to 0.0 due to no predicted samples.')
    my_assert(w, msg, f, [1, 1], [-1, -1], average='binary')

    msg = ('Recall and F-score are ill-defined and '
           'being set to 0.0 due to no true samples.')
    my_assert(w, msg, f, [-1, -1], [1, 1], average='binary')

    clean_warning_registry()
    with warnings.catch_warnings(record=True) as record:
        warnings.simplefilter('always')
        precision_recall_fscore_support([0, 0], [0, 0], average="binary")
        msg = ('Recall and F-score are ill-defined and '
               'being set to 0.0 due to no true samples.')
        assert_equal(str(record.pop().message), msg)
        msg = ('Precision and F-score are ill-defined and '
               'being set to 0.0 due to no predicted samples.')
        assert_equal(str(record.pop().message), msg) 
Example #11
Source File: test.py    From rasa_core with Apache License 2.0
def collect_story_predictions(
    completed_trackers: List['DialogueStateTracker'],
    agent: 'Agent',
    fail_on_prediction_errors: bool = False,
    use_e2e: bool = False
) -> Tuple[StoryEvalution, int]:
    """Test the stories from a file, running them through the stored model."""
    from rasa_nlu.test import get_evaluation_metrics
    from tqdm import tqdm

    story_eval_store = EvaluationStore()
    failed = []
    correct_dialogues = []
    num_stories = len(completed_trackers)

    logger.info("Evaluating {} stories\n"
                "Progress:".format(num_stories))

    action_list = []

    for tracker in tqdm(completed_trackers):
        tracker_results, predicted_tracker, tracker_actions = \
            _predict_tracker_actions(tracker, agent,
                                     fail_on_prediction_errors, use_e2e)

        story_eval_store.merge_store(tracker_results)

        action_list.extend(tracker_actions)

        if tracker_results.has_prediction_target_mismatch():
            # there is at least one wrong prediction
            failed.append(predicted_tracker)
            correct_dialogues.append(0)
        else:
            correct_dialogues.append(1)

    logger.info("Finished collecting predictions.")
    with warnings.catch_warnings():
        from sklearn.exceptions import UndefinedMetricWarning

        warnings.simplefilter("ignore", UndefinedMetricWarning)
        report, precision, f1, accuracy = get_evaluation_metrics(
            [1] * len(completed_trackers), correct_dialogues)

    in_training_data_fraction = _in_training_data_fraction(action_list)

    log_evaluation_table([1] * len(completed_trackers),
                         "END-TO-END" if use_e2e else "CONVERSATION",
                         report, precision, f1, accuracy,
                         in_training_data_fraction,
                         include_report=False)

    return (StoryEvalution(evaluation_store=story_eval_store,
                           failed_stories=failed,
                           action_list=action_list,
                           in_training_data_fraction=in_training_data_fraction),
            num_stories) 
Example #12
Source File: test.py    From rasa_core with Apache License 2.0
async def test(stories: Text,
               agent: 'Agent',
               max_stories: Optional[int] = None,
               out_directory: Optional[Text] = None,
               fail_on_prediction_errors: bool = False,
               use_e2e: bool = False):
    """Run the evaluation of the stories, optionally plot the results."""
    from rasa_nlu.test import get_evaluation_metrics

    completed_trackers = await _generate_trackers(stories, agent,
                                                  max_stories, use_e2e)

    story_evaluation, _ = collect_story_predictions(completed_trackers, agent,
                                                    fail_on_prediction_errors,
                                                    use_e2e)

    evaluation_store = story_evaluation.evaluation_store

    with warnings.catch_warnings():
        from sklearn.exceptions import UndefinedMetricWarning

        warnings.simplefilter("ignore", UndefinedMetricWarning)
        report, precision, f1, accuracy = get_evaluation_metrics(
            evaluation_store.serialise_targets(),
            evaluation_store.serialise_predictions()
        )

    if out_directory:
        plot_story_evaluation(evaluation_store.action_targets,
                              evaluation_store.action_predictions,
                              report, precision, f1, accuracy,
                              story_evaluation.in_training_data_fraction,
                              out_directory)

    log_failed_stories(story_evaluation.failed_stories, out_directory)

    return {
        "report": report,
        "precision": precision,
        "f1": f1,
        "accuracy": accuracy,
        "actions": story_evaluation.action_list,
        "in_training_data_fraction":
            story_evaluation.in_training_data_fraction,
        "is_end_to_end_evaluation": use_e2e
    } 
Example #13
Source File: test.py    From rasa-for-botfront with Apache License 2.0
async def test(
    stories: Text,
    agent: "Agent",
    max_stories: Optional[int] = None,
    out_directory: Optional[Text] = None,
    fail_on_prediction_errors: bool = False,
    e2e: bool = False,
    disable_plotting: bool = False,
):
    """Run the evaluation of the stories, optionally plot the results."""
    from rasa.nlu.test import get_evaluation_metrics

    completed_trackers = await _generate_trackers(stories, agent, max_stories, e2e)

    story_evaluation, _ = collect_story_predictions(
        completed_trackers, agent, fail_on_prediction_errors, e2e
    )

    evaluation_store = story_evaluation.evaluation_store

    with warnings.catch_warnings():
        from sklearn.exceptions import UndefinedMetricWarning

        warnings.simplefilter("ignore", UndefinedMetricWarning)

        targets, predictions = evaluation_store.serialise()
        report, precision, f1, accuracy = get_evaluation_metrics(targets, predictions)

    if out_directory:
        plot_story_evaluation(
            evaluation_store.action_targets,
            evaluation_store.action_predictions,
            report,
            precision,
            f1,
            accuracy,
            story_evaluation.in_training_data_fraction,
            out_directory,
            disable_plotting,
        )

    log_failed_stories(story_evaluation.failed_stories, out_directory)

    return {
        "report": report,
        "precision": precision,
        "f1": f1,
        "accuracy": accuracy,
        "actions": story_evaluation.action_list,
        "in_training_data_fraction": story_evaluation.in_training_data_fraction,
        "is_end_to_end_evaluation": e2e,
    } 
Example #14
Source File: test_recall.py    From ignite with BSD 3-Clause "New" or "Revised" License
def test_multilabel_input_NC():
    def _test(average):
        re = Recall(average=average, is_multilabel=True)

        y_pred = torch.randint(0, 2, size=(20, 5))
        y = torch.randint(0, 2, size=(20, 5)).long()
        re.update((y_pred, y))
        np_y_pred = to_numpy_multilabel(y_pred)
        np_y = to_numpy_multilabel(y)
        assert re._type == "multilabel"
        re_compute = re.compute() if average else re.compute().mean().item()
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=UndefinedMetricWarning)
            assert recall_score(np_y, np_y_pred, average="samples") == pytest.approx(re_compute)

        re.reset()
        y_pred = torch.randint(0, 2, size=(10, 4))
        y = torch.randint(0, 2, size=(10, 4)).long()
        re.update((y_pred, y))
        np_y_pred = y_pred.numpy()
        np_y = y.numpy()
        assert re._type == "multilabel"
        re_compute = re.compute() if average else re.compute().mean().item()
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=UndefinedMetricWarning)
            assert recall_score(np_y, np_y_pred, average="samples") == pytest.approx(re_compute)

        # Batched Updates
        re.reset()
        y_pred = torch.randint(0, 2, size=(100, 4))
        y = torch.randint(0, 2, size=(100, 4)).long()

        batch_size = 16
        n_iters = y.shape[0] // batch_size + 1

        for i in range(n_iters):
            idx = i * batch_size
            re.update((y_pred[idx : idx + batch_size], y[idx : idx + batch_size]))

        np_y = y.numpy()
        np_y_pred = y_pred.numpy()
        assert re._type == "multilabel"
        re_compute = re.compute() if average else re.compute().mean().item()
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=UndefinedMetricWarning)
            assert recall_score(np_y, np_y_pred, average="samples") == pytest.approx(re_compute)

    for _ in range(5):
        _test(average=True)
        _test(average=False)

    re1 = Recall(is_multilabel=True, average=True)
    re2 = Recall(is_multilabel=True, average=False)
    y_pred = torch.randint(0, 2, size=(10, 4))
    y = torch.randint(0, 2, size=(10, 4)).long()
    re1.update((y_pred, y))
    re2.update((y_pred, y))
    assert re1.compute() == pytest.approx(re2.compute().mean().item()) 
Example #15
Source File: test_recall.py    From ignite with BSD 3-Clause "New" or "Revised" License
def test_multilabel_input_NCL():
    def _test(average):
        re = Recall(average=average, is_multilabel=True)

        y_pred = torch.randint(0, 2, size=(10, 5, 10))
        y = torch.randint(0, 2, size=(10, 5, 10)).long()
        re.update((y_pred, y))
        np_y_pred = to_numpy_multilabel(y_pred)
        np_y = to_numpy_multilabel(y)
        assert re._type == "multilabel"
        re_compute = re.compute() if average else re.compute().mean().item()
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=UndefinedMetricWarning)
            assert recall_score(np_y, np_y_pred, average="samples") == pytest.approx(re_compute)

        re.reset()
        y_pred = torch.randint(0, 2, size=(15, 4, 10))
        y = torch.randint(0, 2, size=(15, 4, 10)).long()
        re.update((y_pred, y))
        np_y_pred = to_numpy_multilabel(y_pred)
        np_y = to_numpy_multilabel(y)
        assert re._type == "multilabel"
        re_compute = re.compute() if average else re.compute().mean().item()
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=UndefinedMetricWarning)
            assert recall_score(np_y, np_y_pred, average="samples") == pytest.approx(re_compute)

        # Batched Updates
        re.reset()
        y_pred = torch.randint(0, 2, size=(100, 4, 12))
        y = torch.randint(0, 2, size=(100, 4, 12)).long()

        batch_size = 16
        n_iters = y.shape[0] // batch_size + 1

        for i in range(n_iters):
            idx = i * batch_size
            re.update((y_pred[idx : idx + batch_size], y[idx : idx + batch_size]))

        np_y = to_numpy_multilabel(y)
        np_y_pred = to_numpy_multilabel(y_pred)
        assert re._type == "multilabel"
        re_compute = re.compute() if average else re.compute().mean().item()
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=UndefinedMetricWarning)
            assert recall_score(np_y, np_y_pred, average="samples") == pytest.approx(re_compute)

    for _ in range(5):
        _test(average=True)
        _test(average=False)

    re1 = Recall(is_multilabel=True, average=True)
    re2 = Recall(is_multilabel=True, average=False)
    y_pred = torch.randint(0, 2, size=(10, 4, 20))
    y = torch.randint(0, 2, size=(10, 4, 20)).long()
    re1.update((y_pred, y))
    re2.update((y_pred, y))
    assert re1.compute() == pytest.approx(re2.compute().mean().item()) 
Example #16
Source File: test_recall.py    From ignite with BSD 3-Clause "New" or "Revised" License
def test_multilabel_input_NCHW():
    def _test(average):
        re = Recall(average=average, is_multilabel=True)

        y_pred = torch.randint(0, 2, size=(10, 5, 18, 16))
        y = torch.randint(0, 2, size=(10, 5, 18, 16)).long()
        re.update((y_pred, y))
        np_y_pred = to_numpy_multilabel(y_pred)
        np_y = to_numpy_multilabel(y)
        assert re._type == "multilabel"
        re_compute = re.compute() if average else re.compute().mean().item()
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=UndefinedMetricWarning)
            assert recall_score(np_y, np_y_pred, average="samples") == pytest.approx(re_compute)

        re.reset()
        y_pred = torch.randint(0, 2, size=(10, 4, 20, 23))
        y = torch.randint(0, 2, size=(10, 4, 20, 23)).long()
        re.update((y_pred, y))
        np_y_pred = to_numpy_multilabel(y_pred)
        np_y = to_numpy_multilabel(y)
        assert re._type == "multilabel"
        re_compute = re.compute() if average else re.compute().mean().item()
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=UndefinedMetricWarning)
            assert recall_score(np_y, np_y_pred, average="samples") == pytest.approx(re_compute)

        # Batched Updates
        re.reset()
        y_pred = torch.randint(0, 2, size=(100, 5, 12, 14))
        y = torch.randint(0, 2, size=(100, 5, 12, 14)).long()

        batch_size = 16
        n_iters = y.shape[0] // batch_size + 1

        for i in range(n_iters):
            idx = i * batch_size
            re.update((y_pred[idx : idx + batch_size], y[idx : idx + batch_size]))

        np_y = to_numpy_multilabel(y)
        np_y_pred = to_numpy_multilabel(y_pred)
        assert re._type == "multilabel"
        re_compute = re.compute() if average else re.compute().mean().item()
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=UndefinedMetricWarning)
            assert recall_score(np_y, np_y_pred, average="samples") == pytest.approx(re_compute)

    for _ in range(5):
        _test(average=True)
        _test(average=False)

    re1 = Recall(is_multilabel=True, average=True)
    re2 = Recall(is_multilabel=True, average=False)
    y_pred = torch.randint(0, 2, size=(10, 4, 20, 23))
    y = torch.randint(0, 2, size=(10, 4, 20, 23)).long()
    re1.update((y_pred, y))
    re2.update((y_pred, y))
    assert re1.compute() == pytest.approx(re2.compute().mean().item()) 
Example #17
Source File: test_precision.py    From ignite with BSD 3-Clause "New" or "Revised" License
def test_multilabel_input_NC():
    def _test(average):
        pr = Precision(average=average, is_multilabel=True)

        y_pred = torch.randint(0, 2, size=(20, 5))
        y = torch.randint(0, 2, size=(20, 5)).long()
        pr.update((y_pred, y))
        np_y_pred = y_pred.numpy()
        np_y = y.numpy()
        assert pr._type == "multilabel"
        pr_compute = pr.compute() if average else pr.compute().mean().item()
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=UndefinedMetricWarning)
            assert precision_score(np_y, np_y_pred, average="samples") == pytest.approx(pr_compute)

        pr.reset()
        y_pred = torch.randint(0, 2, size=(10, 4))
        y = torch.randint(0, 2, size=(10, 4)).long()
        pr.update((y_pred, y))
        np_y_pred = y_pred.numpy()
        np_y = y.numpy()
        assert pr._type == "multilabel"
        pr_compute = pr.compute() if average else pr.compute().mean().item()
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=UndefinedMetricWarning)
            assert precision_score(np_y, np_y_pred, average="samples") == pytest.approx(pr_compute)

        # Batched Updates
        pr.reset()
        y_pred = torch.randint(0, 2, size=(100, 4))
        y = torch.randint(0, 2, size=(100, 4)).long()

        batch_size = 16
        n_iters = y.shape[0] // batch_size + 1

        for i in range(n_iters):
            idx = i * batch_size
            pr.update((y_pred[idx : idx + batch_size], y[idx : idx + batch_size]))

        np_y = y.numpy()
        np_y_pred = y_pred.numpy()
        assert pr._type == "multilabel"
        pr_compute = pr.compute() if average else pr.compute().mean().item()
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=UndefinedMetricWarning)
            assert precision_score(np_y, np_y_pred, average="samples") == pytest.approx(pr_compute)

    for _ in range(5):
        _test(average=True)
        _test(average=False)

    pr1 = Precision(is_multilabel=True, average=True)
    pr2 = Precision(is_multilabel=True, average=False)
    y_pred = torch.randint(0, 2, size=(10, 4, 20, 23))
    y = torch.randint(0, 2, size=(10, 4, 20, 23)).long()
    pr1.update((y_pred, y))
    pr2.update((y_pred, y))
    assert pr1.compute() == pytest.approx(pr2.compute().mean().item()) 
Example #18
Source File: test_precision.py    From ignite with BSD 3-Clause "New" or "Revised" License
def test_multilabel_input_NCL():
    def _test(average):
        pr = Precision(average=average, is_multilabel=True)

        y_pred = torch.randint(0, 2, size=(10, 5, 10))
        y = torch.randint(0, 2, size=(10, 5, 10)).long()
        pr.update((y_pred, y))
        np_y_pred = to_numpy_multilabel(y_pred)
        np_y = to_numpy_multilabel(y)
        assert pr._type == "multilabel"
        pr_compute = pr.compute() if average else pr.compute().mean().item()
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=UndefinedMetricWarning)
            assert precision_score(np_y, np_y_pred, average="samples") == pytest.approx(pr_compute)

        pr.reset()
        y_pred = torch.randint(0, 2, size=(15, 4, 10))
        y = torch.randint(0, 2, size=(15, 4, 10)).long()
        pr.update((y_pred, y))
        np_y_pred = to_numpy_multilabel(y_pred)
        np_y = to_numpy_multilabel(y)
        assert pr._type == "multilabel"
        pr_compute = pr.compute() if average else pr.compute().mean().item()
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=UndefinedMetricWarning)
            assert precision_score(np_y, np_y_pred, average="samples") == pytest.approx(pr_compute)

        # Batched Updates
        pr.reset()
        y_pred = torch.randint(0, 2, size=(100, 4, 12))
        y = torch.randint(0, 2, size=(100, 4, 12)).long()

        batch_size = 16
        n_iters = y.shape[0] // batch_size + 1

        for i in range(n_iters):
            idx = i * batch_size
            pr.update((y_pred[idx : idx + batch_size], y[idx : idx + batch_size]))

        np_y = to_numpy_multilabel(y)
        np_y_pred = to_numpy_multilabel(y_pred)
        assert pr._type == "multilabel"
        pr_compute = pr.compute() if average else pr.compute().mean().item()
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=UndefinedMetricWarning)
            assert precision_score(np_y, np_y_pred, average="samples") == pytest.approx(pr_compute)

    for _ in range(5):
        _test(average=True)
        _test(average=False)

    pr1 = Precision(is_multilabel=True, average=True)
    pr2 = Precision(is_multilabel=True, average=False)
    y_pred = torch.randint(0, 2, size=(10, 4, 20, 23))
    y = torch.randint(0, 2, size=(10, 4, 20, 23)).long()
    pr1.update((y_pred, y))
    pr2.update((y_pred, y))
    assert pr1.compute() == pytest.approx(pr2.compute().mean().item()) 
Example #19
Source File: metrics.py    From AIF360 with Apache License 2.0
def ratio(func, y, *args, prot_attr=None, priv_group=1, sample_weight=None,
          **kwargs):
    """Compute the ratio between unprivileged and privileged subsets for an
    arbitrary metric.

    Note: The optimal value of a ratio is 1. To make it a scorer, one must
    take the minimum of the ratio and its inverse.

    Unprivileged group is taken to be the inverse of the privileged group.

    Args:
        func (function): A metric function from :mod:`sklearn.metrics` or
            :mod:`aif360.sklearn.metrics.metrics`.
        y (pandas.Series): Outcome vector with protected attributes as index.
        *args: Additional positional args to be passed through to func.
        prot_attr (array-like, keyword-only): Protected attribute(s). If
            ``None``, all protected attributes in y are used.
        priv_group (scalar, optional): The label of the privileged group.
        sample_weight (array-like, optional): Sample weights passed through to
            func.
        **kwargs: Additional keyword args to be passed through to func.

    Returns:
        scalar: Ratio of metric values for unprivileged and privileged groups.
    """
    groups, _ = check_groups(y, prot_attr)
    idx = (groups == priv_group)
    unpriv = map(lambda a: a[~idx], (y,) + args)
    priv = map(lambda a: a[idx], (y,) + args)
    if sample_weight is not None:
        numerator = func(*unpriv, sample_weight=sample_weight[~idx], **kwargs)
        denominator = func(*priv, sample_weight=sample_weight[idx], **kwargs)
    else:
        numerator = func(*unpriv, **kwargs)
        denominator = func(*priv, **kwargs)

    if denominator == 0:
        warnings.warn("The ratio is ill-defined and being set to 0.0 because "
                      "'{}' for privileged samples is 0.".format(func.__name__),
                      UndefinedMetricWarning)
        return 0.

    return numerator / denominator


# =========================== SCORER FACTORY ================================= 
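To make the group-ratio idea concrete, here is a hedged sketch (the import path and the toy 'sex' index are assumptions; as the docstring above notes, y must carry the protected attribute in its index):

import numpy as np
import pandas as pd
from sklearn.metrics import recall_score

from aif360.sklearn.metrics import ratio  # assumed import path

# Hypothetical toy data: protected attribute 'sex' in the index, 1 = privileged.
idx = pd.Index([1, 1, 1, 0, 0, 0], name='sex')
y_true = pd.Series([1, 0, 1, 1, 1, 0], index=idx)
y_pred = np.array([1, 0, 1, 1, 0, 0])

# Unprivileged recall (0.5) over privileged recall (1.0) -> 0.5
print(ratio(recall_score, y_true, y_pred, prot_attr='sex', priv_group=1))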
Example #20
Source File: test_classification.py    From twitter-stock-recommendation with MIT License
def test_prf_warnings():
    # average of per-label scores
    f, w = precision_recall_fscore_support, UndefinedMetricWarning
    my_assert = assert_warns_message
    for average in [None, 'weighted', 'macro']:
        msg = ('Precision and F-score are ill-defined and '
               'being set to 0.0 in labels with no predicted samples.')
        my_assert(w, msg, f, [0, 1, 2], [1, 1, 2], average=average)

        msg = ('Recall and F-score are ill-defined and '
               'being set to 0.0 in labels with no true samples.')
        my_assert(w, msg, f, [1, 1, 2], [0, 1, 2], average=average)

    # average of per-sample scores
    msg = ('Precision and F-score are ill-defined and '
           'being set to 0.0 in samples with no predicted labels.')
    my_assert(w, msg, f, np.array([[1, 0], [1, 0]]),
              np.array([[1, 0], [0, 0]]), average='samples')

    msg = ('Recall and F-score are ill-defined and '
           'being set to 0.0 in samples with no true labels.')
    my_assert(w, msg, f, np.array([[1, 0], [0, 0]]),
              np.array([[1, 0], [1, 0]]),
              average='samples')

    # single score: micro-average
    msg = ('Precision and F-score are ill-defined and '
           'being set to 0.0 due to no predicted samples.')
    my_assert(w, msg, f, np.array([[1, 1], [1, 1]]),
              np.array([[0, 0], [0, 0]]), average='micro')

    msg = ('Recall and F-score are ill-defined and '
           'being set to 0.0 due to no true samples.')
    my_assert(w, msg, f, np.array([[0, 0], [0, 0]]),
              np.array([[1, 1], [1, 1]]), average='micro')

    # single positive label
    msg = ('Precision and F-score are ill-defined and '
           'being set to 0.0 due to no predicted samples.')
    my_assert(w, msg, f, [1, 1], [-1, -1], average='binary')

    msg = ('Recall and F-score are ill-defined and '
           'being set to 0.0 due to no true samples.')
    my_assert(w, msg, f, [-1, -1], [1, 1], average='binary')