Python sklearn.model_selection.validation_curve() Examples

The following are 8 code examples of sklearn.model_selection.validation_curve(), taken from open-source projects; the source file and license are noted above each example. You may also want to check out the other functions and classes available in the sklearn.model_selection module.
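
Before the project examples, here is a minimal, self-contained sketch of how validation_curve() is typically called. The dataset, estimator and parameter grid below are illustrative choices, not taken from any of the projects listed on this page:

import numpy as np
from sklearn.datasets import load_digits
from sklearn.model_selection import validation_curve
from sklearn.svm import SVC

X, y = load_digits(return_X_y=True)
param_range = np.logspace(-6, -1, 5)

# validation_curve fits a clone of the estimator for every value in
# param_range and every CV fold, and returns two (n_params, n_folds)
# arrays of scores.
train_scores, test_scores = validation_curve(
    SVC(), X, y,
    param_name="gamma", param_range=param_range, cv=5,
)

print("mean train accuracy per gamma:", train_scores.mean(axis=1))
print("mean CV accuracy per gamma:  ", test_scores.mean(axis=1))
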
Example #1
Source File: test_model_selection.py    From pandas-ml with BSD 3-Clause "New" or "Revised" License
def test_validation_curve(self):
        digits = datasets.load_digits()
        df = pdml.ModelFrame(digits)

        param_range = np.logspace(-2, -1, 2)

        svc = df.svm.SVC(random_state=self.random_state)
        result = df.model_selection.validation_curve(svc, 'gamma',
                                                     param_range)
        expected = ms.validation_curve(svm.SVC(random_state=self.random_state),
                                       digits.data, digits.target,
                                       'gamma', param_range)

        self.assertEqual(len(result), 2)
        self.assert_numpy_array_almost_equal(result[0], expected[0])
        self.assert_numpy_array_almost_equal(result[1], expected[1]) 
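
This test exercises the pandas-ml ModelFrame wrapper: calling validation_curve through the ModelFrame's model_selection accessor is expected to return the same pair of arrays (train scores and test scores, hence len(result) == 2) as calling sklearn.model_selection.validation_curve directly on the underlying digits data with an identically configured SVC.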
Example #2
Source File: test_validation.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_validation_curve():
    X, y = make_classification(n_samples=2, n_features=1, n_informative=1,
                               n_redundant=0, n_classes=2,
                               n_clusters_per_class=1, random_state=0)
    param_range = np.linspace(0, 1, 10)
    with warnings.catch_warnings(record=True) as w:
        train_scores, test_scores = validation_curve(
            MockEstimatorWithParameter(), X, y, param_name="param",
            param_range=param_range, cv=2
        )
    if len(w) > 0:
        raise RuntimeError("Unexpected warning: %r" % w[0].message)

    assert_array_almost_equal(train_scores.mean(axis=1), param_range)
    assert_array_almost_equal(test_scores.mean(axis=1), 1 - param_range) 
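
The MockEstimatorWithParameter used above is a helper defined elsewhere in sklearn's test suite and not shown on this page. Roughly, its score() returns the current value of param on the data it was fitted on and 1 - param on anything else, which is why the mean train scores are expected to equal param_range and the mean test scores 1 - param_range. A minimal sketch of such a mock, which may differ from the real helper:

from sklearn.base import BaseEstimator

class MockEstimatorWithParameter(BaseEstimator):
    """Dummy estimator: train score == param, test score == 1 - param."""

    def __init__(self, param=0.5):
        self.param = param

    def fit(self, X, y):
        # Remember the training data so score() can tell train from test.
        self.X_subset_ = X
        return self

    def score(self, X=None, y=None):
        return self.param if X is self.X_subset_ else 1 - self.param
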
Example #3
Source File: test_validation.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_validation_curve_clone_estimator():
    X, y = make_classification(n_samples=2, n_features=1, n_informative=1,
                               n_redundant=0, n_classes=2,
                               n_clusters_per_class=1, random_state=0)

    param_range = np.linspace(1, 0, 10)
    _, _ = validation_curve(
        MockEstimatorWithSingleFitCallAllowed(), X, y,
        param_name="param", param_range=param_range, cv=2
    ) 
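
MockEstimatorWithSingleFitCallAllowed is another test-suite helper that is not shown here: its fit() may be called at most once per instance, so this test only passes if validation_curve clones the estimator before every fit rather than refitting the same object. A rough sketch of the idea (the real helper may differ):

from sklearn.base import BaseEstimator

class MockEstimatorWithSingleFitCallAllowed(BaseEstimator):
    """Dummy estimator whose fit() may be called at most once per instance."""

    def __init__(self, param=0.5):
        self.param = param

    def fit(self, X, y):
        # validation_curve is expected to clone the estimator, so a fresh,
        # unfitted instance is used for every parameter value and CV fold.
        assert not hasattr(self, "fit_called_"), "fit called a second time"
        self.fit_called_ = True
        return self

    def score(self, X=None, y=None):
        return 0.0
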
Example #4
Source File: test_validation.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_validation_curve_cv_splits_consistency():
    n_samples = 100
    n_splits = 5
    X, y = make_classification(n_samples=100, random_state=0)

    scores1 = validation_curve(SVC(kernel='linear', random_state=0), X, y,
                               'C', [0.1, 0.1, 0.2, 0.2],
                               cv=OneTimeSplitter(n_splits=n_splits,
                                                  n_samples=n_samples))
    # The OneTimeSplitter is a non-re-entrant cv splitter. Unless `split`
    # is called once per parameter setting, the following should produce
    # identical results for param setting 1 and param setting 2, as both
    # use the same C value.
    assert_array_almost_equal(*np.vsplit(np.hstack(scores1)[(0, 2, 1, 3), :],
                                         2))

    scores2 = validation_curve(SVC(kernel='linear', random_state=0), X, y,
                               'C', [0.1, 0.1, 0.2, 0.2],
                               cv=KFold(n_splits=n_splits, shuffle=True))

    # For scores2, compare the scores of the 1st and 2nd parameter settings
    # (since the C value for the first two settings is 0.1, they must be
    # consistent unless the train/test folds differ between the settings).
    assert_array_almost_equal(*np.vsplit(np.hstack(scores2)[(0, 2, 1, 3), :],
                                         2))

    scores3 = validation_curve(SVC(kernel='linear', random_state=0), X, y,
                               'C', [0.1, 0.1, 0.2, 0.2],
                               cv=KFold(n_splits=n_splits))

    # OneTimeSplitter is basically unshuffled KFold(n_splits=5). Sanity check.
    assert_array_almost_equal(np.array(scores3), np.array(scores1)) 
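
OneTimeSplitter is likewise defined in the test module rather than in sklearn's public API: it hands out a fixed set of KFold splits through an iterator that can be consumed only once, so the identical scores asserted above are only obtained if validation_curve reuses one set of splits across all parameter values. A possible sketch of such a splitter (the real helper may differ in detail):

import numpy as np
from sklearn.model_selection import KFold

class OneTimeSplitter:
    """CV splitter whose split() can be iterated only a single time."""

    def __init__(self, n_splits=4, n_samples=99):
        self.n_splits = n_splits
        # Materialise the KFold indices once; the iterator is exhausted
        # after the first full pass over the splits.
        self._indices = iter(KFold(n_splits=n_splits).split(np.ones(n_samples)))

    def split(self, X=None, y=None, groups=None):
        for train_index, test_index in self._indices:
            yield train_index, test_index

    def get_n_splits(self, X=None, y=None, groups=None):
        return self.n_splits
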
Example #5
Source File: test_validation.py    From twitter-stock-recommendation with MIT License
def test_validation_curve():
    X, y = make_classification(n_samples=2, n_features=1, n_informative=1,
                               n_redundant=0, n_classes=2,
                               n_clusters_per_class=1, random_state=0)
    param_range = np.linspace(0, 1, 10)
    with warnings.catch_warnings(record=True) as w:
        train_scores, test_scores = validation_curve(
            MockEstimatorWithParameter(), X, y, param_name="param",
            param_range=param_range, cv=2
        )
    if len(w) > 0:
        raise RuntimeError("Unexpected warning: %r" % w[0].message)

    assert_array_almost_equal(train_scores.mean(axis=1), param_range)
    assert_array_almost_equal(test_scores.mean(axis=1), 1 - param_range) 
Example #6
Source File: test_validation.py    From twitter-stock-recommendation with MIT License
def test_validation_curve_clone_estimator():
    X, y = make_classification(n_samples=2, n_features=1, n_informative=1,
                               n_redundant=0, n_classes=2,
                               n_clusters_per_class=1, random_state=0)

    param_range = np.linspace(1, 0, 10)
    _, _ = validation_curve(
        MockEstimatorWithSingleFitCallAllowed(), X, y,
        param_name="param", param_range=param_range, cv=2
    ) 
Example #7
Source File: test_validation.py    From twitter-stock-recommendation with MIT License
def test_validation_curve_cv_splits_consistency():
    n_samples = 100
    n_splits = 5
    X, y = make_classification(n_samples=100, random_state=0)

    scores1 = validation_curve(SVC(kernel='linear', random_state=0), X, y,
                               'C', [0.1, 0.1, 0.2, 0.2],
                               cv=OneTimeSplitter(n_splits=n_splits,
                                                  n_samples=n_samples))
    # The OneTimeSplitter is a non-re-entrant cv splitter. Unless `split`
    # is called once per parameter setting, the following should produce
    # identical results for param setting 1 and param setting 2, as both
    # use the same C value.
    assert_array_almost_equal(*np.vsplit(np.hstack(scores1)[(0, 2, 1, 3), :],
                                         2))

    scores2 = validation_curve(SVC(kernel='linear', random_state=0), X, y,
                               'C', [0.1, 0.1, 0.2, 0.2],
                               cv=KFold(n_splits=n_splits, shuffle=True))

    # For scores2, compare the scores of the 1st and 2nd parameter settings
    # (since the C value for the first two settings is 0.1, they must be
    # consistent unless the train/test folds differ between the settings).
    assert_array_almost_equal(*np.vsplit(np.hstack(scores2)[(0, 2, 1, 3), :],
                                         2))

    scores3 = validation_curve(SVC(kernel='linear', random_state=0), X, y,
                               'C', [0.1, 0.1, 0.2, 0.2],
                               cv=KFold(n_splits=n_splits))

    # OneTimeSplitter is basically unshuffled KFold(n_splits=5). Sanity check.
    assert_array_almost_equal(np.array(scores3), np.array(scores1)) 
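
Examples #5, #6 and #7 repeat the code of Examples #2, #3 and #4 verbatim; only the source project differs, so the notes and helper sketches above apply here unchanged.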
Example #8
Source File: test_validation.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_fit_and_score_failing():
    # Create a failing classifier to deliberately fail
    failing_clf = FailingClassifier(FailingClassifier.FAILING_PARAMETER)
    # dummy X data
    X = np.arange(1, 10)
    y = np.ones(9)
    fit_and_score_args = [failing_clf, X, None, dict(), None, None, 0,
                          None, None]
    # passing error score to trigger the warning message
    fit_and_score_kwargs = {'error_score': 0}
    # check if the warning message type is as expected
    assert_warns(FitFailedWarning, _fit_and_score, *fit_and_score_args,
                 **fit_and_score_kwargs)
    # since we're using FailingClassifier, our error will be the following
    error_message = "ValueError: Failing classifier failed as required"
    # the warning message we're expecting to see
    warning_message = ("Estimator fit failed. The score on this train-test "
                       "partition for these parameters will be set to %f. "
                       "Details: \n%s" % (fit_and_score_kwargs['error_score'],
                                          error_message))
    # check if the same warning is triggered
    assert_warns_message(FitFailedWarning, warning_message, _fit_and_score,
                         *fit_and_score_args, **fit_and_score_kwargs)

    # check if warning was raised, with default error_score argument
    warning_message = ("From version 0.22, errors during fit will result "
                       "in a cross validation score of NaN by default. Use "
                       "error_score='raise' if you want an exception "
                       "raised or error_score=np.nan to adopt the "
                       "behavior from version 0.22.")
    with pytest.raises(ValueError):
        assert_warns_message(FutureWarning, warning_message, _fit_and_score,
                             *fit_and_score_args)

    fit_and_score_kwargs = {'error_score': 'raise'}
    # check that the exception is raised when error_score='raise'
    assert_raise_message(ValueError, "Failing classifier failed as required",
                         _fit_and_score, *fit_and_score_args,
                         **fit_and_score_kwargs)

    # check that functions upstream pass error_score param to _fit_and_score
    error_message = ("error_score must be the string 'raise' or a"
                     " numeric value. (Hint: if using 'raise', please"
                     " make sure that it has been spelled correctly.)")

    assert_raise_message(ValueError, error_message, cross_validate,
                         failing_clf, X, cv=3, error_score='unvalid-string')

    assert_raise_message(ValueError, error_message, cross_val_score,
                         failing_clf, X, cv=3, error_score='unvalid-string')

    assert_raise_message(ValueError, error_message, learning_curve,
                         failing_clf, X, y, cv=3, error_score='unvalid-string')

    assert_raise_message(ValueError, error_message, validation_curve,
                         failing_clf, X, y, 'parameter',
                         [FailingClassifier.FAILING_PARAMETER], cv=3,
                         error_score='unvalid-string')

    assert_equal(failing_clf.score(), 0.)  # FailingClassifier coverage
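
FailingClassifier and _fit_and_score both come from sklearn's test infrastructure: _fit_and_score is the private helper that cross_validate, cross_val_score, learning_curve and validation_curve all delegate to, and FailingClassifier is a classifier whose fit() raises a ValueError whenever its parameter equals FAILING_PARAMETER, which is how the error_score handling is exercised. A rough sketch of such a failing classifier (the version in the test suite may differ):

import numpy as np
from sklearn.base import BaseEstimator

class FailingClassifier(BaseEstimator):
    """Classifier whose fit() raises when given the failing parameter."""

    FAILING_PARAMETER = 2

    def __init__(self, parameter=None):
        self.parameter = parameter

    def fit(self, X, y=None):
        if self.parameter == self.FAILING_PARAMETER:
            raise ValueError("Failing classifier failed as required")
        return self

    def predict(self, X):
        return np.zeros(X.shape[0])

    def score(self, X=None, y=None):
        return 0.0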