Python sklearn.model_selection.LeaveOneOut() Examples

The following are 19 code examples of sklearn.model_selection.LeaveOneOut(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module sklearn.model_selection , or try the search function

Example #1

Source File: test_split.py From Mastering-Elasticsearch-7.0 with MIT License

6 votes

def test_2d_y():
    # smoke test for 2d y and multi-label
    n_samples = 30
    rng = np.random.RandomState(1)
    X = rng.randint(0, 3, size=(n_samples, 2))
    y = rng.randint(0, 3, size=(n_samples,))
    y_2d = y.reshape(-1, 1)
    y_multilabel = rng.randint(0, 2, size=(n_samples, 3))
    groups = rng.randint(0, 3, size=(n_samples,))
    splitters = [LeaveOneOut(), LeavePOut(p=2), KFold(), StratifiedKFold(),
                 RepeatedKFold(), RepeatedStratifiedKFold(),
                 ShuffleSplit(), StratifiedShuffleSplit(test_size=.5),
                 GroupShuffleSplit(), LeaveOneGroupOut(),
                 LeavePGroupsOut(n_groups=2), GroupKFold(), TimeSeriesSplit(),
                 PredefinedSplit(test_fold=groups)]
    for splitter in splitters:
        list(splitter.split(X, y, groups))
        list(splitter.split(X, y_2d, groups))
        try:
            list(splitter.split(X, y_multilabel, groups))
        except ValueError as e:
            allowed_target_types = ('binary', 'multiclass')
            msg = "Supported target types are: {}. Got 'multilabel".format(
                allowed_target_types)
            assert msg in str(e)

Example #2

Source File: test_split.py From twitter-stock-recommendation with MIT License

6 votes

def test_2d_y():
    # smoke test for 2d y and multi-label
    n_samples = 30
    rng = np.random.RandomState(1)
    X = rng.randint(0, 3, size=(n_samples, 2))
    y = rng.randint(0, 3, size=(n_samples,))
    y_2d = y.reshape(-1, 1)
    y_multilabel = rng.randint(0, 2, size=(n_samples, 3))
    groups = rng.randint(0, 3, size=(n_samples,))
    splitters = [LeaveOneOut(), LeavePOut(p=2), KFold(), StratifiedKFold(),
                 RepeatedKFold(), RepeatedStratifiedKFold(),
                 ShuffleSplit(), StratifiedShuffleSplit(test_size=.5),
                 GroupShuffleSplit(), LeaveOneGroupOut(),
                 LeavePGroupsOut(n_groups=2), GroupKFold(), TimeSeriesSplit(),
                 PredefinedSplit(test_fold=groups)]
    for splitter in splitters:
        list(splitter.split(X, y, groups))
        list(splitter.split(X, y_2d, groups))
        try:
            list(splitter.split(X, y_multilabel, groups))
        except ValueError as e:
            allowed_target_types = ('binary', 'multiclass')
            msg = "Supported target types are: {}. Got 'multilabel".format(
                allowed_target_types)
            assert msg in str(e)

Example #3

Source File: test_calibration.py From Mastering-Elasticsearch-7.0 with MIT License

6 votes

def test_calibration_less_classes():
    # Test to check calibration works fine when train set in a test-train
    # split does not contain all classes
    # Since this test uses LOO, at each iteration train set will not contain a
    # class label
    X = np.random.randn(10, 5)
    y = np.arange(10)
    clf = LinearSVC(C=1.0)
    cal_clf = CalibratedClassifierCV(clf, method="sigmoid", cv=LeaveOneOut())
    cal_clf.fit(X, y)

    for i, calibrated_classifier in \
            enumerate(cal_clf.calibrated_classifiers_):
        proba = calibrated_classifier.predict_proba(X)
        assert_array_equal(proba[:, i], np.zeros(len(y)))
        assert_equal(np.all(np.hstack([proba[:, :i],
                                       proba[:, i + 1:]])), True)

Example #4

Source File: knn.py From pyDML with GNU General Public License v3.0

5 votes

def loo_prob(self, X):
        """
        Predicts the probabilities for the given data using them as a training and with Leave One Out.

        X : 2D-Array or Matrix, default=None

            The dataset to be used.

        Returns
        -------

        T : 2D-Array, shape (N x c)

            A matrix with the probabilities for each class. N is the number of samples and c is the number of classes.
            The element i, j shows the probability of sample X[i] to be in class j.
        """
        loo = LeaveOneOut()
        probs = np.empty([self.y_.size, self.num_labels])

        for train_index, test_index in loo.split(X):
            X_train, X_test = X[train_index], X[test_index]
            y_train = self.y_[train_index]

            knnloo = neighbors.KNeighborsClassifier(self.nn_)
            knnloo.fit(X_train, y_train)

            probs[test_index, :] = knnloo.predict_proba(X_test)

        return probs

Example #5

Source File: test_calibration.py From twitter-stock-recommendation with MIT License

5 votes

def test_calibration_prob_sum():
    # Test that sum of probabilities is 1. A non-regression test for
    # issue #7796
    num_classes = 2
    X, y = make_classification(n_samples=10, n_features=5,
                               n_classes=num_classes)
    clf = LinearSVC(C=1.0)
    clf_prob = CalibratedClassifierCV(clf, method="sigmoid", cv=LeaveOneOut())
    clf_prob.fit(X, y)

    probs = clf_prob.predict_proba(X)
    assert_array_almost_equal(probs.sum(axis=1), np.ones(probs.shape[0]))

Example #6

Source File: test_split.py From twitter-stock-recommendation with MIT License

5 votes

def test_nested_cv():
    # Test if nested cross validation works with different combinations of cv
    rng = np.random.RandomState(0)

    X, y = make_classification(n_samples=15, n_classes=2, random_state=0)
    groups = rng.randint(0, 5, 15)

    cvs = [LeaveOneGroupOut(), LeaveOneOut(), GroupKFold(), StratifiedKFold(),
           StratifiedShuffleSplit(n_splits=3, random_state=0)]

    for inner_cv, outer_cv in combinations_with_replacement(cvs, 2):
        gs = GridSearchCV(Ridge(), param_grid={'alpha': [1, .1]},
                          cv=inner_cv)
        cross_val_score(gs, X=X, y=y, groups=groups, cv=outer_cv,
                        fit_params={'groups': groups})

Example #7

Source File: test_model_selection.py From pandas-ml with BSD 3-Clause "New" or "Revised" License

5 votes

def test_objectmapper_abbr(self):
        df = pdml.ModelFrame([])

        # Splitter Classes
        self.assertIs(df.ms.KFold, ms.KFold)
        self.assertIs(df.ms.GroupKFold, ms.GroupKFold)
        self.assertIs(df.ms.StratifiedKFold, ms.StratifiedKFold)

        self.assertIs(df.ms.LeaveOneGroupOut, ms.LeaveOneGroupOut)
        self.assertIs(df.ms.LeavePGroupsOut, ms.LeavePGroupsOut)
        self.assertIs(df.ms.LeaveOneOut, ms.LeaveOneOut)
        self.assertIs(df.ms.LeavePOut, ms.LeavePOut)

        self.assertIs(df.ms.ShuffleSplit, ms.ShuffleSplit)
        self.assertIs(df.ms.GroupShuffleSplit,
                      ms.GroupShuffleSplit)
        # self.assertIs(df.ms.StratifiedShuffleSplit,
        #               ms.StratifiedShuffleSplit)
        self.assertIs(df.ms.PredefinedSplit, ms.PredefinedSplit)
        self.assertIs(df.ms.TimeSeriesSplit, ms.TimeSeriesSplit)

        # Splitter Functions

        # Hyper-parameter optimizers
        self.assertIs(df.ms.GridSearchCV, ms.GridSearchCV)
        self.assertIs(df.ms.RandomizedSearchCV, ms.RandomizedSearchCV)
        self.assertIs(df.ms.ParameterGrid, ms.ParameterGrid)
        self.assertIs(df.ms.ParameterSampler, ms.ParameterSampler)

        # Model validation

Example #8

Source File: test_model_selection.py From pandas-ml with BSD 3-Clause "New" or "Revised" License

5 votes

def test_objectmapper(self):
        df = pdml.ModelFrame([])

        # Splitter Classes
        self.assertIs(df.model_selection.KFold, ms.KFold)
        self.assertIs(df.model_selection.GroupKFold, ms.GroupKFold)
        self.assertIs(df.model_selection.StratifiedKFold, ms.StratifiedKFold)

        self.assertIs(df.model_selection.LeaveOneGroupOut, ms.LeaveOneGroupOut)
        self.assertIs(df.model_selection.LeavePGroupsOut, ms.LeavePGroupsOut)
        self.assertIs(df.model_selection.LeaveOneOut, ms.LeaveOneOut)
        self.assertIs(df.model_selection.LeavePOut, ms.LeavePOut)

        self.assertIs(df.model_selection.ShuffleSplit, ms.ShuffleSplit)
        self.assertIs(df.model_selection.GroupShuffleSplit,
                      ms.GroupShuffleSplit)
        # self.assertIs(df.model_selection.StratifiedShuffleSplit,
        #               ms.StratifiedShuffleSplit)
        self.assertIs(df.model_selection.PredefinedSplit, ms.PredefinedSplit)
        self.assertIs(df.model_selection.TimeSeriesSplit, ms.TimeSeriesSplit)

        # Splitter Functions

        # Hyper-parameter optimizers
        self.assertIs(df.model_selection.GridSearchCV, ms.GridSearchCV)
        self.assertIs(df.model_selection.RandomizedSearchCV, ms.RandomizedSearchCV)
        self.assertIs(df.model_selection.ParameterGrid, ms.ParameterGrid)
        self.assertIs(df.model_selection.ParameterSampler, ms.ParameterSampler)

        # Model validation

Example #9

Source File: multidml_knn.py From pyDML with GNU General Public License v3.0

5 votes

def _loo_pred(self, X):
        loo = LeaveOneOut()
        preds = np.empty([self.y_.size], dtype=self.y_.dtype)

        for train_index, test_index in loo.split(X):
            X_train, X_test = X[train_index], X[test_index]
            y_train = self.y_[train_index]

            knnloo = neighbors.KNeighborsClassifier(self.nn_)
            knnloo.fit(X_train, y_train)

            preds[test_index] = knnloo.predict(X_test)

        return preds

Example #10

Source File: knn.py From pyDML with GNU General Public License v3.0

5 votes

def loo_pred(self, X):
        """
        Obtains the predicted for the given data using them as a training and with Leave One Out.

        X : 2D-Array or Matrix, default=None

            The dataset to be used.

        Returns
        -------

        y : 1D-Array

            The vector with the label predictions.
        """
        loo = LeaveOneOut()
        preds = np.empty(self.y_.size)

        for train_index, test_index in loo.split(X):
            X_train, X_test = X[train_index], X[test_index]
            y_train = self.y_[train_index]

            knnloo = neighbors.KNeighborsClassifier(self.nn_)
            knnloo.fit(X_train, y_train)

            preds[test_index] = knnloo.predict(X_test)

        return preds

Example #11

Source File: unit_tests.py From MKLpy with GNU General Public License v3.0

5 votes

def test_cross_val_score(self):
		mkl = algorithms.AverageMKL()
		scores = cross_val_score(self.KL, self.Y, mkl)
		self.assertEqual(len(scores), 3)
		self.assertEqual(len(cross_val_score(self.KL, self.Y, mkl, n_folds=5)), 5)
		self.assertRaises(ValueError, cross_val_score, self.KL, self.Y, mkl, scoring='pippo franco')
		loo = LeaveOneOut()
		scores = cross_val_score(self.KL, self.Y, mkl, cv=loo, scoring='accuracy')
		self.assertEqual(len(scores), len(self.Y))

Example #12

Source File: test_split.py From Mastering-Elasticsearch-7.0 with MIT License

5 votes

def test_leave_one_out_empty_trainset():
    # LeaveOneGroup out expect at least 2 groups so no need to check
    cv = LeaveOneOut()
    X, y = [[1]], [0]  # 1 sample
    with pytest.raises(
            ValueError,
            match='Cannot perform LeaveOneOut with n_samples=1'):
        next(cv.split(X, y))

Example #13

Source File: test_split.py From Mastering-Elasticsearch-7.0 with MIT License

5 votes

def test_nested_cv():
    # Test if nested cross validation works with different combinations of cv
    rng = np.random.RandomState(0)

    X, y = make_classification(n_samples=15, n_classes=2, random_state=0)
    groups = rng.randint(0, 5, 15)

    cvs = [LeaveOneGroupOut(), LeaveOneOut(), GroupKFold(), StratifiedKFold(),
           StratifiedShuffleSplit(n_splits=3, random_state=0)]

    for inner_cv, outer_cv in combinations_with_replacement(cvs, 2):
        gs = GridSearchCV(Ridge(), param_grid={'alpha': [1, .1]},
                          cv=inner_cv, error_score='raise', iid=False)
        cross_val_score(gs, X=X, y=y, groups=groups, cv=outer_cv,
                        fit_params={'groups': groups})

Example #14

Source File: CrossValidation.py From FAE with GNU General Public License v3.0

5 votes

def __init__(self, n_split='', description=''):
        self._description = description
        if n_split == 'all':
            self._cv = LeaveOneOut()
            self._name = 'LeaveOneOut'
        else:
            self._cv = StratifiedKFold(int(n_split), shuffle=False)
            self._name = '{}-Fold'.format(int(n_split))
        pass

Example #15

Source File: crossval.py From parfit with MIT License

4 votes

def crossvalOne(model, X, y, params, nfolds, metric=roc_auc_score, predict_proba=True, n_jobs=-1, verbose=1):
    """
    Makes one cross-validation model fit-score run using provided data and parameters
    :param model: The instantiated model you wish to pass, e.g. LogisticRegression()
    :param X: The independent variable data
    :param y: The response variable data
    :param params: The parameters passed through to the model from the parameter grid
    :param nfolds: The number of folds you wish to use for cross-validation
    :param metric: The metric you wish to use to score the crossval predictions using
    :param predict_proba: Choice between using 'predict_proba' and 'predict' for scoring routine.
        Default True means predict_proba and False means predict
    :param n_jobs: Number of cores to use in parallelization (defaults to -1: all cores)
    :param verbose: The level of verbosity of reporting updates on parallel process
        Default is 10 (send an update at the completion of each job)
    :return: Returns the mean of the cross-validation scores
    """
    if 'random_state' in params.keys():
        random_state = params['random_state']
    else:
        random_state = None

    if nfolds >= X.shape[0]:
        cv = LeaveOneOut()
    else:
        cv = StratifiedKFold(n_splits=nfolds, shuffle=True, random_state=random_state)

    train_indices, test_indices = zip(*cv.split(X, y))
    if isinstance(model, BaseEstimator):
        fitted_models = Parallel(n_jobs=n_jobs, verbose=verbose)(
            delayed(fitOne)(type(model), np.asarray(X)[train_index], np.asarray(y)[train_index], params)
            for train_index in train_indices
        )
    else:
        fitted_models = Parallel(n_jobs=n_jobs, verbose=verbose)(
            delayed(fitOne)(model, np.asarray(X)[train_index], np.asarray(y)[train_index], params)
            for train_index in train_indices
        )
    if nfolds >= X.shape[0]:
        score = scoreLeaveOneOut(fitted_models, test_indices, X, y, metric, predict_proba)
        return score
    else:
        scores = Parallel(n_jobs=n_jobs, verbose=0)(
            delayed(scoreOne)(fitted_model, np.asarray(X)[test_index], np.asarray(y)[test_index], metric, predict_proba)
            for fitted_model, test_index in zip(fitted_models, test_indices)
        )
        return np.mean(scores)

Example #16

Source File: test_validation.py From Mastering-Elasticsearch-7.0 with MIT License

4 votes

def test_cross_val_predict():
    boston = load_boston()
    X, y = boston.data, boston.target
    cv = KFold()

    est = Ridge()

    # Naive loop (should be same as cross_val_predict):
    preds2 = np.zeros_like(y)
    for train, test in cv.split(X, y):
        est.fit(X[train], y[train])
        preds2[test] = est.predict(X[test])

    preds = cross_val_predict(est, X, y, cv=cv)
    assert_array_almost_equal(preds, preds2)

    preds = cross_val_predict(est, X, y)
    assert_equal(len(preds), len(y))

    cv = LeaveOneOut()
    preds = cross_val_predict(est, X, y, cv=cv)
    assert_equal(len(preds), len(y))

    Xsp = X.copy()
    Xsp *= (Xsp > np.median(Xsp))
    Xsp = coo_matrix(Xsp)
    preds = cross_val_predict(est, Xsp, y)
    assert_array_almost_equal(len(preds), len(y))

    preds = cross_val_predict(KMeans(), X)
    assert_equal(len(preds), len(y))

    class BadCV():
        def split(self, X, y=None, groups=None):
            for i in range(4):
                yield np.array([0, 1, 2, 3]), np.array([4, 5, 6, 7, 8])

    assert_raises(ValueError, cross_val_predict, est, X, y, cv=BadCV())

    X, y = load_iris(return_X_y=True)

    warning_message = ('Number of classes in training fold (2) does '
                       'not match total number of classes (3). '
                       'Results may not be appropriate for your use case.')
    assert_warns_message(RuntimeWarning, warning_message,
                         cross_val_predict, LogisticRegression(),
                         X, y, method='predict_proba', cv=KFold(2))

Example #17

Source File: test_split.py From twitter-stock-recommendation with MIT License

4 votes

def test_cross_validator_with_default_params():
    n_samples = 4
    n_unique_groups = 4
    n_splits = 2
    p = 2
    n_shuffle_splits = 10  # (the default value)

    X = np.array([[1, 2], [3, 4], [5, 6], [7, 8]])
    X_1d = np.array([1, 2, 3, 4])
    y = np.array([1, 1, 2, 2])
    groups = np.array([1, 2, 3, 4])
    loo = LeaveOneOut()
    lpo = LeavePOut(p)
    kf = KFold(n_splits)
    skf = StratifiedKFold(n_splits)
    lolo = LeaveOneGroupOut()
    lopo = LeavePGroupsOut(p)
    ss = ShuffleSplit(random_state=0)
    ps = PredefinedSplit([1, 1, 2, 2])  # n_splits = np of unique folds = 2

    loo_repr = "LeaveOneOut()"
    lpo_repr = "LeavePOut(p=2)"
    kf_repr = "KFold(n_splits=2, random_state=None, shuffle=False)"
    skf_repr = "StratifiedKFold(n_splits=2, random_state=None, shuffle=False)"
    lolo_repr = "LeaveOneGroupOut()"
    lopo_repr = "LeavePGroupsOut(n_groups=2)"
    ss_repr = ("ShuffleSplit(n_splits=10, random_state=0, "
               "test_size='default',\n       train_size=None)")
    ps_repr = "PredefinedSplit(test_fold=array([1, 1, 2, 2]))"

    n_splits_expected = [n_samples, comb(n_samples, p), n_splits, n_splits,
                         n_unique_groups, comb(n_unique_groups, p),
                         n_shuffle_splits, 2]

    for i, (cv, cv_repr) in enumerate(zip(
            [loo, lpo, kf, skf, lolo, lopo, ss, ps],
            [loo_repr, lpo_repr, kf_repr, skf_repr, lolo_repr, lopo_repr,
             ss_repr, ps_repr])):
        # Test if get_n_splits works correctly
        assert_equal(n_splits_expected[i], cv.get_n_splits(X, y, groups))

        # Test if the cross-validator works as expected even if
        # the data is 1d
        np.testing.assert_equal(list(cv.split(X, y, groups)),
                                list(cv.split(X_1d, y, groups)))
        # Test that train, test indices returned are integers
        for train, test in cv.split(X, y, groups):
            assert_equal(np.asarray(train).dtype.kind, 'i')
            assert_equal(np.asarray(train).dtype.kind, 'i')

        # Test if the repr works without any errors
        assert_equal(cv_repr, repr(cv))

    # ValueError for get_n_splits methods
    msg = "The 'X' parameter should not be None."
    assert_raise_message(ValueError, msg,
                         loo.get_n_splits, None, y, groups)
    assert_raise_message(ValueError, msg,
                         lpo.get_n_splits, None, y, groups)

Example #18

Source File: test_split.py From Mastering-Elasticsearch-7.0 with MIT License

4 votes

def test_cross_validator_with_default_params():
    n_samples = 4
    n_unique_groups = 4
    n_splits = 2
    p = 2
    n_shuffle_splits = 10  # (the default value)

    X = np.array([[1, 2], [3, 4], [5, 6], [7, 8]])
    X_1d = np.array([1, 2, 3, 4])
    y = np.array([1, 1, 2, 2])
    groups = np.array([1, 2, 3, 4])
    loo = LeaveOneOut()
    lpo = LeavePOut(p)
    kf = KFold(n_splits)
    skf = StratifiedKFold(n_splits)
    lolo = LeaveOneGroupOut()
    lopo = LeavePGroupsOut(p)
    ss = ShuffleSplit(random_state=0)
    ps = PredefinedSplit([1, 1, 2, 2])  # n_splits = np of unique folds = 2

    loo_repr = "LeaveOneOut()"
    lpo_repr = "LeavePOut(p=2)"
    kf_repr = "KFold(n_splits=2, random_state=None, shuffle=False)"
    skf_repr = "StratifiedKFold(n_splits=2, random_state=None, shuffle=False)"
    lolo_repr = "LeaveOneGroupOut()"
    lopo_repr = "LeavePGroupsOut(n_groups=2)"
    ss_repr = ("ShuffleSplit(n_splits=10, random_state=0, "
               "test_size=None, train_size=None)")
    ps_repr = "PredefinedSplit(test_fold=array([1, 1, 2, 2]))"

    n_splits_expected = [n_samples, comb(n_samples, p), n_splits, n_splits,
                         n_unique_groups, comb(n_unique_groups, p),
                         n_shuffle_splits, 2]

    for i, (cv, cv_repr) in enumerate(zip(
            [loo, lpo, kf, skf, lolo, lopo, ss, ps],
            [loo_repr, lpo_repr, kf_repr, skf_repr, lolo_repr, lopo_repr,
             ss_repr, ps_repr])):
        # Test if get_n_splits works correctly
        assert_equal(n_splits_expected[i], cv.get_n_splits(X, y, groups))

        # Test if the cross-validator works as expected even if
        # the data is 1d
        np.testing.assert_equal(list(cv.split(X, y, groups)),
                                list(cv.split(X_1d, y, groups)))
        # Test that train, test indices returned are integers
        for train, test in cv.split(X, y, groups):
            assert_equal(np.asarray(train).dtype.kind, 'i')
            assert_equal(np.asarray(train).dtype.kind, 'i')

        # Test if the repr works without any errors
        assert_equal(cv_repr, repr(cv))

    # ValueError for get_n_splits methods
    msg = "The 'X' parameter should not be None."
    assert_raise_message(ValueError, msg,
                         loo.get_n_splits, None, y, groups)
    assert_raise_message(ValueError, msg,
                         lpo.get_n_splits, None, y, groups)

Example #19

Source File: test_validation.py From twitter-stock-recommendation with MIT License

4 votes

def test_cross_val_predict():
    boston = load_boston()
    X, y = boston.data, boston.target
    cv = KFold()

    est = Ridge()

    # Naive loop (should be same as cross_val_predict):
    preds2 = np.zeros_like(y)
    for train, test in cv.split(X, y):
        est.fit(X[train], y[train])
        preds2[test] = est.predict(X[test])

    preds = cross_val_predict(est, X, y, cv=cv)
    assert_array_almost_equal(preds, preds2)

    preds = cross_val_predict(est, X, y)
    assert_equal(len(preds), len(y))

    cv = LeaveOneOut()
    preds = cross_val_predict(est, X, y, cv=cv)
    assert_equal(len(preds), len(y))

    Xsp = X.copy()
    Xsp *= (Xsp > np.median(Xsp))
    Xsp = coo_matrix(Xsp)
    preds = cross_val_predict(est, Xsp, y)
    assert_array_almost_equal(len(preds), len(y))

    preds = cross_val_predict(KMeans(), X)
    assert_equal(len(preds), len(y))

    class BadCV():
        def split(self, X, y=None, groups=None):
            for i in range(4):
                yield np.array([0, 1, 2, 3]), np.array([4, 5, 6, 7, 8])

    assert_raises(ValueError, cross_val_predict, est, X, y, cv=BadCV())

    X, y = load_iris(return_X_y=True)

    warning_message = ('Number of classes in training fold (2) does '
                       'not match total number of classes (3). '
                       'Results may not be appropriate for your use case.')
    assert_warns_message(RuntimeWarning, warning_message,
                         cross_val_predict, LogisticRegression(),
                         X, y, method='predict_proba', cv=KFold(2))