Python sklearn.model_selection.LeaveOneOut() Examples

The following are 19 code examples for showing how to use sklearn.model_selection.LeaveOneOut(). These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example.

You may check out the related API usage on the sidebar.

You may also want to check out all available functions/classes of the module sklearn.model_selection, or try the search function.

Example 1
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_split.py    License: MIT License 6 votes vote down vote up
def test_2d_y():
    """Smoke test: every splitter accepts 1d, column-vector and multilabel y.

    For the multilabel target a splitter may raise ``ValueError``; when it
    does, the message must report the unsupported 'multilabel' target type.
    """
    n_samples = 30
    rng = np.random.RandomState(1)
    # The draws below share one RandomState, so their order is significant.
    X = rng.randint(0, 3, size=(n_samples, 2))
    y = rng.randint(0, 3, size=(n_samples,))
    y_2d = y.reshape(-1, 1)
    y_multilabel = rng.randint(0, 2, size=(n_samples, 3))
    groups = rng.randint(0, 3, size=(n_samples,))

    all_cvs = [
        LeaveOneOut(), LeavePOut(p=2), KFold(), StratifiedKFold(),
        RepeatedKFold(), RepeatedStratifiedKFold(),
        ShuffleSplit(), StratifiedShuffleSplit(test_size=.5),
        GroupShuffleSplit(), LeaveOneGroupOut(),
        LeavePGroupsOut(n_groups=2), GroupKFold(), TimeSeriesSplit(),
        PredefinedSplit(test_fold=groups),
    ]

    # Expected fragment of the error raised for unsupported multilabel y.
    expected_fragment = (
        "Supported target types are: {}. Got 'multilabel".format(
            ('binary', 'multiclass')))

    for cv in all_cvs:
        for target in (y, y_2d):
            list(cv.split(X, target, groups))
        try:
            list(cv.split(X, y_multilabel, groups))
        except ValueError as exc:
            assert expected_fragment in str(exc)
Example 2
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_calibration.py    License: MIT License 6 votes vote down vote up
def test_calibration_less_classes():
    """Calibration must cope with training folds that miss a class.

    Every sample carries its own label and the split is leave-one-out, so
    each training fold lacks exactly one class label.
    """
    X = np.random.randn(10, 5)
    y = np.arange(10)
    calibrated = CalibratedClassifierCV(
        LinearSVC(C=1.0), method="sigmoid", cv=LeaveOneOut())
    calibrated.fit(X, y)

    fold_classifiers = calibrated.calibrated_classifiers_
    for held_out, fold_clf in enumerate(fold_classifiers):
        proba = fold_clf.predict_proba(X)
        # The column of the class missing from this fold must be all zeros...
        assert_array_equal(proba[:, held_out], np.zeros(len(y)))
        # ...while every other column must be entirely non-zero.
        remaining = np.hstack([proba[:, :held_out], proba[:, held_out + 1:]])
        assert_equal(np.all(remaining), True)
Example 3
Project: twitter-stock-recommendation   Author: alvarobartt   File: test_split.py    License: MIT License 6 votes vote down vote up
def test_2d_y():
    """Smoke test: every splitter accepts 1d, column-vector and multilabel y.

    For the multilabel target a splitter may raise ``ValueError``; when it
    does, the message must report the unsupported 'multilabel' target type.
    """
    n_samples = 30
    rng = np.random.RandomState(1)
    # The draws below share one RandomState, so their order is significant.
    X = rng.randint(0, 3, size=(n_samples, 2))
    y = rng.randint(0, 3, size=(n_samples,))
    y_2d = y.reshape(-1, 1)
    y_multilabel = rng.randint(0, 2, size=(n_samples, 3))
    groups = rng.randint(0, 3, size=(n_samples,))

    all_cvs = [
        LeaveOneOut(), LeavePOut(p=2), KFold(), StratifiedKFold(),
        RepeatedKFold(), RepeatedStratifiedKFold(),
        ShuffleSplit(), StratifiedShuffleSplit(test_size=.5),
        GroupShuffleSplit(), LeaveOneGroupOut(),
        LeavePGroupsOut(n_groups=2), GroupKFold(), TimeSeriesSplit(),
        PredefinedSplit(test_fold=groups),
    ]

    # Expected fragment of the error raised for unsupported multilabel y.
    expected_fragment = (
        "Supported target types are: {}. Got 'multilabel".format(
            ('binary', 'multiclass')))

    for cv in all_cvs:
        for target in (y, y_2d):
            list(cv.split(X, target, groups))
        try:
            list(cv.split(X, y_multilabel, groups))
        except ValueError as exc:
            assert expected_fragment in str(exc)
Example 4
Project: MKLpy   Author: IvanoLauriola   File: unit_tests.py    License: GNU General Public License v3.0 5 votes vote down vote up
def test_cross_val_score(self):
		"""Check the number of scores returned by cross_val_score.

		Covers the default call (3 scores), an explicit ``n_folds=5``, an
		unknown scoring name (must raise ValueError) and a LeaveOneOut
		splitter (one score per entry of ``self.Y``).
		"""
		estimator = algorithms.AverageMKL()

		default_scores = cross_val_score(self.KL, self.Y, estimator)
		self.assertEqual(len(default_scores), 3)

		five_fold_scores = cross_val_score(self.KL, self.Y, estimator, n_folds=5)
		self.assertEqual(len(five_fold_scores), 5)

		# An unrecognised scoring name has to be rejected.
		with self.assertRaises(ValueError):
			cross_val_score(self.KL, self.Y, estimator, scoring='pippo franco')

		loo_scores = cross_val_score(
			self.KL, self.Y, estimator, cv=LeaveOneOut(), scoring='accuracy')
		self.assertEqual(len(loo_scores), len(self.Y))
Example 5
Project: FAE   Author: salan668   File: CrossValidation.py    License: GNU General Public License v3.0 5 votes vote down vote up
def __init__(self, n_split='', description=''):
        """Select the cross-validation splitter from ``n_split``.

        ``n_split == 'all'`` selects LeaveOneOut; any other value is passed
        through ``int()`` and used as the fold count of an unshuffled
        StratifiedKFold.

        NOTE: the default ``''`` is not ``'all'``, so it reaches ``int('')``
        and raises ValueError; callers are expected to pass a real value.
        """
        self._description = description
        if n_split != 'all':
            n_folds = int(n_split)
            self._cv = StratifiedKFold(n_folds, shuffle=False)
            self._name = '{}-Fold'.format(n_folds)
        else:
            self._cv = LeaveOneOut()
            self._name = 'LeaveOneOut'
Example 6
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_split.py    License: MIT License 5 votes vote down vote up
def test_nested_cv():
    """Nested cross-validation must run for every inner/outer splitter pair."""
    rng = np.random.RandomState(0)

    X, y = make_classification(n_samples=15, n_classes=2, random_state=0)
    groups = rng.randint(0, 5, 15)

    candidate_cvs = [
        LeaveOneGroupOut(),
        LeaveOneOut(),
        GroupKFold(),
        StratifiedKFold(),
        StratifiedShuffleSplit(n_splits=3, random_state=0),
    ]
    alpha_grid = {'alpha': [1, .1]}

    # Unordered pairs, including a splitter paired with itself.
    for inner_cv, outer_cv in combinations_with_replacement(candidate_cvs, 2):
        search = GridSearchCV(Ridge(), param_grid=alpha_grid, cv=inner_cv,
                              error_score='raise', iid=False)
        # The groups are forwarded to both the outer split and the inner fit.
        cross_val_score(search, X=X, y=y, groups=groups, cv=outer_cv,
                        fit_params={'groups': groups})
Example 7
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_split.py    License: MIT License 5 votes vote down vote up
def test_leave_one_out_empty_trainset():
    """LeaveOneOut must refuse to split a dataset holding a single sample."""
    # LeaveOneGroupOut expects at least 2 groups, so it needs no such check.
    single_X, single_y = [[1]], [0]  # 1 sample
    loo = LeaveOneOut()
    expected_error = 'Cannot perform LeaveOneOut with n_samples=1'
    with pytest.raises(ValueError, match=expected_error):
        next(loo.split(single_X, single_y))
Example 8
Project: pyDML   Author: jlsuarezdiaz   File: knn.py    License: GNU General Public License v3.0 5 votes vote down vote up
def loo_prob(self, X):
        """
        Estimates class probabilities for the data with Leave One Out: each
        sample is scored by a k-NN classifier fitted on all other samples.

        Parameters
        ----------

        X : 2D-Array or Matrix

            The dataset to be used. It is indexed with the split indices, and
            its rows are paired with the stored labels ``self.y_``.

        Returns
        -------

        T : 2D-Array, shape (N x c)

            A matrix with the probabilities for each class. N is the number of samples
            (``self.y_.size``) and c is the number of classes (``self.num_labels``).
            The element i, j shows the probability of sample X[i] to be in class j.
        """
        n_samples = self.y_.size
        probs = np.empty([n_samples, self.num_labels])

        for train_index, test_index in LeaveOneOut().split(X):
            # A fresh classifier per fold, trained without the held-out sample.
            fold_knn = neighbors.KNeighborsClassifier(self.nn_)
            fold_knn.fit(X[train_index], self.y_[train_index])

            probs[test_index, :] = fold_knn.predict_proba(X[test_index])

        return probs
Example 9
Project: pyDML   Author: jlsuarezdiaz   File: knn.py    License: GNU General Public License v3.0 5 votes vote down vote up
def loo_pred(self, X):
        """
        Obtains the predicted labels for the given data using them as a training set and with Leave One Out.

        Parameters
        ----------

        X : 2D-Array or Matrix

            The dataset to be used. It is indexed with the split indices, and
            its rows are paired with the stored labels ``self.y_``.

        Returns
        -------

        y : 1D-Array

            The vector with the label predictions. It has the same dtype as
            ``self.y_``.
        """
        loo = LeaveOneOut()
        # Allocate with the label dtype: a default (float64) buffer would turn
        # integer labels into floats and reject non-numeric labels on assignment.
        preds = np.empty([self.y_.size], dtype=self.y_.dtype)

        for train_index, test_index in loo.split(X):
            X_train, X_test = X[train_index], X[test_index]
            y_train = self.y_[train_index]

            # A fresh classifier per fold, trained without the held-out sample.
            knnloo = neighbors.KNeighborsClassifier(self.nn_)
            knnloo.fit(X_train, y_train)

            preds[test_index] = knnloo.predict(X_test)

        return preds
Example 10
Project: pyDML   Author: jlsuarezdiaz   File: multidml_knn.py    License: GNU General Public License v3.0 5 votes vote down vote up
def _loo_pred(self, X):
        """Predict a label for every sample of ``X`` with Leave One Out.

        Each sample is predicted by a k-NN classifier (``self.nn_``
        neighbors) fitted on all remaining samples and their labels from
        ``self.y_``. The result has the dtype of ``self.y_``.
        """
        predictions = np.empty([self.y_.size], dtype=self.y_.dtype)

        for train_index, test_index in LeaveOneOut().split(X):
            # A fresh classifier per fold, trained without the held-out sample.
            fold_knn = neighbors.KNeighborsClassifier(self.nn_)
            fold_knn.fit(X[train_index], self.y_[train_index])

            predictions[test_index] = fold_knn.predict(X[test_index])

        return predictions
Example 11
Project: pandas-ml   Author: pandas-ml   File: test_model_selection.py    License: BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def test_objectmapper(self):
        """``ModelFrame.model_selection`` must expose the very same objects
        as the ``ms`` module for each attribute listed below."""
        df = pdml.ModelFrame([])

        # Splitter Classes
        # StratifiedShuffleSplit is not listed: its check was commented out
        # in the original test (reason not visible here).
        splitter_classes = [
            'KFold', 'GroupKFold', 'StratifiedKFold',
            'LeaveOneGroupOut', 'LeavePGroupsOut', 'LeaveOneOut', 'LeavePOut',
            'ShuffleSplit', 'GroupShuffleSplit',
            'PredefinedSplit', 'TimeSeriesSplit',
        ]

        # Splitter Functions: none checked.

        # Hyper-parameter optimizers
        search_helpers = [
            'GridSearchCV', 'RandomizedSearchCV',
            'ParameterGrid', 'ParameterSampler',
        ]

        # Model validation: none checked.

        for attr in splitter_classes + search_helpers:
            self.assertIs(getattr(df.model_selection, attr), getattr(ms, attr))
Example 12
Project: pandas-ml   Author: pandas-ml   File: test_model_selection.py    License: BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def test_objectmapper_abbr(self):
        """The abbreviated accessor ``ModelFrame.ms`` must expose the very
        same objects as the ``ms`` module for each attribute listed below."""
        df = pdml.ModelFrame([])

        # Splitter Classes
        # StratifiedShuffleSplit is not listed: its check was commented out
        # in the original test (reason not visible here).
        splitter_classes = [
            'KFold', 'GroupKFold', 'StratifiedKFold',
            'LeaveOneGroupOut', 'LeavePGroupsOut', 'LeaveOneOut', 'LeavePOut',
            'ShuffleSplit', 'GroupShuffleSplit',
            'PredefinedSplit', 'TimeSeriesSplit',
        ]

        # Splitter Functions: none checked.

        # Hyper-parameter optimizers
        search_helpers = [
            'GridSearchCV', 'RandomizedSearchCV',
            'ParameterGrid', 'ParameterSampler',
        ]

        # Model validation: none checked.

        for attr in splitter_classes + search_helpers:
            self.assertIs(getattr(df.ms, attr), getattr(ms, attr))
Example 13
Project: twitter-stock-recommendation   Author: alvarobartt   File: test_split.py    License: MIT License 5 votes vote down vote up
def test_nested_cv():
    """Nested cross-validation must run for every inner/outer splitter pair."""
    rng = np.random.RandomState(0)

    X, y = make_classification(n_samples=15, n_classes=2, random_state=0)
    groups = rng.randint(0, 5, 15)

    candidate_cvs = [
        LeaveOneGroupOut(),
        LeaveOneOut(),
        GroupKFold(),
        StratifiedKFold(),
        StratifiedShuffleSplit(n_splits=3, random_state=0),
    ]
    alpha_grid = {'alpha': [1, .1]}

    # Unordered pairs, including a splitter paired with itself.
    for inner_cv, outer_cv in combinations_with_replacement(candidate_cvs, 2):
        search = GridSearchCV(Ridge(), param_grid=alpha_grid, cv=inner_cv)
        # The groups are forwarded to both the outer split and the inner fit.
        cross_val_score(search, X=X, y=y, groups=groups, cv=outer_cv,
                        fit_params={'groups': groups})
Example 14
Project: twitter-stock-recommendation   Author: alvarobartt   File: test_calibration.py    License: MIT License 5 votes vote down vote up
def test_calibration_prob_sum():
    """Calibrated probabilities of each sample must sum to 1.

    Non-regression test for issue #7796.
    """
    num_classes = 2
    X, y = make_classification(
        n_samples=10, n_features=5, n_classes=num_classes)

    calibrated = CalibratedClassifierCV(
        LinearSVC(C=1.0), method="sigmoid", cv=LeaveOneOut())
    calibrated.fit(X, y)

    probs = calibrated.predict_proba(X)
    row_totals = probs.sum(axis=1)
    assert_array_almost_equal(row_totals, np.ones(probs.shape[0]))
Example 15
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_split.py    License: MIT License 4 votes vote down vote up
def test_cross_validator_with_default_params():
    """Check cross-validators built with (mostly) default parameters.

    For each splitter this verifies the reported number of splits, that 1d
    and 2d ``X`` yield identical splits, that both train and test indices
    are integers, and the exact ``repr``. It finally checks that
    ``get_n_splits`` raises ValueError when ``X`` is None for LeaveOneOut
    and LeavePOut.
    """
    n_samples = 4
    n_unique_groups = 4
    n_splits = 2
    p = 2
    n_shuffle_splits = 10  # (the default value)

    X = np.array([[1, 2], [3, 4], [5, 6], [7, 8]])
    X_1d = np.array([1, 2, 3, 4])
    y = np.array([1, 1, 2, 2])
    groups = np.array([1, 2, 3, 4])
    loo = LeaveOneOut()
    lpo = LeavePOut(p)
    kf = KFold(n_splits)
    skf = StratifiedKFold(n_splits)
    lolo = LeaveOneGroupOut()
    lopo = LeavePGroupsOut(p)
    ss = ShuffleSplit(random_state=0)
    ps = PredefinedSplit([1, 1, 2, 2])  # n_splits = number of unique folds = 2

    loo_repr = "LeaveOneOut()"
    lpo_repr = "LeavePOut(p=2)"
    kf_repr = "KFold(n_splits=2, random_state=None, shuffle=False)"
    skf_repr = "StratifiedKFold(n_splits=2, random_state=None, shuffle=False)"
    lolo_repr = "LeaveOneGroupOut()"
    lopo_repr = "LeavePGroupsOut(n_groups=2)"
    ss_repr = ("ShuffleSplit(n_splits=10, random_state=0, "
               "test_size=None, train_size=None)")
    ps_repr = "PredefinedSplit(test_fold=array([1, 1, 2, 2]))"

    # One (splitter, expected repr, expected number of splits) row per case.
    cases = [
        (loo, loo_repr, n_samples),
        (lpo, lpo_repr, comb(n_samples, p)),
        (kf, kf_repr, n_splits),
        (skf, skf_repr, n_splits),
        (lolo, lolo_repr, n_unique_groups),
        (lopo, lopo_repr, comb(n_unique_groups, p)),
        (ss, ss_repr, n_shuffle_splits),
        (ps, ps_repr, 2),
    ]

    for cv, cv_repr, expected_n_splits in cases:
        # Test if get_n_splits works correctly
        assert_equal(expected_n_splits, cv.get_n_splits(X, y, groups))

        # Test if the cross-validator works as expected even if
        # the data is 1d
        np.testing.assert_equal(list(cv.split(X, y, groups)),
                                list(cv.split(X_1d, y, groups)))
        # Test that train, test indices returned are integers
        # (the test indices were previously never checked: the train
        # assertion was duplicated instead).
        for train, test in cv.split(X, y, groups):
            assert_equal(np.asarray(train).dtype.kind, 'i')
            assert_equal(np.asarray(test).dtype.kind, 'i')

        # Test if the repr works without any errors
        assert_equal(cv_repr, repr(cv))

    # ValueError for get_n_splits methods
    msg = "The 'X' parameter should not be None."
    assert_raise_message(ValueError, msg,
                         loo.get_n_splits, None, y, groups)
    assert_raise_message(ValueError, msg,
                         lpo.get_n_splits, None, y, groups)
Example 16
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_validation.py    License: MIT License 4 votes vote down vote up
def test_cross_val_predict():
    """Exercise cross_val_predict on dense, sparse and unsupervised inputs,
    on an invalid splitter, and on folds that miss a class."""
    boston = load_boston()
    X, y = boston.data, boston.target
    kfold = KFold()

    est = Ridge()

    # Reference result from a hand-written loop; it should match
    # cross_val_predict run with the same splitter.
    manual_preds = np.zeros_like(y)
    for train_idx, test_idx in kfold.split(X, y):
        est.fit(X[train_idx], y[train_idx])
        manual_preds[test_idx] = est.predict(X[test_idx])

    assert_array_almost_equal(cross_val_predict(est, X, y, cv=kfold),
                              manual_preds)

    # Default splitter: one prediction per sample.
    assert_equal(len(cross_val_predict(est, X, y)), len(y))

    # Leave-one-out splitter: one prediction per sample.
    assert_equal(len(cross_val_predict(est, X, y, cv=LeaveOneOut())), len(y))

    # Sparse input: entries not above the global median are zeroed before
    # the matrix is converted to COO format.
    X_sparse = coo_matrix(X * (X > np.median(X)))
    sparse_preds = cross_val_predict(est, X_sparse, y)
    assert_array_almost_equal(len(sparse_preds), len(y))

    # Estimator fitted without a target.
    cluster_preds = cross_val_predict(KMeans(), X)
    assert_equal(len(cluster_preds), len(y))

    class BadCV():
        # Yields the same train/test indices on each of its four splits.
        def split(self, X, y=None, groups=None):
            for _ in range(4):
                yield np.array([0, 1, 2, 3]), np.array([4, 5, 6, 7, 8])

    assert_raises(ValueError, cross_val_predict, est, X, y, cv=BadCV())

    X, y = load_iris(return_X_y=True)

    expected_warning = ('Number of classes in training fold (2) does '
                        'not match total number of classes (3). '
                        'Results may not be appropriate for your use case.')
    assert_warns_message(RuntimeWarning, expected_warning,
                         cross_val_predict, LogisticRegression(),
                         X, y, method='predict_proba', cv=KFold(2))
Example 17
Project: parfit   Author: jmcarpenter2   File: crossval.py    License: MIT License 4 votes vote down vote up
def crossvalOne(model, X, y, params, nfolds, metric=roc_auc_score, predict_proba=True, n_jobs=-1, verbose=1):
    """
    Makes one cross-validation model fit-score run using provided data and parameters
    :param model: The instantiated model you wish to pass, e.g. LogisticRegression()
    :param X: The independent variable data (must expose ``.shape``)
    :param y: The response variable data
    :param params: The parameters passed through to the model from the parameter grid.
        If it contains 'random_state', that value also seeds the fold shuffling
    :param nfolds: The number of folds you wish to use for cross-validation.
        When nfolds >= number of rows in X, leave-one-out is used instead
    :param metric: The metric you wish to use to score the crossval predictions using
    :param predict_proba: Choice between using 'predict_proba' and 'predict' for scoring routine.
        Default True means predict_proba and False means predict
    :param n_jobs: Number of cores to use in parallelization (defaults to -1: all cores)
    :param verbose: The level of verbosity of reporting updates on the parallel fitting process
        Default is 1. The scoring stage always runs with verbose=0
    :return: Returns the mean of the cross-validation scores, or the value returned by
        scoreLeaveOneOut when leave-one-out is used
    """
    random_state = params.get('random_state')

    # With at least as many folds as samples, fall back to leave-one-out.
    use_leave_one_out = nfolds >= X.shape[0]
    if use_leave_one_out:
        cv = LeaveOneOut()
    else:
        cv = StratifiedKFold(n_splits=nfolds, shuffle=True, random_state=random_state)

    train_indices, test_indices = zip(*cv.split(X, y))

    # Convert once so every fold can be selected with positional indexing.
    X_arr = np.asarray(X)
    y_arr = np.asarray(y)

    # Estimator instances are handed to fitOne as their class; anything else
    # is passed through unchanged.
    model_spec = type(model) if isinstance(model, BaseEstimator) else model
    fitted_models = Parallel(n_jobs=n_jobs, verbose=verbose)(
        delayed(fitOne)(model_spec, X_arr[train_index], y_arr[train_index], params)
        for train_index in train_indices
    )

    if use_leave_one_out:
        return scoreLeaveOneOut(fitted_models, test_indices, X, y, metric, predict_proba)

    scores = Parallel(n_jobs=n_jobs, verbose=0)(
        delayed(scoreOne)(fitted_model, X_arr[test_index], y_arr[test_index], metric, predict_proba)
        for fitted_model, test_index in zip(fitted_models, test_indices)
    )
    return np.mean(scores)
Example 18
Project: twitter-stock-recommendation   Author: alvarobartt   File: test_split.py    License: MIT License 4 votes vote down vote up
def test_cross_validator_with_default_params():
    """Check cross-validators built with (mostly) default parameters.

    For each splitter this verifies the reported number of splits, that 1d
    and 2d ``X`` yield identical splits, that both train and test indices
    are integers, and the exact ``repr``. It finally checks that
    ``get_n_splits`` raises ValueError when ``X`` is None for LeaveOneOut
    and LeavePOut.
    """
    n_samples = 4
    n_unique_groups = 4
    n_splits = 2
    p = 2
    n_shuffle_splits = 10  # (the default value)

    X = np.array([[1, 2], [3, 4], [5, 6], [7, 8]])
    X_1d = np.array([1, 2, 3, 4])
    y = np.array([1, 1, 2, 2])
    groups = np.array([1, 2, 3, 4])
    loo = LeaveOneOut()
    lpo = LeavePOut(p)
    kf = KFold(n_splits)
    skf = StratifiedKFold(n_splits)
    lolo = LeaveOneGroupOut()
    lopo = LeavePGroupsOut(p)
    ss = ShuffleSplit(random_state=0)
    ps = PredefinedSplit([1, 1, 2, 2])  # n_splits = number of unique folds = 2

    loo_repr = "LeaveOneOut()"
    lpo_repr = "LeavePOut(p=2)"
    kf_repr = "KFold(n_splits=2, random_state=None, shuffle=False)"
    skf_repr = "StratifiedKFold(n_splits=2, random_state=None, shuffle=False)"
    lolo_repr = "LeaveOneGroupOut()"
    lopo_repr = "LeavePGroupsOut(n_groups=2)"
    ss_repr = ("ShuffleSplit(n_splits=10, random_state=0, "
               "test_size='default',\n       train_size=None)")
    ps_repr = "PredefinedSplit(test_fold=array([1, 1, 2, 2]))"

    # One (splitter, expected repr, expected number of splits) row per case.
    cases = [
        (loo, loo_repr, n_samples),
        (lpo, lpo_repr, comb(n_samples, p)),
        (kf, kf_repr, n_splits),
        (skf, skf_repr, n_splits),
        (lolo, lolo_repr, n_unique_groups),
        (lopo, lopo_repr, comb(n_unique_groups, p)),
        (ss, ss_repr, n_shuffle_splits),
        (ps, ps_repr, 2),
    ]

    for cv, cv_repr, expected_n_splits in cases:
        # Test if get_n_splits works correctly
        assert_equal(expected_n_splits, cv.get_n_splits(X, y, groups))

        # Test if the cross-validator works as expected even if
        # the data is 1d
        np.testing.assert_equal(list(cv.split(X, y, groups)),
                                list(cv.split(X_1d, y, groups)))
        # Test that train, test indices returned are integers
        # (the test indices were previously never checked: the train
        # assertion was duplicated instead).
        for train, test in cv.split(X, y, groups):
            assert_equal(np.asarray(train).dtype.kind, 'i')
            assert_equal(np.asarray(test).dtype.kind, 'i')

        # Test if the repr works without any errors
        assert_equal(cv_repr, repr(cv))

    # ValueError for get_n_splits methods
    msg = "The 'X' parameter should not be None."
    assert_raise_message(ValueError, msg,
                         loo.get_n_splits, None, y, groups)
    assert_raise_message(ValueError, msg,
                         lpo.get_n_splits, None, y, groups)
Example 19
Project: twitter-stock-recommendation   Author: alvarobartt   File: test_validation.py    License: MIT License 4 votes vote down vote up
def test_cross_val_predict():
    """Exercise cross_val_predict on dense, sparse and unsupervised inputs,
    on an invalid splitter, and on folds that miss a class."""
    boston = load_boston()
    X, y = boston.data, boston.target
    kfold = KFold()

    est = Ridge()

    # Reference result from a hand-written loop; it should match
    # cross_val_predict run with the same splitter.
    manual_preds = np.zeros_like(y)
    for train_idx, test_idx in kfold.split(X, y):
        est.fit(X[train_idx], y[train_idx])
        manual_preds[test_idx] = est.predict(X[test_idx])

    assert_array_almost_equal(cross_val_predict(est, X, y, cv=kfold),
                              manual_preds)

    # Default splitter: one prediction per sample.
    assert_equal(len(cross_val_predict(est, X, y)), len(y))

    # Leave-one-out splitter: one prediction per sample.
    assert_equal(len(cross_val_predict(est, X, y, cv=LeaveOneOut())), len(y))

    # Sparse input: entries not above the global median are zeroed before
    # the matrix is converted to COO format.
    X_sparse = coo_matrix(X * (X > np.median(X)))
    sparse_preds = cross_val_predict(est, X_sparse, y)
    assert_array_almost_equal(len(sparse_preds), len(y))

    # Estimator fitted without a target.
    cluster_preds = cross_val_predict(KMeans(), X)
    assert_equal(len(cluster_preds), len(y))

    class BadCV():
        # Yields the same train/test indices on each of its four splits.
        def split(self, X, y=None, groups=None):
            for _ in range(4):
                yield np.array([0, 1, 2, 3]), np.array([4, 5, 6, 7, 8])

    assert_raises(ValueError, cross_val_predict, est, X, y, cv=BadCV())

    X, y = load_iris(return_X_y=True)

    expected_warning = ('Number of classes in training fold (2) does '
                        'not match total number of classes (3). '
                        'Results may not be appropriate for your use case.')
    assert_warns_message(RuntimeWarning, expected_warning,
                         cross_val_predict, LogisticRegression(),
                         X, y, method='predict_proba', cv=KFold(2))