Python sklearn.model_selection.RepeatedKFold() Examples

The following are 14 code examples for showing how to use sklearn.model_selection.RepeatedKFold(). These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example.

You may check out the related API usage on the sidebar.

You may also want to check out all available functions/classes of the module sklearn.model_selection, or try the search function.

Example 1
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_split.py    License: MIT License 6 votes vote down vote up
def test_2d_y():
    """Smoke-test every splitter against 1d, 2d-column and multi-label targets.

    Splitting with a 1d ``y`` and with the same ``y`` reshaped to a column
    must succeed.  Splitting with a multi-label ``y`` may raise
    ``ValueError``, but only with the "supported target types" message.
    """
    n_samples = 30
    rng = np.random.RandomState(1)
    # The order of the draws below matters: it fixes the generated data.
    X = rng.randint(0, 3, size=(n_samples, 2))
    y = rng.randint(0, 3, size=(n_samples,))
    y_2d = y.reshape(-1, 1)
    y_multilabel = rng.randint(0, 2, size=(n_samples, 3))
    groups = rng.randint(0, 3, size=(n_samples,))

    # The only error message a splitter is allowed to raise for multi-label y.
    expected_msg = "Supported target types are: {}. Got 'multilabel".format(
        ('binary', 'multiclass'))

    splitters = [
        LeaveOneOut(),
        LeavePOut(p=2),
        KFold(),
        StratifiedKFold(),
        RepeatedKFold(),
        RepeatedStratifiedKFold(),
        ShuffleSplit(),
        StratifiedShuffleSplit(test_size=.5),
        GroupShuffleSplit(),
        LeaveOneGroupOut(),
        LeavePGroupsOut(n_groups=2),
        GroupKFold(),
        TimeSeriesSplit(),
        PredefinedSplit(test_fold=groups),
    ]
    for cv in splitters:
        # list() exhausts the generator so that every split is actually built.
        for target in (y, y_2d):
            list(cv.split(X, target, groups))
        try:
            list(cv.split(X, y_multilabel, groups))
        except ValueError as err:
            assert expected_msg in str(err)
Example 2
Project: autogluon   Author: awslabs   File: utils.py    License: Apache License 2.0 6 votes vote down vote up
def generate_kfold(X, y=None, n_splits=5, random_state=0, stratified=False, n_repeats=1):
    """Build shuffled k-fold train/test index pairs for ``X``.

    Parameters
    ----------
    X : data to split along its first axis
    y : targets, optional
        Only used for stratification.
    n_splits : int
        Number of folds per repetition.
    random_state : seed passed to the splitter
    stratified : bool
        Request stratified folds.  Stratification is only applied when ``y``
        is also given; otherwise plain (unstratified) folds are produced.
    n_repeats : int
        When greater than 1 a repeated splitter is used, otherwise a single
        shuffled k-fold.

    Returns
    -------
    list of ``[train_index, test_index]`` pairs, one per generated fold.
    """
    use_stratified = stratified and (y is not None)

    if n_repeats > 1:
        splitter_cls = RepeatedStratifiedKFold if use_stratified else RepeatedKFold
        kf = splitter_cls(n_splits=n_splits, n_repeats=n_repeats, random_state=random_state)
    else:
        splitter_cls = StratifiedKFold if use_stratified else KFold
        kf = splitter_cls(n_splits=n_splits, shuffle=True, random_state=random_state)

    # The former ``kf.get_n_splits(...)`` calls discarded their result and had
    # no effect on the splitter, so they are dropped.  ``y`` is forwarded only
    # to the stratified splitters, exactly as before.
    split_args = (X, y) if use_stratified else (X,)
    return [[train_index, test_index] for train_index, test_index in kf.split(*split_args)]
Example 3
Project: twitter-stock-recommendation   Author: alvarobartt   File: test_split.py    License: MIT License 6 votes vote down vote up
def test_2d_y():
    """Smoke-test every splitter against 1d, 2d-column and multi-label targets.

    Splitting with a 1d ``y`` and with the same ``y`` reshaped to a column
    must succeed.  Splitting with a multi-label ``y`` may raise
    ``ValueError``, but only with the "supported target types" message.
    """
    n_samples = 30
    rng = np.random.RandomState(1)
    # The order of the draws below matters: it fixes the generated data.
    X = rng.randint(0, 3, size=(n_samples, 2))
    y = rng.randint(0, 3, size=(n_samples,))
    y_2d = y.reshape(-1, 1)
    y_multilabel = rng.randint(0, 2, size=(n_samples, 3))
    groups = rng.randint(0, 3, size=(n_samples,))

    # The only error message a splitter is allowed to raise for multi-label y.
    expected_msg = "Supported target types are: {}. Got 'multilabel".format(
        ('binary', 'multiclass'))

    splitters = [
        LeaveOneOut(),
        LeavePOut(p=2),
        KFold(),
        StratifiedKFold(),
        RepeatedKFold(),
        RepeatedStratifiedKFold(),
        ShuffleSplit(),
        StratifiedShuffleSplit(test_size=.5),
        GroupShuffleSplit(),
        LeaveOneGroupOut(),
        LeavePGroupsOut(n_groups=2),
        GroupKFold(),
        TimeSeriesSplit(),
        PredefinedSplit(test_fold=groups),
    ]
    for cv in splitters:
        # list() exhausts the generator so that every split is actually built.
        for target in (y, y_2d):
            list(cv.split(X, target, groups))
        try:
            list(cv.split(X, y_multilabel, groups))
        except ValueError as err:
            assert expected_msg in str(err)
Example 4
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_split.py    License: MIT License 5 votes vote down vote up
def test_repeated_cv_value_errors():
    """Repeated splitters must reject a zero or non-integer ``n_repeats``."""
    invalid_repeats = (0, 1.5)
    for splitter_cls in (RepeatedKFold, RepeatedStratifiedKFold):
        for bad_value in invalid_repeats:
            # The constructor itself is expected to raise.
            assert_raises(ValueError, splitter_cls, n_repeats=bad_value)
Example 5
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_split.py    License: MIT License 5 votes vote down vote up
def test_repeated_kfold_determinstic_split():
    """A seeded RepeatedKFold must yield the same folds on every ``split`` call."""
    X = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10]]
    random_state = 258173307
    rkf = RepeatedKFold(n_splits=2, n_repeats=2, random_state=random_state)

    # Expected (train, test) index pairs, in the order they are generated:
    # two folds for the first repetition, then two for the second.
    expected_folds = [
        ([2, 4], [0, 1, 3]),
        ([0, 1, 3], [2, 4]),
        ([0, 1], [2, 3, 4]),
        ([2, 3, 4], [0, 1]),
    ]

    # Calling split() repeatedly must reproduce the same sequence each time.
    for _ in range(3):
        splits = rkf.split(X)
        for want_train, want_test in expected_folds:
            train, test = next(splits)
            assert_array_equal(train, want_train)
            assert_array_equal(test, want_test)

        # After n_splits * n_repeats folds the generator must be exhausted.
        assert_raises(StopIteration, next, splits)
Example 6
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_split.py    License: MIT License 5 votes vote down vote up
def test_get_n_splits_for_repeated_kfold():
    """``get_n_splits`` must report ``n_splits * n_repeats`` for RepeatedKFold."""
    n_splits = 3
    n_repeats = 4
    # Pass the parameters by keyword: current scikit-learn releases declare
    # them keyword-only, so the former positional call raises TypeError there.
    # Keyword passing works on older releases as well.
    rkf = RepeatedKFold(n_splits=n_splits, n_repeats=n_repeats)
    expected_n_splits = n_splits * n_repeats
    # A plain assert avoids relying on the legacy ``assert_equal`` test helper.
    assert expected_n_splits == rkf.get_n_splits()
Example 7
Project: pwtools   Author: elcorto   File: rbf.py    License: BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def __init__(self, points, values, rbf_kwds=None, cv_kwds=None):
        """
        Parameters
        ----------
        points, values : see :class:`Rbf`
        rbf_kwds : dict, optional
            for ``Rbf(points, values, **rbf_kwds)``; defaults to an empty dict
        cv_kwds : dict, optional
            cross-validation parameters: `ns` = `n_splits`, `nr` = `n_repeats` (see
            sklearn.model_selection.RepeatedKFold); defaults to
            ``dict(ns=5, nr=1)``
        """
        self.points = points
        self.values = values
        # Build the defaults per call. The previous ``dict()`` defaults in the
        # signature were created once and shared by every instance, so mutating
        # one instance's kwds silently changed the defaults of all others.
        self.rbf_kwds = dict() if rbf_kwds is None else rbf_kwds
        self.cv_kwds = dict(ns=5, nr=1) if cv_kwds is None else cv_kwds
Example 8
Project: pwtools   Author: elcorto   File: rbf.py    License: BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def cv(self, params):
        """Repeated K-fold cross-validation of the interpolator.

        ``self.points`` is split along axis 0 into K = ``ns`` folds (``ns``
        taken from ``self.cv_kwds``). Each fold serves once as the test set
        while the remaining folds form the training set, e.g. `ns=5` gives 5
        fits on 4/5 of the data, each tested on the left-out 1/5. The whole
        procedure is repeated ``nr`` times, so `nr` * `ns` fit errors are
        produced in total.

        For every fold an interpolator is obtained from
        ``self._get_rbfi(params, train_points, train_values)`` and its
        ``fit_error`` on the held-out points/values is recorded.

        Parameters
        ----------
        params : seq length 1 or 2
            | params[0] = p
            | params[1] = r (optional)

        Returns
        -------
        errs : 1d array (nr*ns,)
            direct fit error from each fold
        """
        n_splits = self.cv_kwds['ns']
        n_repeats = self.cv_kwds['nr']
        # One slot per (repeat, fold) combination.
        errs = np.empty((n_splits*n_repeats,), dtype=float)
        splitter = RepeatedKFold(n_splits=n_splits, n_repeats=n_repeats)
        for fold_no, (train, test) in enumerate(splitter.split(self.points)):
            interpolator = self._get_rbfi(params,
                                          self.points[train,...],
                                          self.values[train,...])
            errs[fold_no] = interpolator.fit_error(self.points[test,...],
                                                   self.values[test,...])
        return errs
Example 9
Project: MAST-ML   Author: uw-cmg   File: hyper_opt.py    License: MIT License 5 votes vote down vote up
def fit(self, X, y, savepath=None, refit=True, iid=True):
        """Run a grid search over the configured parameter grid.

        Parameters
        ----------
        X, y : training data and targets, passed to ``GridSearchCV.fit``
        savepath : str, optional
            Passed to ``self._save_output``; defaults to the current working
            directory.
        refit : bool
            Forwarded to ``GridSearchCV``.
        iid : bool
            Forwarded to ``GridSearchCV``.

        Returns
        -------
        The ``best_estimator_`` of the fitted search object.

        Raises
        ------
        SystemExit
            With exit status 1 when the search fails; the failure is logged
            first.
        """
        rst = dict()
        param_dict = self._get_grid_param_dict()

        if savepath is None:
            savepath = os.getcwd()

        estimator_name = self._estimator_name
        param_dict = self._search_space_generator(param_dict)

        # Fall back to (and remember) a default repeated k-fold splitter.
        if self.cv is None:
            self.cv = ms.RepeatedKFold()

        # NOTE(review): the ``iid`` argument no longer exists in recent
        # scikit-learn releases -- confirm against the pinned version.
        model = GridSearchCV(self.estimator, param_dict, scoring=self.scoring, cv=self.cv, refit=refit,
                             iid=iid, n_jobs=self.n_jobs, verbose=2)

        try:
            rst[estimator_name] = model.fit(X, y)
        except Exception as err:
            # Catch Exception rather than everything, so KeyboardInterrupt and
            # SystemExit propagate untouched.
            log.error('Hyperparameter optimization failed, likely due to inappropriate domain of values to optimize'
                      ' one or more parameters over. Please check your input file and the sklearn docs for the mode'
                      ' you are optimizing for the domain of correct values')
            # Report the real cause as well; the message above is only a guess.
            log.error('Underlying error: {!r}'.format(err))
            # Non-zero status: the former bare ``exit()`` reported success (0).
            raise SystemExit(1)

        best_estimator = rst[estimator_name].best_estimator_

        self._save_output(savepath, rst)
        return best_estimator
Example 10
Project: MAST-ML   Author: uw-cmg   File: hyper_opt.py    License: MIT License 5 votes vote down vote up
def fit(self, X, y, savepath=None, refit=True):
        """Run a randomized search over the configured parameter distributions.

        Parameters
        ----------
        X, y : training data and targets, passed to ``RandomizedSearchCV.fit``
        savepath : str, optional
            Passed to ``self._save_output``; defaults to the current working
            directory.
        refit : bool
            Forwarded to ``RandomizedSearchCV``.

        Returns
        -------
        The ``best_estimator_`` of the fitted search object.

        Raises
        ------
        SystemExit
            With exit status 1 when the search fails; the failure is logged
            first.
        """
        rst = dict()
        param_dict = self._get_randomized_param_dict()

        if savepath is None:
            savepath = os.getcwd()

        estimator_name = self._estimator_name

        # Fall back to (and remember) a default repeated k-fold splitter.
        if self.cv is None:
            self.cv = ms.RepeatedKFold()

        model = RandomizedSearchCV(self.estimator, param_dict, n_iter=self.n_iter, scoring=self.scoring, cv=self.cv,
                                   refit=refit, n_jobs=self.n_jobs, verbose=2)

        try:
            rst[estimator_name] = model.fit(X, y)
        except Exception as err:
            # Catch Exception rather than everything, so KeyboardInterrupt and
            # SystemExit propagate untouched.
            log.error('Hyperparameter optimization failed, likely due to inappropriate domain of values to optimize'
                      ' one or more parameters over. Please check your input file and the sklearn docs for the mode'
                      ' you are optimizing for the domain of correct values')
            # Report the real cause as well; the message above is only a guess.
            log.error('Underlying error: {!r}'.format(err))
            # Non-zero status: the former bare ``exit()`` reported success (0).
            raise SystemExit(1)

        best_estimator = rst[estimator_name].best_estimator_

        self._save_output(savepath, rst)
        return best_estimator
Example 11
Project: MAST-ML   Author: uw-cmg   File: hyper_opt.py    License: MIT License 5 votes vote down vote up
def fit(self, X, y, savepath=None, refit=True):
        """Run a Bayesian search over the configured search spaces.

        Parameters
        ----------
        X, y : training data and targets, passed to ``BayesSearchCV.fit``
        savepath : str, optional
            Passed to ``self._save_output``; defaults to the current working
            directory.
        refit : bool
            Forwarded to ``BayesSearchCV``.

        Returns
        -------
        The ``best_estimator_`` of the fitted search object.

        Raises
        ------
        SystemExit
            With exit status 1 when the search fails; the failure is logged
            first.
        """
        rst = dict()
        param_dict = self._get_bayesian_param_dict()

        if savepath is None:
            savepath = os.getcwd()

        estimator_name = self._estimator_name

        # Fall back to (and remember) a default repeated k-fold splitter.
        if self.cv is None:
            self.cv = ms.RepeatedKFold()

        model = BayesSearchCV(estimator=self.estimator, search_spaces=param_dict, n_iter=self.n_iter,
                              scoring=self.scoring, cv=self.cv, refit=refit, n_jobs=self.n_jobs, verbose=2)

        try:
            rst[estimator_name] = model.fit(X, y)
        except Exception as err:
            # Catch Exception rather than everything, so KeyboardInterrupt and
            # SystemExit propagate untouched.
            log.error('Hyperparameter optimization failed, likely due to inappropriate domain of values to optimize'
                      ' one or more parameters over. Please check your input file and the sklearn docs for the mode'
                      ' you are optimizing for the domain of correct values')
            # Report the real cause as well; the message above is only a guess.
            log.error('Underlying error: {!r}'.format(err))
            # Non-zero status: the former bare ``exit()`` reported success (0).
            raise SystemExit(1)

        best_estimator = rst[estimator_name].best_estimator_

        self._save_output(savepath, rst)
        return best_estimator
Example 12
Project: twitter-stock-recommendation   Author: alvarobartt   File: test_split.py    License: MIT License 5 votes vote down vote up
def test_repeated_cv_value_errors():
    """Repeated splitters must reject a zero or non-integer ``n_repeats``."""
    invalid_repeats = (0, 1.5)
    for splitter_cls in (RepeatedKFold, RepeatedStratifiedKFold):
        for bad_value in invalid_repeats:
            # The constructor itself is expected to raise.
            assert_raises(ValueError, splitter_cls, n_repeats=bad_value)
Example 13
Project: twitter-stock-recommendation   Author: alvarobartt   File: test_split.py    License: MIT License 5 votes vote down vote up
def test_repeated_kfold_determinstic_split():
    """A seeded RepeatedKFold must yield the same folds on every ``split`` call."""
    X = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10]]
    random_state = 258173307
    rkf = RepeatedKFold(n_splits=2, n_repeats=2, random_state=random_state)

    # Expected (train, test) index pairs, in the order they are generated:
    # two folds for the first repetition, then two for the second.
    expected_folds = [
        ([2, 4], [0, 1, 3]),
        ([0, 1, 3], [2, 4]),
        ([0, 1], [2, 3, 4]),
        ([2, 3, 4], [0, 1]),
    ]

    # Calling split() repeatedly must reproduce the same sequence each time.
    for _ in range(3):
        splits = rkf.split(X)
        for want_train, want_test in expected_folds:
            train, test = next(splits)
            assert_array_equal(train, want_train)
            assert_array_equal(test, want_test)

        # After n_splits * n_repeats folds the generator must be exhausted.
        assert_raises(StopIteration, next, splits)
Example 14
Project: twitter-stock-recommendation   Author: alvarobartt   File: test_split.py    License: MIT License 5 votes vote down vote up
def test_get_n_splits_for_repeated_kfold():
    """``get_n_splits`` must report ``n_splits * n_repeats`` for RepeatedKFold."""
    n_splits = 3
    n_repeats = 4
    # Pass the parameters by keyword: current scikit-learn releases declare
    # them keyword-only, so the former positional call raises TypeError there.
    # Keyword passing works on older releases as well.
    rkf = RepeatedKFold(n_splits=n_splits, n_repeats=n_repeats)
    expected_n_splits = n_splits * n_repeats
    # A plain assert avoids relying on the legacy ``assert_equal`` test helper.
    assert expected_n_splits == rkf.get_n_splits()