Python sklearn.model_selection.ShuffleSplit() Examples

The following are 30 code examples showing how to use sklearn.model_selection.ShuffleSplit(). These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example.

You may check out the related API usage on the sidebar.

You may also want to check out all available functions/classes of the module sklearn.model_selection, or try the search function.

Example 1
Project: poeai   Author: nicholastoddsmith   File: TargetingSystem.py    License: MIT License 6 votes vote down vote up
def Train(self, C, A, Y, SF):
        '''
        Fit classifier C on sample matrix A / target matrix Y and print its score.

        C  -- estimator exposing fit(A, Y) and predict(A)
        A  -- sample matrix; its rows are predicted in 32 chunks
        Y  -- target matrix; predictions are gathered in an object array of
              shape Y.shape
        SF -- scoring callable invoked as SF(Y, predictions); the result is
              printed with the '{:8.6f}' format

        Returns None; the score is only printed.
        '''
        C.fit(A, Y)
        # Use the builtin ``object``: the ``np.object`` alias was deprecated in
        # NumPy 1.20 and removed in 1.24, where it raises AttributeError.
        YH = np.zeros(Y.shape, dtype=object)
        # Split up verification into chunks to prevent out of memory
        for rows in np.array_split(np.arange(A.shape[0]), 32):
            YH[rows] = C.predict(A[rows])
        s1 = SF(Y, YH)
        print('All:{:8.6f}'.format(s1))
        # Disabled train/test experiment, kept for reference only:
        #   ss = ShuffleSplit(random_state = 1151)  # fixed state so training can be repeated later
        #   trn, tst = next(ss.split(A, Y))         # make train/test split
        #   mi = [8] * 1                            # maximum number of iterations at each iter
        #   YH = np.zeros((A.shape[0]), dtype = object)
        #   for mic in mi:                          # chunk size to split dataset for CV results
        #       C.SetMaxIter(mic)                   # set the maximum number of iterations to run
        #       C.fit(A[trn], Y[trn])               # perform training iterations
Example 2
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_multiclass.py    License: MIT License 6 votes vote down vote up
def test_safe_split_with_precomputed_kernel():
    """Check ``_safe_split`` on a precomputed kernel against the raw features.

    The kernel slices returned for an ``SVC(kernel="precomputed")`` must equal
    the dot products of the feature slices returned for a plain ``SVC``, and
    the target slices must agree, for both the train and the test indices.
    """
    plain_svc = SVC()
    kernel_svc = SVC(kernel="precomputed")

    iris = datasets.load_iris()
    X = iris.data
    y = iris.target
    K = np.dot(X, X.T)

    splitter = ShuffleSplit(test_size=0.25, random_state=0)
    # Only the first of the generated splits is used.
    train_idx, test_idx = next(splitter.split(X))

    # Train part.
    X_tr, y_tr = _safe_split(plain_svc, X, y, train_idx)
    K_tr, y_tr_kernel = _safe_split(kernel_svc, K, y, train_idx)
    assert_array_almost_equal(K_tr, np.dot(X_tr, X_tr.T))
    assert_array_almost_equal(y_tr, y_tr_kernel)

    # Test part: the kernel columns are taken at the train indices.
    X_te, y_te = _safe_split(plain_svc, X, y, test_idx, train_idx)
    K_te, y_te_kernel = _safe_split(kernel_svc, K, y, test_idx, train_idx)
    assert_array_almost_equal(K_te, np.dot(X_te, X_tr.T))
    assert_array_almost_equal(y_te, y_te_kernel)
Example 3
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_split.py    License: MIT License 6 votes vote down vote up
def test_2d_y():
    """Smoke test: every splitter accepts 1-D and column-vector targets.

    For a multilabel target a ``ValueError`` is tolerated, provided its
    message names the supported target types.
    """
    n_samples = 30
    rng = np.random.RandomState(1)
    # The draw order below fixes the generated data; keep it as is.
    X = rng.randint(0, 3, size=(n_samples, 2))
    y = rng.randint(0, 3, size=(n_samples,))
    y_2d = y.reshape(-1, 1)
    y_multilabel = rng.randint(0, 2, size=(n_samples, 3))
    groups = rng.randint(0, 3, size=(n_samples,))

    splitters = [
        LeaveOneOut(), LeavePOut(p=2), KFold(), StratifiedKFold(),
        RepeatedKFold(), RepeatedStratifiedKFold(),
        ShuffleSplit(), StratifiedShuffleSplit(test_size=.5),
        GroupShuffleSplit(), LeaveOneGroupOut(),
        LeavePGroupsOut(n_groups=2), GroupKFold(), TimeSeriesSplit(),
        PredefinedSplit(test_fold=groups),
    ]

    allowed_target_types = ('binary', 'multiclass')
    expected_msg = "Supported target types are: {}. Got 'multilabel".format(
        allowed_target_types)

    for cv in splitters:
        for target in (y, y_2d):
            list(cv.split(X, target, groups))
        try:
            list(cv.split(X, y_multilabel, groups))
        except ValueError as err:
            assert expected_msg in str(err)
Example 4
Project: chemml   Author: hachmannlab   File: active.py    License: BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def _update_train_test(self):
        """
        Apply the ``test_type`` setting to the train/test partition.

        Returns True without touching anything when ``test_type`` is
        'passive' or when ``self._queries`` is non-empty. Otherwise the
        current train and test indices (and their targets) are pooled and
        re-split once with a fixed-seed ShuffleSplit, the four attributes are
        overwritten, and the method falls through returning None.
        """
        if self.test_type == 'passive' or len(self._queries) > 0:
            return True

        # active test split: pool everything seen so far
        pooled_indices = np.concatenate(
            [self.train_indices, self.test_indices], axis=0)
        pooled_y = np.concatenate([self._Y_train, self._Y_test], axis=0)

        # select randomly
        splitter = ShuffleSplit(n_splits=1, test_size=self.test_size,
                                train_size=None, random_state=90)
        for train_pos, test_pos in splitter.split(pooled_indices):
            # test
            self._Y_test = pooled_y[test_pos]
            self.test_indices = pooled_indices[test_pos]
            # train
            self._Y_train = pooled_y[train_pos]
            self.train_indices = pooled_indices[train_pos]
Example 5
Project: self-ensemble-visual-domain-adapt-photo   Author: Britefury   File: image_dataset.py    License: MIT License 6 votes vote down vote up
def subset_indices(d_source, d_target, subsetsize, subsetseed):
    """Pick subset indices for the source and target datasets.

    When ``subsetsize`` is not positive, no subsetting happens: both index
    results are None and the counts are the lengths of the ``images``
    attributes. Otherwise the indices are the test part of a single shuffle
    split of size ``subsetsize``: stratified on ``y`` for the source, and for
    the target too when ``d_target.has_ground_truth`` is truthy (a plain
    ShuffleSplit over the image positions is used if it is not).

    A ``subsetseed`` of 0 means the global ``np.random`` state is used.

    Returns ``(source_indices, target_indices, n_src, n_tgt)``.
    """
    if not subsetsize > 0:
        return None, None, len(d_source.images), len(d_target.images)

    rng = np.random.RandomState(subsetseed) if subsetseed != 0 else np.random
    stratified = StratifiedShuffleSplit(
        n_splits=1, test_size=subsetsize, random_state=rng)
    plain = ShuffleSplit(n_splits=1, test_size=subsetsize, random_state=rng)

    # Source first, then target: both draw from the same random state.
    source_indices = next(stratified.split(d_source.y, d_source.y))[1]
    n_src = source_indices.shape[0]

    if d_target.has_ground_truth:
        target_split = stratified.split(d_target.y, d_target.y)
    else:
        target_split = plain.split(np.arange(len(d_target.images)))
    target_indices = next(target_split)[1]
    n_tgt = target_indices.shape[0]

    return source_indices, target_indices, n_src, n_tgt
Example 6
Project: nonconformist   Author: donlnz   File: acp.py    License: MIT License 6 votes vote down vote up
def gen_samples(self, y, n_samples, problem_type):
		"""Yield ``n_samples`` (train, calibration) index pairs for ``y``.

		Uses a StratifiedShuffleSplit on ``y`` when ``problem_type`` is
		'classification' and a plain ShuffleSplit otherwise. The calibration
		part has size ``self.cal_portion``.
		"""
		# The splitters only need the number of rows, so a dummy matrix is enough.
		placeholder = np.zeros((y.size, 1))

		if problem_type == 'classification':
			splitter = StratifiedShuffleSplit(
				n_splits=n_samples,
				test_size=self.cal_portion
			)
			index_pairs = splitter.split(placeholder, y)
		else:
			splitter = ShuffleSplit(
				n_splits=n_samples,
				test_size=self.cal_portion
			)
			index_pairs = splitter.split(placeholder)

		for train_idx, cal_idx in index_pairs:
			yield train_idx, cal_idx


# -----------------------------------------------------------------------------
# Conformal ensemble
# ----------------------------------------------------------------------------- 
Example 7
Project: cs-ranking   Author: kiudee   File: expedia_dataset_reader.py    License: Apache License 2.0 6 votes vote down vote up
def get_single_train_test_split(self):
        """Build one train/test split per entry of ``self.X_dict``.

        Every array with more than one row is split by a single ShuffleSplit
        (test_size=0.80, seeded with ``self.random_state``); an array with
        exactly one row uses index 0 for both parts. The copied slices are
        stored in ``self.X_train``/``Y_train``/``X_test``/``Y_test``, then the
        data is sub-sampled, validated and standardized.

        Returns ``(self.X, self.Y, self.X_test, self.Y_test)``.
        """
        splitter = ShuffleSplit(
            n_splits=1, random_state=self.random_state, test_size=0.80
        )

        index_pairs = {}
        for n_obj, arr in self.X_dict.items():
            if arr.shape[0] != 1:
                # n_splits=1, so the first split is the only one
                index_pairs[n_obj] = next(splitter.split(arr))
            else:
                index_pairs[n_obj] = ([0], [0])

        self.X_train = {}
        self.Y_train = {}
        self.X_test = {}
        self.Y_test = {}
        for n_obj, (train_idx, test_idx) in index_pairs.items():
            self.X_train[n_obj] = np.copy(self.X_dict[n_obj][train_idx])
            self.X_test[n_obj] = np.copy(self.X_dict[n_obj][test_idx])
            self.Y_train[n_obj] = np.copy(self.Y_dict[n_obj][train_idx])
            self.Y_test[n_obj] = np.copy(self.Y_dict[n_obj][test_idx])

        self.X, self.Y = self.sub_sampling_from_dictionary()
        self.__check_dataset_validity__()
        self.X, self.X_test = standardize_features(self.X, self.X_test)
        return self.X, self.Y, self.X_test, self.Y_test
Example 8
Project: SyBrain   Author: 5ymph0en1x   File: stockpredictor.py    License: GNU General Public License v3.0 6 votes vote down vote up
def TestPerformance(self, df=None):
        """Print 3-fold cross-validated R^2 scores for the regressor ``self.R``.

        With no ``df`` the stored frame ``self.D`` is used; otherwise a copy
        of ``df`` is passed through ``self.S.transform``. Scores are computed
        on all rows, on the test rows and on the train rows of one
        ShuffleSplit, and printed. Nothing is returned.
        """
        # Fall back to the currently learned frame when none is given
        D = self.D if df is None else self.S.transform(df.copy())
        # Features, then targets (the second returned value is not needed)
        A = self._ExtractFeat(D)
        y, _unused = self._ExtractTarg(D)
        # Begin cross validation
        splitter = ShuffleSplit(n_splits=1)
        for train_rows, test_rows in splitter.split(A):
            scorer = make_scorer(r2_score)
            all_scores = cross_val_score(self.R, A, y, cv=3, scoring=scorer)
            test_scores = cross_val_score(
                self.R, A[test_rows], y[test_rows], cv=3, scoring=scorer)
            train_scores = cross_val_score(
                self.R, A[train_rows], y[train_rows], cv=3, scoring=scorer)
            report = 'C-V:\t' + str(all_scores)
            report += '\nTst:\t' + str(test_scores)
            report += '\nTrn:\t' + str(train_scores)
            print(report)
Example 9
Project: twitter-stock-recommendation   Author: alvarobartt   File: test_multiclass.py    License: MIT License 6 votes vote down vote up
def test_safe_split_with_precomputed_kernel():
    """Verify ``_safe_split`` slices a precomputed kernel like the raw data.

    A linear kernel ``K = X X^T`` is split for a precomputed-kernel SVC and
    compared with the kernel rebuilt from the feature slices of a default
    SVC; the target slices of both estimators must match as well.
    """
    clf = SVC()
    clf_precomputed = SVC(kernel="precomputed")

    data = datasets.load_iris()
    X, y = data.data, data.target
    K = np.dot(X, X.T)

    # Take the first split produced by the shuffler.
    train, test = next(
        ShuffleSplit(test_size=0.25, random_state=0).split(X))

    X_train, y_train = _safe_split(clf, X, y, train)
    K_train, y_train_k = _safe_split(clf_precomputed, K, y, train)
    expected_K_train = np.dot(X_train, X_train.T)
    assert_array_almost_equal(K_train, expected_K_train)
    assert_array_almost_equal(y_train, y_train_k)

    X_test, y_test = _safe_split(clf, X, y, test, train)
    K_test, y_test_k = _safe_split(clf_precomputed, K, y, test, train)
    expected_K_test = np.dot(X_test, X_train.T)
    assert_array_almost_equal(K_test, expected_K_test)
    assert_array_almost_equal(y_test, y_test_k)
Example 10
Project: twitter-stock-recommendation   Author: alvarobartt   File: test_split.py    License: MIT License 6 votes vote down vote up
def test_2d_y():
    """Smoke test for 2d y and multi-label targets across all splitters.

    Splitting must work for the flat and the column-shaped target. With the
    multilabel target a ``ValueError`` is accepted only if its message lists
    the supported target types.
    """
    n_samples = 30
    rng = np.random.RandomState(1)
    # Keep this draw order: it determines the generated arrays.
    X = rng.randint(0, 3, size=(n_samples, 2))
    y = rng.randint(0, 3, size=(n_samples,))
    y_2d = y.reshape(-1, 1)
    y_multilabel = rng.randint(0, 2, size=(n_samples, 3))
    groups = rng.randint(0, 3, size=(n_samples,))

    splitters = [LeaveOneOut(), LeavePOut(p=2), KFold(), StratifiedKFold()]
    splitters += [RepeatedKFold(), RepeatedStratifiedKFold()]
    splitters += [ShuffleSplit(), StratifiedShuffleSplit(test_size=.5)]
    splitters += [GroupShuffleSplit(), LeaveOneGroupOut()]
    splitters += [LeavePGroupsOut(n_groups=2), GroupKFold(),
                  TimeSeriesSplit()]
    splitters += [PredefinedSplit(test_fold=groups)]

    for splitter in splitters:
        list(splitter.split(X, y, groups))
        list(splitter.split(X, y_2d, groups))
        try:
            list(splitter.split(X, y_multilabel, groups))
        except ValueError as exc:
            allowed_target_types = ('binary', 'multiclass')
            template = "Supported target types are: {}. Got 'multilabel"
            assert template.format(allowed_target_types) in str(exc)
Example 11
Project: EDeN   Author: fabriziocosta   File: estimator_utils.py    License: MIT License 5 votes vote down vote up
def estimate_predictive_performance(x_y,
                                    estimator=None,
                                    n_splits=10,
                                    random_state=1):
    """Cross-validate ``estimator`` and return its average-precision scores.

    ``x_y`` is unpacked into the data and the targets. The scores come from
    ``cross_val_score`` over ``n_splits`` shuffle splits that each hold out
    30% of the samples, seeded with ``random_state``.
    """
    features, targets = x_y
    return cross_val_score(
        estimator,
        features,
        targets,
        cv=ShuffleSplit(n_splits=n_splits,
                        test_size=0.3,
                        random_state=random_state),
        scoring=make_scorer(average_precision_score),
    )
Example 12
Project: HungaBunga   Author: ypeleg   File: core.py    License: MIT License 5 votes vote down vote up
def cv_reg(x, test_size = 0.2, n_splits = 5, random_state=None):
    """Return the train/test index splits of ``x`` produced by ``ss``.

    ``ss`` is presumably the module's alias for
    ``sklearn.model_selection.ShuffleSplit`` (the import is not visible here;
    verify against the file header). ``test_size`` is passed by keyword
    because scikit-learn >= 1.0 rejects it as a positional argument.
    """
    splitter = ss(n_splits=n_splits, test_size=test_size,
                  random_state=random_state)
    return splitter.split(x)
Example 13
Project: verde   Author: fatiando   File: test_model_selection.py    License: BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def test_cross_val_score_client(trend):
    "Test the deprecated dask Client interface"
    coords, data = trend[:2]
    model = Trend(degree=1)
    nsplits = 5
    cross_validator = ShuffleSplit(n_splits=nsplits, random_state=0)
    client = Client(processes=False)
    # Close the client even when scoring or a future fails, so a failing
    # test does not leave the client open.
    try:
        futures = cross_val_score(
            model, coords, data, cv=cross_validator, client=client
        )
        scores = [future.result() for future in futures]
    finally:
        client.close()
    assert len(scores) == nsplits
    npt.assert_allclose(scores, 1)
Example 14
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_split.py    License: MIT License 5 votes vote down vote up
def test_shuffle_split():
    """Equivalent ``test_size`` spellings must give identical splits.

    The fraction 0.2 and the counts 2, ``np.int32(2)`` and ``int(2)`` are
    compared split by split, train indices first and test indices second.
    """
    test_sizes = (0.2, 2, np.int32(2), int(2))
    generators = [
        ShuffleSplit(test_size=size, random_state=0).split(X)
        for size in test_sizes
    ]
    for first, second, third, fourth in zip(*generators):
        neighbours = ((first, second), (second, third), (third, fourth))
        # part 0 holds the train indices, part 1 the test indices
        for part in (0, 1):
            for left, right in neighbours:
                assert_array_equal(left[part], right[part])
Example 15
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_split.py    License: MIT License 5 votes vote down vote up
def test_shufflesplit_errors(test_size, train_size):
    """Invalid ``test_size``/``train_size`` values must raise ``ValueError``.

    Both the construction and the first split sit inside the ``raises``
    block, so the error may come from either step.
    """
    with pytest.raises(ValueError):
        splitter = ShuffleSplit(test_size=test_size, train_size=train_size)
        next(splitter.split(X))
Example 16
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_validation.py    License: MIT License 5 votes vote down vote up
def test_fit_and_score_working():
    """``_fit_and_score`` returns the parameters last when asked to."""
    X, y = make_classification(n_samples=30, random_state=0)
    clf = SVC(kernel="linear", random_state=0)
    train, test = next(ShuffleSplit().split(X))
    # Test return_parameters option
    parameters = {'max_iter': 100, 'tol': 0.1}
    result = _fit_and_score(
        clf, X, y, dict(), train, test, 0,
        parameters=parameters,
        fit_params=None,
        return_parameters=True,
    )
    assert result[-1] == parameters
Example 17
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_validation.py    License: MIT License 5 votes vote down vote up
def test_fit_and_score_verbosity(capsys, return_train_score, scorer, expected):
    """The second line printed by ``_fit_and_score`` must equal ``expected``.

    ``_fit_and_score`` is called with 10 as its seventh positional argument
    and the captured standard output is inspected.
    """
    X, y = make_classification(n_samples=30, random_state=0)
    clf = SVC(kernel="linear", random_state=0)
    train, test = next(ShuffleSplit().split(X))

    # test print without train score
    _fit_and_score(clf, X, y, scorer, train, test, 10, None, None,
                   return_train_score=return_train_score)
    captured = capsys.readouterr()
    printed_lines = captured.out.split('\n')
    assert printed_lines[1] == expected
Example 18
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_sgd.py    License: MIT License 5 votes vote down vote up
def test_validation_set_not_used_for_training(klass):
    """Early stopping must train only on the non-validation samples.

    An estimator fitted with ``early_stopping=True`` on all of iris is
    compared with one fitted without early stopping on the train part of an
    equally seeded shuffle split; their coefficients must be equal.
    """
    X, Y = iris.data, iris.target
    validation_fraction = 0.4
    seed = 42
    shuffle = False
    max_iter = 10

    # Settings shared by both estimators; each gets its own RandomState.
    shared = dict(learning_rate='constant', eta0=0.01,
                  tol=None, max_iter=max_iter, shuffle=shuffle)

    clf1 = klass(early_stopping=True,
                 random_state=np.random.RandomState(seed),
                 validation_fraction=validation_fraction,
                 **shared)
    clf1.fit(X, Y)
    assert clf1.n_iter_ == max_iter

    clf2 = klass(early_stopping=False,
                 random_state=np.random.RandomState(seed),
                 **shared)

    splitter_cls = (StratifiedShuffleSplit if is_classifier(clf2)
                    else ShuffleSplit)
    cv = splitter_cls(test_size=validation_fraction, random_state=seed)
    idx_train = next(cv.split(X, Y))[0]
    idx_train = np.sort(idx_train)  # remove shuffling
    clf2.fit(X[idx_train], Y[idx_train])
    assert clf2.n_iter_ == max_iter

    assert_array_equal(clf1.coef_, clf2.coef_)
Example 19
Project: cherry   Author: Windsooon   File: displayer.py    License: MIT License 5 votes vote down vote up
def display_learning_curve(self, vectorizer, clf, x_data, y_data):
        """Plot learning curves for a vectorizer + classifier pipeline.

        The two steps are chained in a Pipeline and handed to
        ``self.plot_learning_curve`` with a 10-split shuffle CV that holds
        out 20% of the data per split (seed 0).
        """
        pipeline = Pipeline([
            ('vectorizer', vectorizer),
            ('clf', clf)])
        splitter = ShuffleSplit(n_splits=10, test_size=0.2, random_state=0)
        self.plot_learning_curve(
            pipeline,
            "Learning Curves",
            x_data,
            y_data,
            ylim=(0.7, 1.01),
            cv=splitter,
            n_jobs=-1,
        )
Example 20
Project: deep_pipe   Author: neuro-ml   File: base.py    License: MIT License 5 votes vote down vote up
def train_test_split_groups(X, *, val_size, groups=None, **kwargs):
    """Split ``X`` into a train part and a validation part.

    A ``GroupShuffleSplit`` is used when ``groups`` is given, a plain
    ``ShuffleSplit`` otherwise. ``val_size`` becomes the splitter's
    ``test_size`` and ``kwargs`` are forwarded to its constructor. Only the
    first generated split is used.

    Returns ``(X[train], X[val])``.
    """
    if groups is None:
        splitter_cls = ShuffleSplit
    else:
        splitter_cls = GroupShuffleSplit
    splitter = splitter_cls(test_size=val_size, **kwargs)
    train_idx, val_idx = next(splitter.split(X=X, groups=groups))
    return X[train_idx], X[val_idx]
Example 21
Project: skorch   Author: skorch-dev   File: dataset.py    License: BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def _check_cv_float(self):
        """Build the shuffle splitter for a ``self.cv`` used as ``test_size``.

        Returns a ``StratifiedShuffleSplit`` when ``self.stratified`` is
        truthy and a ``ShuffleSplit`` otherwise, seeded with
        ``self.random_state``.
        """
        if self.stratified:
            splitter_cls = StratifiedShuffleSplit
        else:
            splitter_cls = ShuffleSplit
        return splitter_cls(test_size=self.cv, random_state=self.random_state)
Example 22
Project: Quadflor   Author: quadflor   File: br_kneighbor_classifier.py    License: BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def _get_split(X, y):
        """Randomly split ``X`` and ``y`` into a train and a validation part.

        One shuffle split with the splitter's default sizes is drawn over the
        ``y.shape[0]`` samples.

        Returns ``(X_train, X_validate, y_train, y_validate)``.
        """
        # The original call, ShuffleSplit(n, n_iter=1) iterated directly, is the
        # API of the removed sklearn.cross_validation module; it fails with
        # sklearn.model_selection.ShuffleSplit. The local import pins the
        # model_selection class whatever the module-level import provides.
        from sklearn.model_selection import ShuffleSplit

        splitter = ShuffleSplit(n_splits=1)
        # Splitting over y keeps the sample count at y.shape[0], as before.
        train, validate = next(splitter.split(y))
        X_train, X_validate = X[train], X[validate]
        y_train, y_validate = y[train], y[validate]
        return X_train, X_validate, y_train, y_validate
Example 23
Project: ramp-workflow   Author: paris-saclay-cds   File: clustering.py    License: BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def get_cv(self, X, y):
        """Yield train/test row indices split by event id.

        The unique values of the first column of ``y`` are shuffled and split
        (``self.n_cv`` splits, ``self.cv_test_size`` test share, seeded with
        ``self.random_state``). Each yielded pair holds the row positions
        whose id falls in the train or the test ids, so the rows of one id
        always stay together.
        """
        event_ids = y[:, 0]
        unique_event_ids = np.unique(event_ids)
        event_cv = ShuffleSplit(
            n_splits=self.n_cv, test_size=self.cv_test_size,
            random_state=self.random_state)
        for train_pos, test_pos in event_cv.split(unique_event_ids):
            in_train = np.in1d(event_ids, unique_event_ids[train_pos])
            train_is = np.where(in_train)[0]
            in_test = np.in1d(event_ids, unique_event_ids[test_pos])
            test_is = np.where(in_test)[0]
            yield train_is, test_is
Example 24
Project: ramp-workflow   Author: paris-saclay-cds   File: problem.py    License: BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def get_cv(X, y):
    """Return the train/test index splits of ``X`` used for cross-validation.

    Two shuffle splits with a 20% test share and a fixed seed of 57.
    ``y`` is accepted but not used.
    """
    return ShuffleSplit(n_splits=2, test_size=0.2, random_state=57).split(X)
Example 25
Project: ramp-workflow   Author: paris-saclay-cds   File: problem.py    License: BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def get_cv(X, y):
    """Generate cross-validation index pairs over ``X``.

    Uses a ShuffleSplit with 2 splits, ``test_size=0.2`` and
    ``random_state=57``; the ``y`` argument is ignored.
    """
    splitter = ShuffleSplit(
        n_splits=2,
        test_size=0.2,
        random_state=57,
    )
    return splitter.split(X)
Example 26
Project: ramp-workflow   Author: paris-saclay-cds   File: problem.py    License: BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def get_cv(X, y):
    """Return the train/test index splits of ``X`` used for cross-validation.

    Eight shuffle splits with a 50% test share and a fixed seed of 57.
    ``y`` is accepted but not used.
    """
    return ShuffleSplit(n_splits=8, test_size=0.5, random_state=57).split(X)
Example 27
Project: ramp-workflow   Author: paris-saclay-cds   File: problem.py    License: BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def get_cv(X, y):
    """Generate cross-validation index pairs over ``X``.

    Uses a ShuffleSplit with 8 splits, ``test_size=0.5`` and
    ``random_state=57``; the ``y`` argument is ignored.
    """
    splitter = ShuffleSplit(
        n_splits=8,
        test_size=0.5,
        random_state=57,
    )
    return splitter.split(X)
Example 28
Project: gumpy   Author: gumpy-bci   File: split.py    License: MIT License 5 votes vote down vote up
def stratified_KFold(features, labels, n_splits):
    """Split ``features``/``labels`` with a stratified K-fold cross-validator.

    Every fold overwrites the result of the previous one, so only the train
    and test parts of the LAST fold are returned.

    Returns ``(X_train, X_test, Y_train, Y_test)``.
    """
    folds = StratifiedKFold(n_splits)
    folds.get_n_splits(features, labels)
    for fit_idx, eval_idx in folds.split(features, labels):
        X_train = features[fit_idx]
        X_test = features[eval_idx]
        Y_train = labels[fit_idx]
        Y_test = labels[eval_idx]
    return X_train, X_test, Y_train, Y_test

#Stratified ShuffleSplit cross-validator 
Example 29
Project: gumpy   Author: gumpy-bci   File: split.py    License: MIT License 5 votes vote down vote up
def stratified_shuffle_Split(features, labels, n_splits, test_size, random_state):
    """Split ``features``/``labels`` with a stratified ShuffleSplit.

    ``n_splits``, ``test_size`` and ``random_state`` are forwarded to
    ``StratifiedShuffleSplit``. Every split overwrites the previous one, so
    only the parts of the LAST split are returned.

    Returns ``(X_train, X_test, Y_train, Y_test)``.
    """
    # test_size must be a keyword: scikit-learn >= 1.0 accepts only n_splits
    # positionally and raises TypeError otherwise.
    cv = StratifiedShuffleSplit(n_splits, test_size=test_size,
                                random_state=random_state)
    for train_index, test_index in cv.split(features, labels):
        X_train = features[train_index]
        X_test = features[test_index]
        Y_train = labels[train_index]
        Y_test = labels[test_index]
    return X_train, X_test, Y_train, Y_test


#Random permutation cross-validator 
Example 30
Project: gumpy   Author: gumpy-bci   File: split.py    License: MIT License 5 votes vote down vote up
def shuffle_Split(features, labels, n_splits, test_size, random_state):
    """Split ``features``/``labels`` with ShuffleSplit (random permutations).

    ``n_splits``, ``test_size`` and ``random_state`` are forwarded to
    ``ShuffleSplit``. Every split overwrites the previous one, so only the
    parts of the LAST split are returned.

    Returns ``(X_train, X_test, Y_train, Y_test)``.
    """
    # test_size must be a keyword: scikit-learn >= 1.0 accepts only n_splits
    # positionally and raises TypeError otherwise.
    cv = ShuffleSplit(n_splits, test_size=test_size,
                      random_state=random_state)
    for train_index, test_index in cv.split(features):
        X_train = features[train_index]
        X_test = features[test_index]
        Y_train = labels[train_index]
        Y_test = labels[test_index]
    return X_train, X_test, Y_train, Y_test