Python sklearn.datasets.make_hastie_10_2() Examples

The following are 30 code examples showing how to use sklearn.datasets.make_hastie_10_2(). These examples are extracted from open source projects; the project, author, source file, and license are noted above each example.

You may also want to check out all available functions and classes of the module sklearn.datasets.
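
Before the project examples, here is a minimal usage sketch of the generator itself (the sample size and random_state below are only illustrative). make_hastie_10_2 draws ten standard-normal features per sample and assigns the binary target from Hastie et al., ESLII Example 10.2: y = 1 when the sum of squared features exceeds 9.34, and y = -1 otherwise.

from sklearn.datasets import make_hastie_10_2
import numpy as np

# Draw 12,000 samples, each with 10 standard-normal features; the target is
# y = 1 if sum(x_i ** 2) > 9.34 (the chi-squared(10) median), else y = -1.
X, y = make_hastie_10_2(n_samples=12000, random_state=1)
print(X.shape)       # (12000, 10)
print(np.unique(y))  # [-1.  1.]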

Example 1
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_bagging.py    License: MIT License
def test_warm_start(random_state=42):
    # Test if fitting incrementally with warm start gives an ensemble of the
    # right size and the same results as a normal fit.
    X, y = make_hastie_10_2(n_samples=20, random_state=1)

    clf_ws = None
    for n_estimators in [5, 10]:
        if clf_ws is None:
            clf_ws = BaggingClassifier(n_estimators=n_estimators,
                                       random_state=random_state,
                                       warm_start=True)
        else:
            clf_ws.set_params(n_estimators=n_estimators)
        clf_ws.fit(X, y)
        assert_equal(len(clf_ws), n_estimators)

    clf_no_ws = BaggingClassifier(n_estimators=10, random_state=random_state,
                                  warm_start=False)
    clf_no_ws.fit(X, y)

    assert_equal(set([tree.random_state for tree in clf_ws]),
                 set([tree.random_state for tree in clf_no_ws])) 
Example 2
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_bagging.py    License: MIT License
def test_warm_start_equal_n_estimators():
    # Test that nothing happens when fitting without increasing n_estimators
    X, y = make_hastie_10_2(n_samples=20, random_state=1)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=43)

    clf = BaggingClassifier(n_estimators=5, warm_start=True, random_state=83)
    clf.fit(X_train, y_train)

    y_pred = clf.predict(X_test)
    # modify X to nonsense values, this should not change anything
    X_train += 1.

    assert_warns_message(UserWarning,
                         "Warm-start fitting without increasing n_estimators does not",
                         clf.fit, X_train, y_train)
    assert_array_equal(y_pred, clf.predict(X_test)) 
Example 3
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_bagging.py    License: MIT License
def test_warm_start_equivalence():
    # warm started classifier with 5+5 estimators should be equivalent to
    # one classifier with 10 estimators
    X, y = make_hastie_10_2(n_samples=20, random_state=1)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=43)

    clf_ws = BaggingClassifier(n_estimators=5, warm_start=True,
                               random_state=3141)
    clf_ws.fit(X_train, y_train)
    clf_ws.set_params(n_estimators=10)
    clf_ws.fit(X_train, y_train)
    y1 = clf_ws.predict(X_test)

    clf = BaggingClassifier(n_estimators=10, warm_start=False,
                            random_state=3141)
    clf.fit(X_train, y_train)
    y2 = clf.predict(X_test)

    assert_array_almost_equal(y1, y2) 
Example 4
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_gradient_boosting.py    License: MIT License
def check_classification_synthetic(presort, loss):
    # Test GradientBoostingClassifier on synthetic dataset used by
    # Hastie et al. in ESLII Example 12.7.
    X, y = datasets.make_hastie_10_2(n_samples=12000, random_state=1)

    X_train, X_test = X[:2000], X[2000:]
    y_train, y_test = y[:2000], y[2000:]

    gbrt = GradientBoostingClassifier(n_estimators=100, min_samples_split=2,
                                      max_depth=1, loss=loss,
                                      learning_rate=1.0, random_state=0)
    gbrt.fit(X_train, y_train)
    error_rate = (1.0 - gbrt.score(X_test, y_test))
    assert_less(error_rate, 0.09)

    gbrt = GradientBoostingClassifier(n_estimators=200, min_samples_split=2,
                                      max_depth=1, loss=loss,
                                      learning_rate=1.0, subsample=0.5,
                                      random_state=0,
                                      presort=presort)
    gbrt.fit(X_train, y_train)
    error_rate = (1.0 - gbrt.score(X_test, y_test))
    assert_less(error_rate, 0.08) 
Example 5
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_gradient_boosting.py    License: MIT License
def test_check_inputs_predict_stages():
    # check that predict_stages raises an error if the type of X is not
    # supported
    x, y = datasets.make_hastie_10_2(n_samples=100, random_state=1)
    x_sparse_csc = csc_matrix(x)
    clf = GradientBoostingClassifier(n_estimators=100, random_state=1)
    clf.fit(x, y)
    score = np.zeros((y.shape)).reshape(-1, 1)
    assert_raise_message(ValueError,
                         "When X is a sparse matrix, a CSR format is expected",
                         predict_stages, clf.estimators_, x_sparse_csc,
                         clf.learning_rate, score)
    x_fortran = np.asfortranarray(x)
    assert_raise_message(ValueError,
                         "X should be C-ordered np.ndarray",
                         predict_stages, clf.estimators_, x_fortran,
                         clf.learning_rate, score) 
Example 6
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_gradient_boosting.py    License: MIT License
def test_warm_start(Cls):
    # Test if warm start equals fit.
    X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1)
    est = Cls(n_estimators=200, max_depth=1)
    est.fit(X, y)

    est_ws = Cls(n_estimators=100, max_depth=1, warm_start=True)
    est_ws.fit(X, y)
    est_ws.set_params(n_estimators=200)
    est_ws.fit(X, y)

    if Cls is GradientBoostingRegressor:
        assert_array_almost_equal(est_ws.predict(X), est.predict(X))
    else:
        # Random state is preserved and hence predict_proba must also be
        # same
        assert_array_equal(est_ws.predict(X), est.predict(X))
        assert_array_almost_equal(est_ws.predict_proba(X),
                                  est.predict_proba(X)) 
Example 7
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_gradient_boosting.py    License: MIT License
def test_warm_start_fortran(Cls):
    # Test that fitting on Fortran-ordered X gives the same results as
    # fitting on C-ordered X
    X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1)
    est_c = Cls(n_estimators=1, random_state=1, warm_start=True)
    est_fortran = Cls(n_estimators=1, random_state=1, warm_start=True)

    est_c.fit(X, y)
    est_c.set_params(n_estimators=11)
    est_c.fit(X, y)

    X_fortran = np.asfortranarray(X)
    est_fortran.fit(X_fortran, y)
    est_fortran.set_params(n_estimators=11)
    est_fortran.fit(X_fortran, y)

    assert_array_almost_equal(est_c.predict(X), est_fortran.predict(X)) 
Example 8
Project: twitter-stock-recommendation   Author: alvarobartt   File: test_bagging.py    License: MIT License
def test_warm_start(random_state=42):
    # Test if fitting incrementally with warm start gives an ensemble of the
    # right size and the same results as a normal fit.
    X, y = make_hastie_10_2(n_samples=20, random_state=1)

    clf_ws = None
    for n_estimators in [5, 10]:
        if clf_ws is None:
            clf_ws = BaggingClassifier(n_estimators=n_estimators,
                                       random_state=random_state,
                                       warm_start=True)
        else:
            clf_ws.set_params(n_estimators=n_estimators)
        clf_ws.fit(X, y)
        assert_equal(len(clf_ws), n_estimators)

    clf_no_ws = BaggingClassifier(n_estimators=10, random_state=random_state,
                                  warm_start=False)
    clf_no_ws.fit(X, y)

    assert_equal(set([tree.random_state for tree in clf_ws]),
                 set([tree.random_state for tree in clf_no_ws])) 
Example 9
Project: twitter-stock-recommendation   Author: alvarobartt   File: test_bagging.py    License: MIT License
def test_warm_start_equal_n_estimators():
    # Test that nothing happens when fitting without increasing n_estimators
    X, y = make_hastie_10_2(n_samples=20, random_state=1)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=43)

    clf = BaggingClassifier(n_estimators=5, warm_start=True, random_state=83)
    clf.fit(X_train, y_train)

    y_pred = clf.predict(X_test)
    # modify X to nonsense values, this should not change anything
    X_train += 1.

    assert_warns_message(UserWarning,
                         "Warm-start fitting without increasing n_estimators does not",
                         clf.fit, X_train, y_train)
    assert_array_equal(y_pred, clf.predict(X_test)) 
Example 10
Project: twitter-stock-recommendation   Author: alvarobartt   File: test_bagging.py    License: MIT License
def test_warm_start_equivalence():
    # warm started classifier with 5+5 estimators should be equivalent to
    # one classifier with 10 estimators
    X, y = make_hastie_10_2(n_samples=20, random_state=1)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=43)

    clf_ws = BaggingClassifier(n_estimators=5, warm_start=True,
                               random_state=3141)
    clf_ws.fit(X_train, y_train)
    clf_ws.set_params(n_estimators=10)
    clf_ws.fit(X_train, y_train)
    y1 = clf_ws.predict(X_test)

    clf = BaggingClassifier(n_estimators=10, warm_start=False,
                            random_state=3141)
    clf.fit(X_train, y_train)
    y2 = clf.predict(X_test)

    assert_array_almost_equal(y1, y2) 
Example 11
Project: twitter-stock-recommendation   Author: alvarobartt   File: test_gradient_boosting.py    License: MIT License
def check_classification_synthetic(presort, loss):
    # Test GradientBoostingClassifier on synthetic dataset used by
    # Hastie et al. in ESLII Example 12.7.
    X, y = datasets.make_hastie_10_2(n_samples=12000, random_state=1)

    X_train, X_test = X[:2000], X[2000:]
    y_train, y_test = y[:2000], y[2000:]

    gbrt = GradientBoostingClassifier(n_estimators=100, min_samples_split=2,
                                      max_depth=1, loss=loss,
                                      learning_rate=1.0, random_state=0)
    gbrt.fit(X_train, y_train)
    error_rate = (1.0 - gbrt.score(X_test, y_test))
    assert_less(error_rate, 0.09)

    gbrt = GradientBoostingClassifier(n_estimators=200, min_samples_split=2,
                                      max_depth=1, loss=loss,
                                      learning_rate=1.0, subsample=0.5,
                                      random_state=0,
                                      presort=presort)
    gbrt.fit(X_train, y_train)
    error_rate = (1.0 - gbrt.score(X_test, y_test))
    assert_less(error_rate, 0.08) 
Example 12
Project: twitter-stock-recommendation   Author: alvarobartt   File: test_gradient_boosting.py    License: MIT License
def test_warm_start_oob():
    # Test if warm start OOB equals fit.
    X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1)
    for Cls in [GradientBoostingRegressor, GradientBoostingClassifier]:
        est = Cls(n_estimators=200, max_depth=1, subsample=0.5,
                  random_state=1)
        est.fit(X, y)

        est_ws = Cls(n_estimators=100, max_depth=1, subsample=0.5,
                     random_state=1, warm_start=True)
        est_ws.fit(X, y)
        est_ws.set_params(n_estimators=200)
        est_ws.fit(X, y)

        assert_array_almost_equal(est_ws.oob_improvement_[:100],
                                  est.oob_improvement_[:100]) 
Example 13
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_bagging.py    License: MIT License
def test_warm_start_smaller_n_estimators():
    # Test that a warm-started second fit with smaller n_estimators raises an error.
    X, y = make_hastie_10_2(n_samples=20, random_state=1)
    clf = BaggingClassifier(n_estimators=5, warm_start=True)
    clf.fit(X, y)
    clf.set_params(n_estimators=4)
    assert_raises(ValueError, clf.fit, X, y) 
Example 14
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_bagging.py    License: MIT License
def test_warm_start_with_oob_score_fails():
    # Check that using oob_score and warm_start simultaneously fails
    X, y = make_hastie_10_2(n_samples=20, random_state=1)
    clf = BaggingClassifier(n_estimators=5, warm_start=True, oob_score=True)
    assert_raises(ValueError, clf.fit, X, y) 
Example 15
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_bagging.py    License: MIT License
def test_oob_score_consistency():
    # Make sure OOB scores are identical when random_state, estimator, and
    # training data are fixed and fitting is done twice
    X, y = make_hastie_10_2(n_samples=200, random_state=1)
    bagging = BaggingClassifier(KNeighborsClassifier(), max_samples=0.5,
                                max_features=0.5, oob_score=True,
                                random_state=1)
    assert_equal(bagging.fit(X, y).oob_score_, bagging.fit(X, y).oob_score_) 
Example 16
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_bagging.py    License: MIT License
def test_estimators_samples():
    # Check that format of estimators_samples_ is correct and that results
    # generated at fit time can be identically reproduced at a later time
    # using data saved in object attributes.
    X, y = make_hastie_10_2(n_samples=200, random_state=1)
    bagging = BaggingClassifier(LogisticRegression(), max_samples=0.5,
                                max_features=0.5, random_state=1,
                                bootstrap=False)
    bagging.fit(X, y)

    # Get relevant attributes
    estimators_samples = bagging.estimators_samples_
    estimators_features = bagging.estimators_features_
    estimators = bagging.estimators_

    # Test for correct formatting
    assert_equal(len(estimators_samples), len(estimators))
    assert_equal(len(estimators_samples[0]), len(X) // 2)
    assert_equal(estimators_samples[0].dtype.kind, 'i')

    # Re-fit single estimator to test for consistent sampling
    estimator_index = 0
    estimator_samples = estimators_samples[estimator_index]
    estimator_features = estimators_features[estimator_index]
    estimator = estimators[estimator_index]

    X_train = (X[estimator_samples])[:, estimator_features]
    y_train = y[estimator_samples]

    orig_coefs = estimator.coef_
    estimator.fit(X_train, y_train)
    new_coefs = estimator.coef_

    assert_array_almost_equal(orig_coefs, new_coefs) 
Example 17
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_bagging.py    License: MIT License
def test_max_samples_consistency():
    # Make sure validated max_samples and original max_samples are identical
    # when valid integer max_samples supplied by user
    max_samples = 100
    X, y = make_hastie_10_2(n_samples=2*max_samples, random_state=1)
    bagging = BaggingClassifier(KNeighborsClassifier(),
                                max_samples=max_samples,
                                max_features=0.5, random_state=1)
    bagging.fit(X, y)
    assert_equal(bagging._max_samples, max_samples) 
Example 18
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_gradient_boosting.py    License: MIT License
def test_max_feature_auto():
    # Test that max_features is set properly for floats and strings.
    X, y = datasets.make_hastie_10_2(n_samples=12000, random_state=1)
    _, n_features = X.shape

    X_train = X[:2000]
    y_train = y[:2000]

    gbrt = GradientBoostingClassifier(n_estimators=1, max_features='auto')
    gbrt.fit(X_train, y_train)
    assert_equal(gbrt.max_features_, int(np.sqrt(n_features)))

    gbrt = GradientBoostingRegressor(n_estimators=1, max_features='auto')
    gbrt.fit(X_train, y_train)
    assert_equal(gbrt.max_features_, n_features)

    gbrt = GradientBoostingRegressor(n_estimators=1, max_features=0.3)
    gbrt.fit(X_train, y_train)
    assert_equal(gbrt.max_features_, int(n_features * 0.3))

    gbrt = GradientBoostingRegressor(n_estimators=1, max_features='sqrt')
    gbrt.fit(X_train, y_train)
    assert_equal(gbrt.max_features_, int(np.sqrt(n_features)))

    gbrt = GradientBoostingRegressor(n_estimators=1, max_features='log2')
    gbrt.fit(X_train, y_train)
    assert_equal(gbrt.max_features_, int(np.log2(n_features)))

    gbrt = GradientBoostingRegressor(n_estimators=1,
                                     max_features=0.01 / X.shape[1])
    gbrt.fit(X_train, y_train)
    assert_equal(gbrt.max_features_, 1) 
Example 19
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_gradient_boosting.py    License: MIT License
def test_staged_predict_proba():
    # Test whether staged predict proba eventually gives
    # the same prediction.
    X, y = datasets.make_hastie_10_2(n_samples=1200,
                                     random_state=1)
    X_train, y_train = X[:200], y[:200]
    X_test, y_test = X[200:], y[200:]
    clf = GradientBoostingClassifier(n_estimators=20)
    # test that NotFittedError is raised if the classifier is not fitted
    assert_raises(NotFittedError, lambda X: np.fromiter(
        clf.staged_predict_proba(X), dtype=np.float64), X_test)

    clf.fit(X_train, y_train)

    # test if prediction for last stage equals ``predict``
    for y_pred in clf.staged_predict(X_test):
        assert_equal(y_test.shape, y_pred.shape)

    assert_array_equal(clf.predict(X_test), y_pred)

    # test if prediction for last stage equals ``predict_proba``
    for staged_proba in clf.staged_predict_proba(X_test):
        assert_equal(y_test.shape[0], staged_proba.shape[0])
        assert_equal(2, staged_proba.shape[1])

    assert_array_almost_equal(clf.predict_proba(X_test), staged_proba) 
Example 20
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_gradient_boosting.py    License: MIT License
def test_warm_start_n_estimators(Cls):
    # Test if warm start equals fit - set n_estimators.
    X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1)
    est = Cls(n_estimators=300, max_depth=1)
    est.fit(X, y)

    est_ws = Cls(n_estimators=100, max_depth=1, warm_start=True)
    est_ws.fit(X, y)
    est_ws.set_params(n_estimators=300)
    est_ws.fit(X, y)

    assert_array_almost_equal(est_ws.predict(X), est.predict(X)) 
Example 21
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_gradient_boosting.py    License: MIT License
def test_warm_start_max_depth(Cls):
    # Test that it is possible to fit trees of different depths in the ensemble.
    X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1)
    est = Cls(n_estimators=100, max_depth=1, warm_start=True)
    est.fit(X, y)
    est.set_params(n_estimators=110, max_depth=2)
    est.fit(X, y)

    # last 10 trees have different depth
    assert_equal(est.estimators_[0, 0].max_depth, 1)
    for i in range(1, 11):
        assert_equal(est.estimators_[-i, 0].max_depth, 2) 
Example 22
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_gradient_boosting.py    License: MIT License
def test_warm_start_zero_n_estimators(Cls):
    # Test if warm start with zero n_estimators raises error
    X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1)
    est = Cls(n_estimators=100, max_depth=1, warm_start=True)
    est.fit(X, y)
    est.set_params(n_estimators=0)
    assert_raises(ValueError, est.fit, X, y) 
Example 23
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_gradient_boosting.py    License: MIT License
def test_warm_start_smaller_n_estimators(Cls):
    # Test if warm start with smaller n_estimators raises error
    X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1)
    est = Cls(n_estimators=100, max_depth=1, warm_start=True)
    est.fit(X, y)
    est.set_params(n_estimators=99)
    assert_raises(ValueError, est.fit, X, y) 
Example 24
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_gradient_boosting.py    License: MIT License
def test_warm_start_equal_n_estimators(Cls):
    # Test if warm start with equal n_estimators does nothing
    X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1)
    est = Cls(n_estimators=100, max_depth=1)
    est.fit(X, y)

    est2 = clone(est)
    est2.set_params(n_estimators=est.n_estimators, warm_start=True)
    est2.fit(X, y)

    assert_array_almost_equal(est2.predict(X), est.predict(X)) 
Example 25
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_gradient_boosting.py    License: MIT License
def test_warm_start_oob_switch(Cls):
    # Test if oob can be turned on during warm start.
    X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1)
    est = Cls(n_estimators=100, max_depth=1, warm_start=True)
    est.fit(X, y)
    est.set_params(n_estimators=110, subsample=0.5)
    est.fit(X, y)

    assert_array_equal(est.oob_improvement_[:100], np.zeros(100))
    # the last 10 are not zeros
    assert_array_equal(est.oob_improvement_[-10:] == 0.0,
                       np.zeros(10, dtype=bool)) 
Example 26
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_gradient_boosting.py    License: MIT License
def test_warm_start_oob(Cls):
    # Test if warm start OOB equals fit.
    X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1)
    est = Cls(n_estimators=200, max_depth=1, subsample=0.5,
              random_state=1)
    est.fit(X, y)

    est_ws = Cls(n_estimators=100, max_depth=1, subsample=0.5,
                 random_state=1, warm_start=True)
    est_ws.fit(X, y)
    est_ws.set_params(n_estimators=200)
    est_ws.fit(X, y)

    assert_array_almost_equal(est_ws.oob_improvement_[:100],
                              est.oob_improvement_[:100]) 
Example 27
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_gradient_boosting.py    License: MIT License
def test_monitor_early_stopping(Cls):
    # Test if monitor return value works.
    X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1)

    est = Cls(n_estimators=20, max_depth=1, random_state=1, subsample=0.5)
    est.fit(X, y, monitor=early_stopping_monitor)
    assert_equal(est.n_estimators, 20)  # this is not altered
    assert_equal(est.estimators_.shape[0], 10)
    assert_equal(est.train_score_.shape[0], 10)
    assert_equal(est.oob_improvement_.shape[0], 10)

    # try refit
    est.set_params(n_estimators=30)
    est.fit(X, y)
    assert_equal(est.n_estimators, 30)
    assert_equal(est.estimators_.shape[0], 30)
    assert_equal(est.train_score_.shape[0], 30)

    est = Cls(n_estimators=20, max_depth=1, random_state=1, subsample=0.5,
              warm_start=True)
    est.fit(X, y, monitor=early_stopping_monitor)
    assert_equal(est.n_estimators, 20)
    assert_equal(est.estimators_.shape[0], 10)
    assert_equal(est.train_score_.shape[0], 10)
    assert_equal(est.oob_improvement_.shape[0], 10)

    # try refit
    est.set_params(n_estimators=30, warm_start=False)
    est.fit(X, y)
    assert_equal(est.n_estimators, 30)
    assert_equal(est.train_score_.shape[0], 30)
    assert_equal(est.estimators_.shape[0], 30)
    assert_equal(est.oob_improvement_.shape[0], 30) 
Example 28
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_gradient_boosting.py    License: MIT License
def test_complete_classification():
    # Test greedy trees with max_depth + 1 leaves.
    from sklearn.tree._tree import TREE_LEAF
    X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1)
    k = 4

    est = GradientBoostingClassifier(n_estimators=20, max_depth=None,
                                     random_state=1, max_leaf_nodes=k + 1)
    est.fit(X, y)

    tree = est.estimators_[0, 0].tree_
    assert_equal(tree.max_depth, k)
    assert_equal(tree.children_left[tree.children_left == TREE_LEAF].shape[0],
                 k + 1) 
Example 29
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_gradient_boosting.py    License: MIT License
def test_max_leaf_nodes_max_depth(GBEstimator):
    # Test precedence of max_leaf_nodes over max_depth.
    X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1)

    k = 4

    est = GBEstimator(max_depth=1, max_leaf_nodes=k).fit(X, y)
    tree = est.estimators_[0, 0].tree_
    assert_equal(tree.max_depth, 1)

    est = GBEstimator(max_depth=1).fit(X, y)
    tree = est.estimators_[0, 0].tree_
    assert_equal(tree.max_depth, 1) 
Example 30
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_gradient_boosting.py    License: MIT License
def test_min_impurity_split(GBEstimator):
    # Test if min_impurity_split of base estimators is set
    # Regression test for #8006
    X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1)

    est = GBEstimator(min_impurity_split=0.1)
    est = assert_warns_message(DeprecationWarning, "min_impurity_decrease",
                               est.fit, X, y)
    for tree in est.estimators_.flat:
        assert_equal(tree.min_impurity_split, 0.1)