Python sklearn.ensemble.BaggingClassifier() Examples
The following are 30 code examples that show how to use sklearn.ensemble.BaggingClassifier(). They are extracted from open source projects; the project, author, source file, and license are listed above each example. You may also want to check out the other available functions and classes of the sklearn.ensemble module.
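Before the project examples, here is a minimal, self-contained sketch of typical BaggingClassifier usage, assuming only scikit-learn's bundled iris dataset and a decision-tree base estimator; it is not taken from any of the projects below. Note that the base_estimator keyword matches the older scikit-learn API used throughout these examples (recent releases rename it to estimator).

from sklearn.datasets import load_iris
from sklearn.ensemble import BaggingClassifier
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

# Load a toy dataset and hold out a test split.
X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# Bag 10 decision trees, each fit on a bootstrap sample of the training rows.
clf = BaggingClassifier(base_estimator=DecisionTreeClassifier(),
                        n_estimators=10,
                        max_samples=1.0,
                        bootstrap=True,
                        random_state=0)
clf.fit(X_train, y_train)
print("Test accuracy:", clf.score(X_test, y_test))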
Example 1
Project: Mastering-Elasticsearch-7.0 | Author: PacktPublishing | File: test_bagging.py | License: MIT License

def test_classification():
    # Check classification for various parameter settings.
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(iris.data,
                                                        iris.target,
                                                        random_state=rng)
    grid = ParameterGrid({"max_samples": [0.5, 1.0],
                          "max_features": [1, 2, 4],
                          "bootstrap": [True, False],
                          "bootstrap_features": [True, False]})

    for base_estimator in [None,
                           DummyClassifier(),
                           Perceptron(tol=1e-3),
                           DecisionTreeClassifier(),
                           KNeighborsClassifier(),
                           SVC(gamma="scale")]:
        for params in grid:
            BaggingClassifier(base_estimator=base_estimator,
                              random_state=rng,
                              **params).fit(X_train, y_train).predict(X_test)
Example 2
Project: Mastering-Elasticsearch-7.0 | Author: PacktPublishing | File: test_bagging.py | License: MIT License

def test_warm_start(random_state=42):
    # Test if fitting incrementally with warm start gives a forest of the
    # right size and the same results as a normal fit.
    X, y = make_hastie_10_2(n_samples=20, random_state=1)

    clf_ws = None
    for n_estimators in [5, 10]:
        if clf_ws is None:
            clf_ws = BaggingClassifier(n_estimators=n_estimators,
                                       random_state=random_state,
                                       warm_start=True)
        else:
            clf_ws.set_params(n_estimators=n_estimators)
        clf_ws.fit(X, y)
        assert_equal(len(clf_ws), n_estimators)

    clf_no_ws = BaggingClassifier(n_estimators=10,
                                  random_state=random_state,
                                  warm_start=False)
    clf_no_ws.fit(X, y)

    assert_equal(set([tree.random_state for tree in clf_ws]),
                 set([tree.random_state for tree in clf_no_ws]))
Example 3
Project: Mastering-Elasticsearch-7.0 | Author: PacktPublishing | File: test_bagging.py | License: MIT License

def test_warm_start_equal_n_estimators():
    # Test that nothing happens when fitting without increasing n_estimators
    X, y = make_hastie_10_2(n_samples=20, random_state=1)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=43)

    clf = BaggingClassifier(n_estimators=5, warm_start=True, random_state=83)
    clf.fit(X_train, y_train)

    y_pred = clf.predict(X_test)

    # modify X to nonsense values, this should not change anything
    X_train += 1.

    assert_warns_message(UserWarning,
                         "Warm-start fitting without increasing n_estimators does not",
                         clf.fit, X_train, y_train)
    assert_array_equal(y_pred, clf.predict(X_test))
Example 4
Project: Mastering-Elasticsearch-7.0 | Author: PacktPublishing | File: test_bagging.py | License: MIT License

def test_warm_start_equivalence():
    # warm started classifier with 5+5 estimators should be equivalent to
    # one classifier with 10 estimators
    X, y = make_hastie_10_2(n_samples=20, random_state=1)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=43)

    clf_ws = BaggingClassifier(n_estimators=5, warm_start=True,
                               random_state=3141)
    clf_ws.fit(X_train, y_train)
    clf_ws.set_params(n_estimators=10)
    clf_ws.fit(X_train, y_train)
    y1 = clf_ws.predict(X_test)

    clf = BaggingClassifier(n_estimators=10, warm_start=False,
                            random_state=3141)
    clf.fit(X_train, y_train)
    y2 = clf.predict(X_test)

    assert_array_almost_equal(y1, y2)
Example 5
Project: Hanhan-Spark-Python | Author: hanhanwu | File: random_forest_with_bagging.py | License: MIT License

def main():
    indata = np.load(inputs)
    training_data = indata['data_training']
    training_labels = indata['label_training']
    validation_data = indata['data_val']
    validation_labels = indata['label_val']

    ts = range(1, 11)
    sampling_rates = [round(0.1 * t, 1) for t in ts]
    forest_sizes = [10, 20, 50, 100]

    for sampling_rate in sampling_rates:
        legend_label = 'sampling rate=' + str(sampling_rate)
        accuracy_results = []
        for forest_size in forest_sizes:
            rf_clf = ensemble.BaggingClassifier(n_estimators=forest_size,
                                                max_samples=sampling_rate)
            rf_clf.fit(training_data, training_labels)
            predictions = rf_clf.predict(validation_data)
            accuracy = metrics.accuracy_score(validation_labels, predictions)
            accuracy_results.append(accuracy)
        plt.plot(range(len(forest_sizes)), accuracy_results, label=legend_label)

    plt.xticks(range(len(forest_sizes)), forest_sizes, size='small')
    plt.legend()
    plt.show()
Example 6
Project: Awesome-Scripts | Author: DedSecInside | File: BaggedLDA.py | License: MIT License

def main():
    # prepare data
    trainingSet = []
    testSet = []
    accuracy = 0.0
    split = 0.25
    loadDataset('../Dataset/LDAdata.csv', split, trainingSet, testSet)
    print('Train set: ' + repr(len(trainingSet)))
    print('Test set: ' + repr(len(testSet)))

    trainData = np.array(trainingSet)[:, 0:np.array(trainingSet).shape[1] - 1]
    columns = trainData.shape[1]
    X = np.array(trainData)
    y = np.array(trainingSet)[:, columns]

    clf = BaggingClassifier(LDA())
    clf.fit(X, y)

    testData = np.array(testSet)[:, 0:np.array(trainingSet).shape[1] - 1]
    X_test = np.array(testData)
    y_test = np.array(testSet)[:, columns]

    accuracy = clf.score(X_test, y_test)
    accuracy *= 100
    print("Accuracy %:", accuracy)
Example 7
Project: Awesome-Scripts | Author: DedSecInside | File: BaggedQDA.py | License: MIT License

def main():
    # prepare data
    trainingSet = []
    testSet = []
    accuracy = 0.0
    split = 0.25
    loadDataset('../Dataset/combined.csv', split, trainingSet, testSet)
    print('Train set: ' + repr(len(trainingSet)))
    print('Test set: ' + repr(len(testSet)))

    # generate predictions
    predictions = []
    trainData = np.array(trainingSet)[:, 0:np.array(trainingSet).shape[1] - 1]
    columns = trainData.shape[1]
    X = np.array(trainData)
    y = np.array(trainingSet)[:, columns]

    clf = BaggingClassifier(QDA())
    clf.fit(X, y)

    testData = np.array(testSet)[:, 0:np.array(trainingSet).shape[1] - 1]
    X_test = np.array(testData)
    y_test = np.array(testSet)[:, columns]

    accuracy = clf.score(X_test, y_test)
    accuracy *= 100
    print("Accuracy %:", accuracy)
Example 8
Project: Awesome-Scripts | Author: DedSecInside | File: BaggedKNN.py | License: MIT License

def main():
    # prepare data
    trainingSet = []
    testSet = []
    accuracy = 0.0
    split = 0.25
    loadDataset('../Dataset/combined.csv', split, trainingSet, testSet)
    print('Train set: ' + repr(len(trainingSet)))
    print('Test set: ' + repr(len(testSet)))

    # generate predictions
    predictions = []
    trainData = np.array(trainingSet)[:, 0:np.array(trainingSet).shape[1] - 1]
    columns = trainData.shape[1]
    X = np.array(trainData)
    y = np.array(trainingSet)[:, columns]

    clf = BaggingClassifier(KNN(n_neighbors=10, weights='uniform',
                                algorithm='auto', leaf_size=10, p=1,
                                metric='minkowski', metric_params=None,
                                n_jobs=1))
    clf.fit(X, y)

    testData = np.array(testSet)[:, 0:np.array(trainingSet).shape[1] - 1]
    X_test = np.array(testData)
    y_test = np.array(testSet)[:, columns]

    accuracy = clf.score(X_test, y_test)
    accuracy *= 100
    print("Accuracy %:", accuracy)
Example 9
Project: Awesome-Scripts | Author: DedSecInside | File: BaggedSVM.py | License: MIT License

def main():
    # prepare data
    trainingSet = []
    testSet = []
    accuracy = 0.0
    split = 0.25
    loadDataset('../Dataset/combined.csv', split, trainingSet, testSet)
    print('Train set: ' + repr(len(trainingSet)))
    print('Test set: ' + repr(len(testSet)))

    # generate predictions
    predictions = []
    trainData = np.array(trainingSet)[:, 0:np.array(trainingSet).shape[1] - 1]
    columns = trainData.shape[1]
    X = np.array(trainData)
    y = np.array(trainingSet)[:, columns]

    clf = BaggingClassifier(SVC(C=1.0, kernel='linear', degree=5,
                                gamma='auto', coef0=0.0, shrinking=True,
                                probability=False, tol=0.001, cache_size=200,
                                class_weight=None, verbose=False,
                                max_iter=-1, random_state=None))
    clf.fit(X, y)

    testData = np.array(testSet)[:, 0:np.array(trainingSet).shape[1] - 1]
    X_test = np.array(testData)
    y_test = np.array(testSet)[:, columns]

    accuracy = clf.score(X_test, y_test)
    accuracy *= 100
    print("Accuracy %:", accuracy)
Example 10
Project: brew | Author: viisar | File: bagging.py | License: MIT License

def __init__(self, base_classifier=None, n_classifiers=100,
             combination_rule='majority_vote'):
    self.base_classifier = base_classifier
    self.n_classifiers = n_classifiers

    # using the sklearn implementation of bagging for now
    self.sk_bagging = BaggingClassifier(base_estimator=base_classifier,
                                        n_estimators=n_classifiers,
                                        max_samples=1.0,
                                        max_features=1.0)

    self.ensemble = Ensemble()
    self.combiner = Combiner(rule=combination_rule)
Example 11
Project: brew | Author: viisar | File: bagging.py | License: MIT License

def __init__(self, base_classifier=None, n_classifiers=100,
             combination_rule='majority_vote'):
    self.base_classifier = base_classifier
    self.n_classifiers = n_classifiers

    # using the sklearn implementation of bagging for now
    self.sk_bagging = BaggingClassifier(base_estimator=base_classifier,
                                        n_estimators=n_classifiers,
                                        max_samples=1.0,
                                        max_features=1.0)

    self.ensemble = Ensemble()
    self.combiner = Combiner(rule=combination_rule)
Example 12
Project: twitter-stock-recommendation | Author: alvarobartt | File: test_bagging.py | License: MIT License

def test_classification():
    # Check classification for various parameter settings.
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(iris.data,
                                                        iris.target,
                                                        random_state=rng)
    grid = ParameterGrid({"max_samples": [0.5, 1.0],
                          "max_features": [1, 2, 4],
                          "bootstrap": [True, False],
                          "bootstrap_features": [True, False]})

    for base_estimator in [None,
                           DummyClassifier(),
                           Perceptron(tol=1e-3),
                           DecisionTreeClassifier(),
                           KNeighborsClassifier(),
                           SVC()]:
        for params in grid:
            BaggingClassifier(base_estimator=base_estimator,
                              random_state=rng,
                              **params).fit(X_train, y_train).predict(X_test)
Example 13
Project: twitter-stock-recommendation | Author: alvarobartt | File: test_bagging.py | License: MIT License

def test_warm_start(random_state=42):
    # Test if fitting incrementally with warm start gives a forest of the
    # right size and the same results as a normal fit.
    X, y = make_hastie_10_2(n_samples=20, random_state=1)

    clf_ws = None
    for n_estimators in [5, 10]:
        if clf_ws is None:
            clf_ws = BaggingClassifier(n_estimators=n_estimators,
                                       random_state=random_state,
                                       warm_start=True)
        else:
            clf_ws.set_params(n_estimators=n_estimators)
        clf_ws.fit(X, y)
        assert_equal(len(clf_ws), n_estimators)

    clf_no_ws = BaggingClassifier(n_estimators=10,
                                  random_state=random_state,
                                  warm_start=False)
    clf_no_ws.fit(X, y)

    assert_equal(set([tree.random_state for tree in clf_ws]),
                 set([tree.random_state for tree in clf_no_ws]))
Example 14
Project: twitter-stock-recommendation | Author: alvarobartt | File: test_bagging.py | License: MIT License

def test_warm_start_equal_n_estimators():
    # Test that nothing happens when fitting without increasing n_estimators
    X, y = make_hastie_10_2(n_samples=20, random_state=1)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=43)

    clf = BaggingClassifier(n_estimators=5, warm_start=True, random_state=83)
    clf.fit(X_train, y_train)

    y_pred = clf.predict(X_test)

    # modify X to nonsense values, this should not change anything
    X_train += 1.

    assert_warns_message(UserWarning,
                         "Warm-start fitting without increasing n_estimators does not",
                         clf.fit, X_train, y_train)
    assert_array_equal(y_pred, clf.predict(X_test))
Example 15
Project: twitter-stock-recommendation | Author: alvarobartt | File: test_bagging.py | License: MIT License

def test_warm_start_equivalence():
    # warm started classifier with 5+5 estimators should be equivalent to
    # one classifier with 10 estimators
    X, y = make_hastie_10_2(n_samples=20, random_state=1)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=43)

    clf_ws = BaggingClassifier(n_estimators=5, warm_start=True,
                               random_state=3141)
    clf_ws.fit(X_train, y_train)
    clf_ws.set_params(n_estimators=10)
    clf_ws.fit(X_train, y_train)
    y1 = clf_ws.predict(X_test)

    clf = BaggingClassifier(n_estimators=10, warm_start=False,
                            random_state=3141)
    clf.fit(X_train, y_train)
    y2 = clf.predict(X_test)

    assert_array_almost_equal(y1, y2)
Example 16
Project: Mastering-Elasticsearch-7.0 | Author: PacktPublishing | File: test_base.py | License: MIT License

def test_base():
    # Check BaseEnsemble methods.
    ensemble = BaggingClassifier(
        base_estimator=Perceptron(tol=1e-3, random_state=None),
        n_estimators=3)

    iris = load_iris()
    ensemble.fit(iris.data, iris.target)
    ensemble.estimators_ = []  # empty the list and create estimators manually

    ensemble._make_estimator()
    random_state = np.random.RandomState(3)
    ensemble._make_estimator(random_state=random_state)
    ensemble._make_estimator(random_state=random_state)
    ensemble._make_estimator(append=False)

    assert_equal(3, len(ensemble))
    assert_equal(3, len(ensemble.estimators_))

    assert isinstance(ensemble[0], Perceptron)
    assert_equal(ensemble[0].random_state, None)
    assert isinstance(ensemble[1].random_state, int)
    assert isinstance(ensemble[2].random_state, int)
    assert_not_equal(ensemble[1].random_state, ensemble[2].random_state)

    np_int_ensemble = BaggingClassifier(base_estimator=Perceptron(tol=1e-3),
                                        n_estimators=np.int32(3))
    np_int_ensemble.fit(iris.data, iris.target)
Example 17
Project: Mastering-Elasticsearch-7.0 | Author: PacktPublishing | File: test_base.py | License: MIT License

def test_base_zero_n_estimators():
    # Check that instantiating a BaseEnsemble with n_estimators<=0 raises
    # a ValueError.
    ensemble = BaggingClassifier(base_estimator=Perceptron(tol=1e-3),
                                 n_estimators=0)
    iris = load_iris()
    assert_raise_message(ValueError,
                         "n_estimators must be greater than zero, got 0.",
                         ensemble.fit, iris.data, iris.target)
Example 18
Project: Mastering-Elasticsearch-7.0 | Author: PacktPublishing | File: test_base.py | License: MIT License

def test_base_not_int_n_estimators():
    # Check that instantiating a BaseEnsemble with a string as n_estimators
    # raises a ValueError demanding n_estimators to be supplied as an integer.
    string_ensemble = BaggingClassifier(base_estimator=Perceptron(tol=1e-3),
                                        n_estimators='3')
    iris = load_iris()
    assert_raise_message(ValueError,
                         "n_estimators must be an integer",
                         string_ensemble.fit, iris.data, iris.target)

    float_ensemble = BaggingClassifier(base_estimator=Perceptron(tol=1e-3),
                                       n_estimators=3.0)
    assert_raise_message(ValueError,
                         "n_estimators must be an integer",
                         float_ensemble.fit, iris.data, iris.target)
Example 19
Project: Mastering-Elasticsearch-7.0 | Author: PacktPublishing | File: test_bagging.py | License: MIT License

def test_oob_score_classification():
    # Check that oob prediction is a good estimation of the generalization
    # error.
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(iris.data,
                                                        iris.target,
                                                        random_state=rng)

    for base_estimator in [DecisionTreeClassifier(), SVC(gamma="scale")]:
        clf = BaggingClassifier(base_estimator=base_estimator,
                                n_estimators=100,
                                bootstrap=True,
                                oob_score=True,
                                random_state=rng).fit(X_train, y_train)

        test_score = clf.score(X_test, y_test)

        assert_less(abs(test_score - clf.oob_score_), 0.1)

        # Test with few estimators
        assert_warns(UserWarning,
                     BaggingClassifier(base_estimator=base_estimator,
                                       n_estimators=1,
                                       bootstrap=True,
                                       oob_score=True,
                                       random_state=rng).fit,
                     X_train, y_train)
Example 20
Project: Mastering-Elasticsearch-7.0 | Author: PacktPublishing | File: test_bagging.py | License: MIT License

def test_error():
    # Test that it gives proper exception on deficient input.
    X, y = iris.data, iris.target
    base = DecisionTreeClassifier()

    # Test max_samples
    assert_raises(ValueError,
                  BaggingClassifier(base, max_samples=-1).fit, X, y)
    assert_raises(ValueError,
                  BaggingClassifier(base, max_samples=0.0).fit, X, y)
    assert_raises(ValueError,
                  BaggingClassifier(base, max_samples=2.0).fit, X, y)
    assert_raises(ValueError,
                  BaggingClassifier(base, max_samples=1000).fit, X, y)
    assert_raises(ValueError,
                  BaggingClassifier(base, max_samples="foobar").fit, X, y)

    # Test max_features
    assert_raises(ValueError,
                  BaggingClassifier(base, max_features=-1).fit, X, y)
    assert_raises(ValueError,
                  BaggingClassifier(base, max_features=0.0).fit, X, y)
    assert_raises(ValueError,
                  BaggingClassifier(base, max_features=2.0).fit, X, y)
    assert_raises(ValueError,
                  BaggingClassifier(base, max_features=5).fit, X, y)
    assert_raises(ValueError,
                  BaggingClassifier(base, max_features="foobar").fit, X, y)

    # Test support of decision_function
    assert not hasattr(BaggingClassifier(base).fit(X, y), 'decision_function')
Example 21
Project: Mastering-Elasticsearch-7.0 | Author: PacktPublishing | File: test_bagging.py | License: MIT License

def test_gridsearch():
    # Check that bagging ensembles can be grid-searched.
    # Transform iris into a binary classification task
    X, y = iris.data, iris.target
    y[y == 2] = 1

    # Grid search with scoring based on decision_function
    parameters = {'n_estimators': (1, 2),
                  'base_estimator__C': (1, 2)}

    GridSearchCV(BaggingClassifier(SVC(gamma="scale")),
                 parameters,
                 scoring="roc_auc").fit(X, y)
Example 22
Project: Mastering-Elasticsearch-7.0 | Author: PacktPublishing | File: test_bagging.py | License: MIT License

def test_bagging_sample_weight_unsupported_but_passed():
    estimator = BaggingClassifier(DummyZeroEstimator())
    rng = check_random_state(0)

    estimator.fit(iris.data, iris.target).predict(iris.data)
    assert_raises(ValueError, estimator.fit, iris.data, iris.target,
                  sample_weight=rng.randint(10, size=(iris.data.shape[0])))
Example 23
Project: Mastering-Elasticsearch-7.0 | Author: PacktPublishing | File: test_bagging.py | License: MIT License

def test_warm_start_smaller_n_estimators():
    # Test if warm start'ed second fit with smaller n_estimators raises error.
    X, y = make_hastie_10_2(n_samples=20, random_state=1)
    clf = BaggingClassifier(n_estimators=5, warm_start=True)
    clf.fit(X, y)
    clf.set_params(n_estimators=4)
    assert_raises(ValueError, clf.fit, X, y)
Example 24
Project: Mastering-Elasticsearch-7.0 | Author: PacktPublishing | File: test_bagging.py | License: MIT License

def test_oob_score_removed_on_warm_start():
    X, y = make_hastie_10_2(n_samples=2000, random_state=1)

    clf = BaggingClassifier(n_estimators=50, oob_score=True)
    clf.fit(X, y)

    clf.set_params(warm_start=True, oob_score=False, n_estimators=100)
    clf.fit(X, y)

    assert_raises(AttributeError, getattr, clf, "oob_score_")
Example 25
Project: Mastering-Elasticsearch-7.0 | Author: PacktPublishing | File: test_bagging.py | License: MIT License

def test_oob_score_consistency():
    # Make sure OOB scores are identical when random_state, estimator, and
    # training data are fixed and fitting is done twice
    X, y = make_hastie_10_2(n_samples=200, random_state=1)
    bagging = BaggingClassifier(KNeighborsClassifier(),
                                max_samples=0.5,
                                max_features=0.5,
                                oob_score=True,
                                random_state=1)
    assert_equal(bagging.fit(X, y).oob_score_, bagging.fit(X, y).oob_score_)
Example 26
Project: Mastering-Elasticsearch-7.0 | Author: PacktPublishing | File: test_bagging.py | License: MIT License

def test_estimators_samples():
    # Check that format of estimators_samples_ is correct and that results
    # generated at fit time can be identically reproduced at a later time
    # using data saved in object attributes.
    X, y = make_hastie_10_2(n_samples=200, random_state=1)
    bagging = BaggingClassifier(LogisticRegression(),
                                max_samples=0.5,
                                max_features=0.5,
                                random_state=1,
                                bootstrap=False)
    bagging.fit(X, y)

    # Get relevant attributes
    estimators_samples = bagging.estimators_samples_
    estimators_features = bagging.estimators_features_
    estimators = bagging.estimators_

    # Test for correct formatting
    assert_equal(len(estimators_samples), len(estimators))
    assert_equal(len(estimators_samples[0]), len(X) // 2)
    assert_equal(estimators_samples[0].dtype.kind, 'i')

    # Re-fit single estimator to test for consistent sampling
    estimator_index = 0
    estimator_samples = estimators_samples[estimator_index]
    estimator_features = estimators_features[estimator_index]
    estimator = estimators[estimator_index]

    X_train = (X[estimator_samples])[:, estimator_features]
    y_train = y[estimator_samples]

    orig_coefs = estimator.coef_
    estimator.fit(X_train, y_train)
    new_coefs = estimator.coef_

    assert_array_almost_equal(orig_coefs, new_coefs)
Example 27
Project: Mastering-Elasticsearch-7.0 | Author: PacktPublishing | File: test_bagging.py | License: MIT License

def test_estimators_samples_deterministic():
    # This test is a regression test to check that with a random step
    # (e.g. SparseRandomProjection) and a given random state, the results
    # generated at fit time can be identically reproduced at a later time
    # using data saved in object attributes. Check issue #9524 for full
    # discussion.
    iris = load_iris()
    X, y = iris.data, iris.target

    base_pipeline = make_pipeline(SparseRandomProjection(n_components=2),
                                  LogisticRegression())
    clf = BaggingClassifier(base_estimator=base_pipeline,
                            max_samples=0.5,
                            random_state=0)
    clf.fit(X, y)
    pipeline_estimator_coef = clf.estimators_[0].steps[-1][1].coef_.copy()

    estimator = clf.estimators_[0]
    estimator_sample = clf.estimators_samples_[0]
    estimator_feature = clf.estimators_features_[0]

    X_train = (X[estimator_sample])[:, estimator_feature]
    y_train = y[estimator_sample]

    estimator.fit(X_train, y_train)
    assert_array_equal(estimator.steps[-1][1].coef_, pipeline_estimator_coef)
Example 28
Project: Mastering-Elasticsearch-7.0 | Author: PacktPublishing | File: test_bagging.py | License: MIT License

def test_max_samples_consistency():
    # Make sure validated max_samples and original max_samples are identical
    # when valid integer max_samples supplied by user
    max_samples = 100
    X, y = make_hastie_10_2(n_samples=2 * max_samples, random_state=1)
    bagging = BaggingClassifier(KNeighborsClassifier(),
                                max_samples=max_samples,
                                max_features=0.5,
                                random_state=1)
    bagging.fit(X, y)
    assert_equal(bagging._max_samples, max_samples)
Example 29
Project: Mastering-Elasticsearch-7.0 | Author: PacktPublishing | File: test_bagging.py | License: MIT License

def test_bagging_classifier_with_missing_inputs():
    # Check that BaggingClassifier can accept X with missing/infinite data
    X = np.array([
        [1, 3, 5],
        [2, None, 6],
        [2, np.nan, 6],
        [2, np.inf, 6],
        [2, np.NINF, 6],
    ])
    y = np.array([3, 6, 6, 6, 6])

    classifier = DecisionTreeClassifier()
    pipeline = make_pipeline(
        FunctionTransformer(replace, validate=False),
        classifier
    )
    pipeline.fit(X, y).predict(X)

    bagging_classifier = BaggingClassifier(pipeline)
    bagging_classifier.fit(X, y)
    y_hat = bagging_classifier.predict(X)
    assert_equal(y.shape, y_hat.shape)
    bagging_classifier.predict_log_proba(X)
    bagging_classifier.predict_proba(X)

    # Verify that exceptions can be raised by wrapper classifier
    classifier = DecisionTreeClassifier()
    pipeline = make_pipeline(classifier)
    assert_raises(ValueError, pipeline.fit, X, y)

    bagging_classifier = BaggingClassifier(pipeline)
    assert_raises(ValueError, bagging_classifier.fit, X, y)
Example 30
Project: Mastering-Elasticsearch-7.0 | Author: PacktPublishing | File: test_bagging.py | License: MIT License

def test_bagging_small_max_features():
    # Check that Bagging estimator can accept low fractional max_features
    X = np.array([[1, 2], [3, 4]])
    y = np.array([1, 0])

    bagging = BaggingClassifier(LogisticRegression(),
                                max_features=0.3,
                                random_state=1)
    bagging.fit(X, y)