Python sklearn.ensemble.BaggingClassifier() Examples
The following are 30 code examples that show how to use sklearn.ensemble.BaggingClassifier(). They are extracted from open source projects; the project, author, source file, and license are listed above each example. You may also want to check out the other available functions and classes of the sklearn.ensemble module.
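Before the project examples, here is a minimal, self-contained sketch of typical BaggingClassifier usage, assuming only scikit-learn's bundled iris dataset and a decision-tree base estimator; it is not taken from any of the projects below. Note that the base_estimator keyword matches the older scikit-learn API used throughout these examples (recent releases rename it to estimator).

from sklearn.datasets import load_iris
from sklearn.ensemble import BaggingClassifier
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

# Load a toy dataset and hold out a test split.
X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# Bag 10 decision trees, each fit on a bootstrap sample of the training rows.
clf = BaggingClassifier(base_estimator=DecisionTreeClassifier(),
                        n_estimators=10,
                        max_samples=1.0,
                        bootstrap=True,
                        random_state=0)
clf.fit(X_train, y_train)
print("Test accuracy:", clf.score(X_test, y_test))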
Example 1
Project: Mastering-Elasticsearch-7.0 | Author: PacktPublishing | File: test_bagging.py | License: MIT License

def test_classification():
    # Check classification for various parameter settings.
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(iris.data,
                                                        iris.target,
                                                        random_state=rng)
    grid = ParameterGrid({"max_samples": [0.5, 1.0],
                          "max_features": [1, 2, 4],
                          "bootstrap": [True, False],
                          "bootstrap_features": [True, False]})

    for base_estimator in [None,
                           DummyClassifier(),
                           Perceptron(tol=1e-3),
                           DecisionTreeClassifier(),
                           KNeighborsClassifier(),
                           SVC(gamma="scale")]:
        for params in grid:
            BaggingClassifier(base_estimator=base_estimator,
                              random_state=rng,
                              **params).fit(X_train, y_train).predict(X_test)
Example 2
Project: Mastering-Elasticsearch-7.0 | Author: PacktPublishing | File: test_bagging.py | License: MIT License

def test_warm_start(random_state=42):
    # Test if fitting incrementally with warm start gives a forest of the
    # right size and the same results as a normal fit.
    X, y = make_hastie_10_2(n_samples=20, random_state=1)

    clf_ws = None
    for n_estimators in [5, 10]:
        if clf_ws is None:
            clf_ws = BaggingClassifier(n_estimators=n_estimators,
                                       random_state=random_state,
                                       warm_start=True)
        else:
            clf_ws.set_params(n_estimators=n_estimators)
        clf_ws.fit(X, y)
        assert_equal(len(clf_ws), n_estimators)

    clf_no_ws = BaggingClassifier(n_estimators=10,
                                  random_state=random_state,
                                  warm_start=False)
    clf_no_ws.fit(X, y)

    assert_equal(set([tree.random_state for tree in clf_ws]),
                 set([tree.random_state for tree in clf_no_ws]))
Example 3
Project: Mastering-Elasticsearch-7.0 | Author: PacktPublishing | File: test_bagging.py | License: MIT License

def test_warm_start_equal_n_estimators():
    # Test that nothing happens when fitting without increasing n_estimators
    X, y = make_hastie_10_2(n_samples=20, random_state=1)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=43)

    clf = BaggingClassifier(n_estimators=5, warm_start=True, random_state=83)
    clf.fit(X_train, y_train)

    y_pred = clf.predict(X_test)

    # modify X to nonsense values, this should not change anything
    X_train += 1.

    assert_warns_message(UserWarning,
                         "Warm-start fitting without increasing n_estimators does not",
                         clf.fit, X_train, y_train)
    assert_array_equal(y_pred, clf.predict(X_test))
Example 4
Project: Mastering-Elasticsearch-7.0 | Author: PacktPublishing | File: test_bagging.py | License: MIT License

def test_warm_start_equivalence():
    # warm started classifier with 5+5 estimators should be equivalent to
    # one classifier with 10 estimators
    X, y = make_hastie_10_2(n_samples=20, random_state=1)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=43)

    clf_ws = BaggingClassifier(n_estimators=5, warm_start=True,
                               random_state=3141)
    clf_ws.fit(X_train, y_train)
    clf_ws.set_params(n_estimators=10)
    clf_ws.fit(X_train, y_train)
    y1 = clf_ws.predict(X_test)

    clf = BaggingClassifier(n_estimators=10, warm_start=False,
                            random_state=3141)
    clf.fit(X_train, y_train)
    y2 = clf.predict(X_test)

    assert_array_almost_equal(y1, y2)
Example 5
Project: Hanhan-Spark-Python | Author: hanhanwu | File: random_forest_with_bagging.py | License: MIT License

def main():
    indata = np.load(inputs)
    training_data = indata['data_training']
    training_labels = indata['label_training']
    validation_data = indata['data_val']
    validation_labels = indata['label_val']

    ts = range(1, 11)
    sampling_rates = [round(0.1 * t, 1) for t in ts]
    forest_sizes = [10, 20, 50, 100]

    for sampling_rate in sampling_rates:
        legend_label = 'sampling rate=' + str(sampling_rate)
        accuracy_results = []
        for forest_size in forest_sizes:
            rf_clf = ensemble.BaggingClassifier(n_estimators=forest_size,
                                                max_samples=sampling_rate)
            rf_clf.fit(training_data, training_labels)
            predictions = rf_clf.predict(validation_data)
            accuracy = metrics.accuracy_score(validation_labels, predictions)
            accuracy_results.append(accuracy)
        plt.plot(range(len(forest_sizes)), accuracy_results, label=legend_label)

    plt.xticks(range(len(forest_sizes)), forest_sizes, size='small')
    plt.legend()
    plt.show()
Example 6
Project: Awesome-Scripts | Author: DedSecInside | File: BaggedLDA.py | License: MIT License

def main():
    # prepare data
    trainingSet = []
    testSet = []
    accuracy = 0.0
    split = 0.25
    loadDataset('../Dataset/LDAdata.csv', split, trainingSet, testSet)
    print('Train set: ' + repr(len(trainingSet)))
    print('Test set: ' + repr(len(testSet)))

    trainData = np.array(trainingSet)[:, 0:np.array(trainingSet).shape[1] - 1]
    columns = trainData.shape[1]
    X = np.array(trainData)
    y = np.array(trainingSet)[:, columns]

    clf = BaggingClassifier(LDA())
    clf.fit(X, y)

    testData = np.array(testSet)[:, 0:np.array(trainingSet).shape[1] - 1]
    X_test = np.array(testData)
    y_test = np.array(testSet)[:, columns]

    accuracy = clf.score(X_test, y_test)
    accuracy *= 100
    print("Accuracy %:", accuracy)
Example 7
Project: Awesome-Scripts | Author: DedSecInside | File: BaggedQDA.py | License: MIT License

def main():
    # prepare data
    trainingSet = []
    testSet = []
    accuracy = 0.0
    split = 0.25
    loadDataset('../Dataset/combined.csv', split, trainingSet, testSet)
    print('Train set: ' + repr(len(trainingSet)))
    print('Test set: ' + repr(len(testSet)))

    # generate predictions
    predictions = []
    trainData = np.array(trainingSet)[:, 0:np.array(trainingSet).shape[1] - 1]
    columns = trainData.shape[1]
    X = np.array(trainData)
    y = np.array(trainingSet)[:, columns]

    clf = BaggingClassifier(QDA())
    clf.fit(X, y)

    testData = np.array(testSet)[:, 0:np.array(trainingSet).shape[1] - 1]
    X_test = np.array(testData)
    y_test = np.array(testSet)[:, columns]

    accuracy = clf.score(X_test, y_test)
    accuracy *= 100
    print("Accuracy %:", accuracy)
Example 8
Project: Awesome-Scripts | Author: DedSecInside | File: BaggedKNN.py | License: MIT License

def main():
    # prepare data
    trainingSet = []
    testSet = []
    accuracy = 0.0
    split = 0.25
    loadDataset('../Dataset/combined.csv', split, trainingSet, testSet)
    print('Train set: ' + repr(len(trainingSet)))
    print('Test set: ' + repr(len(testSet)))

    # generate predictions
    predictions = []
    trainData = np.array(trainingSet)[:, 0:np.array(trainingSet).shape[1] - 1]
    columns = trainData.shape[1]
    X = np.array(trainData)
    y = np.array(trainingSet)[:, columns]

    clf = BaggingClassifier(KNN(n_neighbors=10, weights='uniform',
                                algorithm='auto', leaf_size=10, p=1,
                                metric='minkowski', metric_params=None,
                                n_jobs=1))
    clf.fit(X, y)

    testData = np.array(testSet)[:, 0:np.array(trainingSet).shape[1] - 1]
    X_test = np.array(testData)
    y_test = np.array(testSet)[:, columns]

    accuracy = clf.score(X_test, y_test)
    accuracy *= 100
    print("Accuracy %:", accuracy)
Example 9
Project: Awesome-Scripts | Author: DedSecInside | File: BaggedSVM.py | License: MIT License

def main():
    # prepare data
    trainingSet = []
    testSet = []
    accuracy = 0.0
    split = 0.25
    loadDataset('../Dataset/combined.csv', split, trainingSet, testSet)
    print('Train set: ' + repr(len(trainingSet)))
    print('Test set: ' + repr(len(testSet)))

    # generate predictions
    predictions = []
    trainData = np.array(trainingSet)[:, 0:np.array(trainingSet).shape[1] - 1]
    columns = trainData.shape[1]
    X = np.array(trainData)
    y = np.array(trainingSet)[:, columns]

    clf = BaggingClassifier(SVC(C=1.0, kernel='linear', degree=5,
                                gamma='auto', coef0=0.0, shrinking=True,
                                probability=False, tol=0.001, cache_size=200,
                                class_weight=None, verbose=False,
                                max_iter=-1, random_state=None))
    clf.fit(X, y)

    testData = np.array(testSet)[:, 0:np.array(trainingSet).shape[1] - 1]
    X_test = np.array(testData)
    y_test = np.array(testSet)[:, columns]

    accuracy = clf.score(X_test, y_test)
    accuracy *= 100
    print("Accuracy %:", accuracy)
Example 10
Project: brew | Author: viisar | File: bagging.py | License: MIT License

def __init__(self, base_classifier=None, n_classifiers=100,
             combination_rule='majority_vote'):
    self.base_classifier = base_classifier
    self.n_classifiers = n_classifiers

    # using the sklearn implementation of bagging for now
    self.sk_bagging = BaggingClassifier(base_estimator=base_classifier,
                                        n_estimators=n_classifiers,
                                        max_samples=1.0,
                                        max_features=1.0)

    self.ensemble = Ensemble()
    self.combiner = Combiner(rule=combination_rule)
Example 11
Project: brew | Author: viisar | File: bagging.py | License: MIT License

def __init__(self, base_classifier=None, n_classifiers=100,
             combination_rule='majority_vote'):
    self.base_classifier = base_classifier
    self.n_classifiers = n_classifiers

    # using the sklearn implementation of bagging for now
    self.sk_bagging = BaggingClassifier(base_estimator=base_classifier,
                                        n_estimators=n_classifiers,
                                        max_samples=1.0,
                                        max_features=1.0)

    self.ensemble = Ensemble()
    self.combiner = Combiner(rule=combination_rule)
Example 12
Project: twitter-stock-recommendation | Author: alvarobartt | File: test_bagging.py | License: MIT License

def test_classification():
    # Check classification for various parameter settings.
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(iris.data,
                                                        iris.target,
                                                        random_state=rng)
    grid = ParameterGrid({"max_samples": [0.5, 1.0],
                          "max_features": [1, 2, 4],
                          "bootstrap": [True, False],
                          "bootstrap_features": [True, False]})

    for base_estimator in [None,
                           DummyClassifier(),
                           Perceptron(tol=1e-3),
                           DecisionTreeClassifier(),
                           KNeighborsClassifier(),
                           SVC()]:
        for params in grid:
            BaggingClassifier(base_estimator=base_estimator,
                              random_state=rng,
                              **params).fit(X_train, y_train).predict(X_test)
Example 13
Project: twitter-stock-recommendation | Author: alvarobartt | File: test_bagging.py | License: MIT License

def test_warm_start(random_state=42):
    # Test if fitting incrementally with warm start gives a forest of the
    # right size and the same results as a normal fit.
    X, y = make_hastie_10_2(n_samples=20, random_state=1)

    clf_ws = None
    for n_estimators in [5, 10]:
        if clf_ws is None:
            clf_ws = BaggingClassifier(n_estimators=n_estimators,
                                       random_state=random_state,
                                       warm_start=True)
        else:
            clf_ws.set_params(n_estimators=n_estimators)
        clf_ws.fit(X, y)
        assert_equal(len(clf_ws), n_estimators)

    clf_no_ws = BaggingClassifier(n_estimators=10,
                                  random_state=random_state,
                                  warm_start=False)
    clf_no_ws.fit(X, y)

    assert_equal(set([tree.random_state for tree in clf_ws]),
                 set([tree.random_state for tree in clf_no_ws]))
Example 14
Project: twitter-stock-recommendation | Author: alvarobartt | File: test_bagging.py | License: MIT License

def test_warm_start_equal_n_estimators():
    # Test that nothing happens when fitting without increasing n_estimators
    X, y = make_hastie_10_2(n_samples=20, random_state=1)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=43)

    clf = BaggingClassifier(n_estimators=5, warm_start=True, random_state=83)
    clf.fit(X_train, y_train)

    y_pred = clf.predict(X_test)

    # modify X to nonsense values, this should not change anything
    X_train += 1.

    assert_warns_message(UserWarning,
                         "Warm-start fitting without increasing n_estimators does not",
                         clf.fit, X_train, y_train)
    assert_array_equal(y_pred, clf.predict(X_test))
Example 15
Project: twitter-stock-recommendation | Author: alvarobartt | File: test_bagging.py | License: MIT License

def test_warm_start_equivalence():
    # warm started classifier with 5+5 estimators should be equivalent to
    # one classifier with 10 estimators
    X, y = make_hastie_10_2(n_samples=20, random_state=1)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=43)

    clf_ws = BaggingClassifier(n_estimators=5, warm_start=True,
                               random_state=3141)
    clf_ws.fit(X_train, y_train)
    clf_ws.set_params(n_estimators=10)
    clf_ws.fit(X_train, y_train)
    y1 = clf_ws.predict(X_test)

    clf = BaggingClassifier(n_estimators=10, warm_start=False,
                            random_state=3141)
    clf.fit(X_train, y_train)
    y2 = clf.predict(X_test)

    assert_array_almost_equal(y1, y2)
Example 16
Project: Mastering-Elasticsearch-7.0 | Author: PacktPublishing | File: test_base.py | License: MIT License

def test_base():
    # Check BaseEnsemble methods.
    ensemble = BaggingClassifier(
        base_estimator=Perceptron(tol=1e-3, random_state=None),
        n_estimators=3)

    iris = load_iris()
    ensemble.fit(iris.data, iris.target)
    ensemble.estimators_ = []  # empty the list and create estimators manually

    ensemble._make_estimator()
    random_state = np.random.RandomState(3)
    ensemble._make_estimator(random_state=random_state)
    ensemble._make_estimator(random_state=random_state)
    ensemble._make_estimator(append=False)

    assert_equal(3, len(ensemble))
    assert_equal(3, len(ensemble.estimators_))

    assert isinstance(ensemble[0], Perceptron)
    assert_equal(ensemble[0].random_state, None)
    assert isinstance(ensemble[1].random_state, int)
    assert isinstance(ensemble[2].random_state, int)
    assert_not_equal(ensemble[1].random_state, ensemble[2].random_state)

    np_int_ensemble = BaggingClassifier(base_estimator=Perceptron(tol=1e-3),
                                        n_estimators=np.int32(3))
    np_int_ensemble.fit(iris.data, iris.target)
Example 17
Project: Mastering-Elasticsearch-7.0 | Author: PacktPublishing | File: test_base.py | License: MIT License

def test_base_zero_n_estimators():
    # Check that instantiating a BaseEnsemble with n_estimators<=0 raises
    # a ValueError.
    ensemble = BaggingClassifier(base_estimator=Perceptron(tol=1e-3),
                                 n_estimators=0)
    iris = load_iris()
    assert_raise_message(ValueError,
                         "n_estimators must be greater than zero, got 0.",
                         ensemble.fit, iris.data, iris.target)
Example 18
Project: Mastering-Elasticsearch-7.0 | Author: PacktPublishing | File: test_base.py | License: MIT License

def test_base_not_int_n_estimators():
    # Check that instantiating a BaseEnsemble with a string as n_estimators
    # raises a ValueError demanding n_estimators to be supplied as an integer.
    string_ensemble = BaggingClassifier(base_estimator=Perceptron(tol=1e-3),
                                        n_estimators='3')
    iris = load_iris()
    assert_raise_message(ValueError,
                         "n_estimators must be an integer",
                         string_ensemble.fit, iris.data, iris.target)

    float_ensemble = BaggingClassifier(base_estimator=Perceptron(tol=1e-3),
                                       n_estimators=3.0)
    assert_raise_message(ValueError,
                         "n_estimators must be an integer",
                         float_ensemble.fit, iris.data, iris.target)
Example 19
Project: Mastering-Elasticsearch-7.0 | Author: PacktPublishing | File: test_bagging.py | License: MIT License

def test_oob_score_classification():
    # Check that oob prediction is a good estimation of the generalization
    # error.
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(iris.data,
                                                        iris.target,
                                                        random_state=rng)

    for base_estimator in [DecisionTreeClassifier(), SVC(gamma="scale")]:
        clf = BaggingClassifier(base_estimator=base_estimator,
                                n_estimators=100,
                                bootstrap=True,
                                oob_score=True,
                                random_state=rng).fit(X_train, y_train)

        test_score = clf.score(X_test, y_test)

        assert_less(abs(test_score - clf.oob_score_), 0.1)

        # Test with few estimators
        assert_warns(UserWarning,
                     BaggingClassifier(base_estimator=base_estimator,
                                       n_estimators=1,
                                       bootstrap=True,
                                       oob_score=True,
                                       random_state=rng).fit,
                     X_train, y_train)
Example 20
Project: Mastering-Elasticsearch-7.0 | Author: PacktPublishing | File: test_bagging.py | License: MIT License

def test_error():
    # Test that it gives proper exception on deficient input.
    X, y = iris.data, iris.target
    base = DecisionTreeClassifier()

    # Test max_samples
    assert_raises(ValueError,
                  BaggingClassifier(base, max_samples=-1).fit, X, y)
    assert_raises(ValueError,
                  BaggingClassifier(base, max_samples=0.0).fit, X, y)
    assert_raises(ValueError,
                  BaggingClassifier(base, max_samples=2.0).fit, X, y)
    assert_raises(ValueError,
                  BaggingClassifier(base, max_samples=1000).fit, X, y)
    assert_raises(ValueError,
                  BaggingClassifier(base, max_samples="foobar").fit, X, y)

    # Test max_features
    assert_raises(ValueError,
                  BaggingClassifier(base, max_features=-1).fit, X, y)
    assert_raises(ValueError,
                  BaggingClassifier(base, max_features=0.0).fit, X, y)
    assert_raises(ValueError,
                  BaggingClassifier(base, max_features=2.0).fit, X, y)
    assert_raises(ValueError,
                  BaggingClassifier(base, max_features=5).fit, X, y)
    assert_raises(ValueError,
                  BaggingClassifier(base, max_features="foobar").fit, X, y)

    # Test support of decision_function
    assert not hasattr(BaggingClassifier(base).fit(X, y), 'decision_function')
Example 21
Project: Mastering-Elasticsearch-7.0 | Author: PacktPublishing | File: test_bagging.py | License: MIT License

def test_gridsearch():
    # Check that bagging ensembles can be grid-searched.
    # Transform iris into a binary classification task
    X, y = iris.data, iris.target
    y[y == 2] = 1

    # Grid search with scoring based on decision_function
    parameters = {'n_estimators': (1, 2),
                  'base_estimator__C': (1, 2)}

    GridSearchCV(BaggingClassifier(SVC(gamma="scale")),
                 parameters,
                 scoring="roc_auc").fit(X, y)
Example 22
Project: Mastering-Elasticsearch-7.0 | Author: PacktPublishing | File: test_bagging.py | License: MIT License

def test_bagging_sample_weight_unsupported_but_passed():
    estimator = BaggingClassifier(DummyZeroEstimator())
    rng = check_random_state(0)

    estimator.fit(iris.data, iris.target).predict(iris.data)
    assert_raises(ValueError, estimator.fit, iris.data, iris.target,
                  sample_weight=rng.randint(10, size=(iris.data.shape[0])))
Example 23
Project: Mastering-Elasticsearch-7.0 | Author: PacktPublishing | File: test_bagging.py | License: MIT License

def test_warm_start_smaller_n_estimators():
    # Test if warm start'ed second fit with smaller n_estimators raises error.
    X, y = make_hastie_10_2(n_samples=20, random_state=1)
    clf = BaggingClassifier(n_estimators=5, warm_start=True)
    clf.fit(X, y)
    clf.set_params(n_estimators=4)
    assert_raises(ValueError, clf.fit, X, y)
Example 24
Project: Mastering-Elasticsearch-7.0 | Author: PacktPublishing | File: test_bagging.py | License: MIT License

def test_oob_score_removed_on_warm_start():
    X, y = make_hastie_10_2(n_samples=2000, random_state=1)

    clf = BaggingClassifier(n_estimators=50, oob_score=True)
    clf.fit(X, y)

    clf.set_params(warm_start=True, oob_score=False, n_estimators=100)
    clf.fit(X, y)

    assert_raises(AttributeError, getattr, clf, "oob_score_")
Example 25
Project: Mastering-Elasticsearch-7.0 | Author: PacktPublishing | File: test_bagging.py | License: MIT License

def test_oob_score_consistency():
    # Make sure OOB scores are identical when random_state, estimator, and
    # training data are fixed and fitting is done twice
    X, y = make_hastie_10_2(n_samples=200, random_state=1)
    bagging = BaggingClassifier(KNeighborsClassifier(),
                                max_samples=0.5,
                                max_features=0.5,
                                oob_score=True,
                                random_state=1)
    assert_equal(bagging.fit(X, y).oob_score_, bagging.fit(X, y).oob_score_)
Example 26
Project: Mastering-Elasticsearch-7.0 | Author: PacktPublishing | File: test_bagging.py | License: MIT License

def test_estimators_samples():
    # Check that format of estimators_samples_ is correct and that results
    # generated at fit time can be identically reproduced at a later time
    # using data saved in object attributes.
    X, y = make_hastie_10_2(n_samples=200, random_state=1)
    bagging = BaggingClassifier(LogisticRegression(),
                                max_samples=0.5,
                                max_features=0.5,
                                random_state=1,
                                bootstrap=False)
    bagging.fit(X, y)

    # Get relevant attributes
    estimators_samples = bagging.estimators_samples_
    estimators_features = bagging.estimators_features_
    estimators = bagging.estimators_

    # Test for correct formatting
    assert_equal(len(estimators_samples), len(estimators))
    assert_equal(len(estimators_samples[0]), len(X) // 2)
    assert_equal(estimators_samples[0].dtype.kind, 'i')

    # Re-fit single estimator to test for consistent sampling
    estimator_index = 0
    estimator_samples = estimators_samples[estimator_index]
    estimator_features = estimators_features[estimator_index]
    estimator = estimators[estimator_index]

    X_train = (X[estimator_samples])[:, estimator_features]
    y_train = y[estimator_samples]

    orig_coefs = estimator.coef_
    estimator.fit(X_train, y_train)
    new_coefs = estimator.coef_

    assert_array_almost_equal(orig_coefs, new_coefs)
Example 27
Project: Mastering-Elasticsearch-7.0 | Author: PacktPublishing | File: test_bagging.py | License: MIT License

def test_estimators_samples_deterministic():
    # This test is a regression test to check that with a random step
    # (e.g. SparseRandomProjection) and a given random state, the results
    # generated at fit time can be identically reproduced at a later time
    # using data saved in object attributes. Check issue #9524 for full
    # discussion.
    iris = load_iris()
    X, y = iris.data, iris.target

    base_pipeline = make_pipeline(SparseRandomProjection(n_components=2),
                                  LogisticRegression())
    clf = BaggingClassifier(base_estimator=base_pipeline,
                            max_samples=0.5,
                            random_state=0)
    clf.fit(X, y)
    pipeline_estimator_coef = clf.estimators_[0].steps[-1][1].coef_.copy()

    estimator = clf.estimators_[0]
    estimator_sample = clf.estimators_samples_[0]
    estimator_feature = clf.estimators_features_[0]

    X_train = (X[estimator_sample])[:, estimator_feature]
    y_train = y[estimator_sample]

    estimator.fit(X_train, y_train)
    assert_array_equal(estimator.steps[-1][1].coef_, pipeline_estimator_coef)
Example 28
Project: Mastering-Elasticsearch-7.0 | Author: PacktPublishing | File: test_bagging.py | License: MIT License

def test_max_samples_consistency():
    # Make sure validated max_samples and original max_samples are identical
    # when valid integer max_samples supplied by user
    max_samples = 100
    X, y = make_hastie_10_2(n_samples=2 * max_samples, random_state=1)
    bagging = BaggingClassifier(KNeighborsClassifier(),
                                max_samples=max_samples,
                                max_features=0.5,
                                random_state=1)
    bagging.fit(X, y)
    assert_equal(bagging._max_samples, max_samples)
Example 29
Project: Mastering-Elasticsearch-7.0 | Author: PacktPublishing | File: test_bagging.py | License: MIT License

def test_bagging_classifier_with_missing_inputs():
    # Check that BaggingClassifier can accept X with missing/infinite data
    X = np.array([
        [1, 3, 5],
        [2, None, 6],
        [2, np.nan, 6],
        [2, np.inf, 6],
        [2, np.NINF, 6],
    ])
    y = np.array([3, 6, 6, 6, 6])

    classifier = DecisionTreeClassifier()
    pipeline = make_pipeline(
        FunctionTransformer(replace, validate=False),
        classifier
    )
    pipeline.fit(X, y).predict(X)

    bagging_classifier = BaggingClassifier(pipeline)
    bagging_classifier.fit(X, y)
    y_hat = bagging_classifier.predict(X)
    assert_equal(y.shape, y_hat.shape)
    bagging_classifier.predict_log_proba(X)
    bagging_classifier.predict_proba(X)

    # Verify that exceptions can be raised by wrapper classifier
    classifier = DecisionTreeClassifier()
    pipeline = make_pipeline(classifier)
    assert_raises(ValueError, pipeline.fit, X, y)

    bagging_classifier = BaggingClassifier(pipeline)
    assert_raises(ValueError, bagging_classifier.fit, X, y)
Example 30
Project: Mastering-Elasticsearch-7.0 | Author: PacktPublishing | File: test_bagging.py | License: MIT License

def test_bagging_small_max_features():
    # Check that Bagging estimator can accept low fractional max_features
    X = np.array([[1, 2], [3, 4]])
    y = np.array([1, 0])

    bagging = BaggingClassifier(LogisticRegression(),
                                max_features=0.3,
                                random_state=1)
    bagging.fit(X, y)