Python sklearn.ensemble.BaggingRegressor() Examples

The following are 25 code examples showing how to use sklearn.ensemble.BaggingRegressor(). These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example.

You may check out the related API usage on the sidebar.

You may also want to check out all available functions/classes of the module sklearn.ensemble, or try the search function.

Example 1
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_bagging.py    License: MIT License 7 votes vote down vote up
def test_regression():
    """Smoke-test BaggingRegressor over a grid of sampling settings.

    Every combination of sample/feature fractions and bootstrap flags is
    fitted and used for prediction with each base estimator. Nothing is
    asserted about the predictions; the test passes if no call raises.
    """
    rng = check_random_state(0)
    # Only the first 50 rows are used (presumably to keep the sweep fast).
    X_train, X_test, y_train, y_test = train_test_split(
        boston.data[:50], boston.target[:50], random_state=rng)

    param_grid = ParameterGrid({
        "max_samples": [0.5, 1.0],
        "max_features": [0.5, 1.0],
        "bootstrap": [True, False],
        "bootstrap_features": [True, False],
    })
    # The None entry exercises BaggingRegressor without an explicit base
    # estimator.
    candidates = [
        None,
        DummyRegressor(),
        DecisionTreeRegressor(),
        KNeighborsRegressor(),
        SVR(gamma='scale'),
    ]

    for candidate in candidates:
        for params in param_grid:
            model = BaggingRegressor(base_estimator=candidate,
                                     random_state=rng,
                                     **params)
            model.fit(X_train, y_train).predict(X_test)
Example 2
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_bagging.py    License: MIT License 6 votes vote down vote up
def test_bootstrap_features():
    """Check that bootstrapping features may generate duplicate features.

    Without ``bootstrap_features`` every estimator must see each column
    exactly once. With it, the number of distinct columns per estimator is
    asserted to be strictly smaller than the total number of columns.
    """
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(
        boston.data, boston.target, random_state=rng)
    n_features = boston.data.shape[1]

    # (bootstrap_features flag, assertion comparing n_features with the
    # number of distinct features drawn). The order matters: both fits
    # consume the same ``rng``.
    cases = [(False, assert_equal), (True, assert_greater)]
    for with_replacement, check in cases:
        ensemble = BaggingRegressor(base_estimator=DecisionTreeRegressor(),
                                    max_features=1.0,
                                    bootstrap_features=with_replacement,
                                    random_state=rng)
        ensemble.fit(X_train, y_train)
        for drawn in ensemble.estimators_features_:
            check(n_features, np.unique(drawn).shape[0])
Example 3
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_bagging.py    License: MIT License 6 votes vote down vote up
def test_parallel_regression():
    """Check that predictions do not depend on ``n_jobs``."""
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(
        boston.data, boston.target, random_state=rng)

    # Fit with three jobs, then predict with one and with two jobs.
    parallel = BaggingRegressor(DecisionTreeRegressor(),
                                n_jobs=3,
                                random_state=0)
    parallel.fit(X_train, y_train)

    parallel.set_params(n_jobs=1)
    pred_one_job = parallel.predict(X_test)
    parallel.set_params(n_jobs=2)
    pred_two_jobs = parallel.predict(X_test)
    assert_array_almost_equal(pred_one_job, pred_two_jobs)

    # An ensemble fitted with a single job from the same seed must agree too.
    sequential = BaggingRegressor(DecisionTreeRegressor(),
                                  n_jobs=1,
                                  random_state=0)
    sequential.fit(X_train, y_train)
    assert_array_almost_equal(pred_one_job, sequential.predict(X_test))
Example 4
Project: twitter-stock-recommendation   Author: alvarobartt   File: test_bagging.py    License: MIT License 6 votes vote down vote up
def test_regression():
    """Run BaggingRegressor through every setting of a small parameter grid.

    For each base estimator and each grid point the ensemble is fitted and
    asked to predict. No values are asserted; the test passes when none of
    the calls raises.
    """
    rng = check_random_state(0)
    # Only the first 50 rows of the Boston data are used.
    X_train, X_test, y_train, y_test = train_test_split(
        boston.data[:50], boston.target[:50], random_state=rng)

    settings = ParameterGrid({
        "max_samples": [0.5, 1.0],
        "max_features": [0.5, 1.0],
        "bootstrap": [True, False],
        "bootstrap_features": [True, False],
    })
    # The None entry exercises BaggingRegressor without an explicit base
    # estimator.
    base_estimators = [
        None,
        DummyRegressor(),
        DecisionTreeRegressor(),
        KNeighborsRegressor(),
        SVR(),
    ]

    for estimator in base_estimators:
        for params in settings:
            bagged = BaggingRegressor(base_estimator=estimator,
                                      random_state=rng,
                                      **params)
            bagged.fit(X_train, y_train).predict(X_test)
Example 5
Project: twitter-stock-recommendation   Author: alvarobartt   File: test_bagging.py    License: MIT License 6 votes vote down vote up
def test_bootstrap_features():
    """Check that bootstrapping features may generate duplicate features.

    First the ensemble is fitted without feature bootstrapping and each
    estimator must have received every column exactly once. Then it is
    fitted with feature bootstrapping and each estimator must have received
    strictly fewer distinct columns than exist.
    """
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(
        boston.data, boston.target, random_state=rng)
    total_columns = boston.data.shape[1]

    def fit_trees(bootstrap_features):
        """Fit a bagged tree ensemble drawing all features per estimator."""
        return BaggingRegressor(base_estimator=DecisionTreeRegressor(),
                                max_features=1.0,
                                bootstrap_features=bootstrap_features,
                                random_state=rng).fit(X_train, y_train)

    # Both fits consume the shared ``rng``, so their order is significant.
    for columns in fit_trees(False).estimators_features_:
        assert_equal(total_columns, np.unique(columns).shape[0])

    for columns in fit_trees(True).estimators_features_:
        assert_greater(total_columns, np.unique(columns).shape[0])
Example 6
Project: twitter-stock-recommendation   Author: alvarobartt   File: test_bagging.py    License: MIT License 6 votes vote down vote up
def test_parallel_regression():
    """Check parallel regression: predictions must not vary with ``n_jobs``."""
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(
        boston.data, boston.target, random_state=rng)

    # One ensemble fitted with 3 jobs, queried with 1 and then 2 jobs.
    fitted_parallel = BaggingRegressor(DecisionTreeRegressor(),
                                       n_jobs=3,
                                       random_state=0)
    fitted_parallel.fit(X_train, y_train)

    predictions = []
    for jobs in (1, 2):
        fitted_parallel.set_params(n_jobs=jobs)
        predictions.append(fitted_parallel.predict(X_test))
    baseline, with_two_jobs = predictions
    assert_array_almost_equal(baseline, with_two_jobs)

    # A second ensemble fitted with a single job and the same seed.
    fitted_serial = BaggingRegressor(DecisionTreeRegressor(),
                                     n_jobs=1,
                                     random_state=0)
    fitted_serial.fit(X_train, y_train)
    assert_array_almost_equal(baseline, fitted_serial.predict(X_test))
Example 7
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_bagging.py    License: MIT License 5 votes vote down vote up
def test_bootstrap_samples():
    """Check that bootstrapping samples generates non-perfect base estimators."""
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(
        boston.data, boston.target, random_state=rng)

    # Reference: training-set score of a single tree fitted on all samples.
    single_tree = DecisionTreeRegressor().fit(X_train, y_train)
    tree_score = single_tree.score(X_train, y_train)

    # Without bootstrap, all trees are perfect on the training set, so the
    # ensemble must score exactly like the single tree.
    full_sample = BaggingRegressor(base_estimator=DecisionTreeRegressor(),
                                   max_samples=1.0,
                                   bootstrap=False,
                                   random_state=rng)
    full_sample.fit(X_train, y_train)
    assert_equal(tree_score, full_sample.score(X_train, y_train))

    # With bootstrap, trees are no longer perfect on the training set.
    resampled = BaggingRegressor(base_estimator=DecisionTreeRegressor(),
                                 max_samples=1.0,
                                 bootstrap=True,
                                 random_state=rng)
    resampled.fit(X_train, y_train)
    assert_greater(tree_score, resampled.score(X_train, y_train))

    # Each draw must be a complete bootstrap resample: as large as the input
    # data, but with different content (compared through the hash that each
    # DummySizeEstimator recorded).
    sized = BaggingRegressor(base_estimator=DummySizeEstimator(),
                             bootstrap=True)
    sized.fit(X_train, y_train)
    seen_hashes = []
    for member in sized.estimators_:
        assert member.training_size_ == X_train.shape[0]
        seen_hashes.append(member.training_hash_)
    assert len(set(seen_hashes)) == len(seen_hashes)
Example 8
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_bagging.py    License: MIT License 5 votes vote down vote up
def test_oob_score_regression():
    """Check that the OOB score is a good estimate of the generalization error.

    The out-of-bag score of a 50-tree ensemble must lie within 0.1 of its
    score on the held-out test split. Fitting with a single estimator is
    additionally expected to emit a ``UserWarning``.
    """
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(
        boston.data, boston.target, random_state=rng)

    regressor = BaggingRegressor(base_estimator=DecisionTreeRegressor(),
                                 n_estimators=50,
                                 bootstrap=True,
                                 oob_score=True,
                                 random_state=rng)
    regressor.fit(X_train, y_train)

    held_out_score = regressor.score(X_test, y_test)
    assert_less(abs(held_out_score - regressor.oob_score_), 0.1)

    # Test with few estimators: fit itself is what should warn, so the bound
    # method is handed to assert_warns uncalled.
    lone_tree = BaggingRegressor(base_estimator=DecisionTreeRegressor(),
                                 n_estimators=1,
                                 bootstrap=True,
                                 oob_score=True,
                                 random_state=rng)
    assert_warns(UserWarning, lone_tree.fit, X_train, y_train)
Example 9
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_bagging.py    License: MIT License 5 votes vote down vote up
def test_bagging_regressor_with_missing_inputs():
    """Check that BaggingRegressor can accept X with missing/infinite data.

    For both a 1-D and a 2-D target, a pipeline that first applies
    ``replace`` (a helper defined elsewhere in this module; presumably it
    cleans the non-finite entries) must fit and predict, both on its own and
    wrapped in a BaggingRegressor. A pipeline without that step is expected
    to raise ``ValueError``, directly and through the bagging wrapper.
    """
    X = np.array([
        [1, 3, 5],
        [2, None, 6],
        [2, np.nan, 6],
        [2, np.inf, 6],
        # ``np.NINF`` was removed in NumPy 2.0; ``-np.inf`` is the same value
        # and works on every NumPy version.
        [2, -np.inf, 6],
    ])
    y_values = [
        np.array([2, 3, 3, 3, 3]),
        np.array([
            [2, 1, 9],
            [3, 6, 8],
            [3, 6, 8],
            [3, 6, 8],
            [3, 6, 8],
        ])
    ]
    for y in y_values:
        regressor = DecisionTreeRegressor()
        pipeline = make_pipeline(
            FunctionTransformer(replace, validate=False),
            regressor
        )
        pipeline.fit(X, y).predict(X)
        bagging_regressor = BaggingRegressor(pipeline)
        y_hat = bagging_regressor.fit(X, y).predict(X)
        # Predictions must have the same shape as the target.
        assert_equal(y.shape, y_hat.shape)

        # Verify that exceptions can be raised by wrapper regressor
        regressor = DecisionTreeRegressor()
        pipeline = make_pipeline(regressor)
        assert_raises(ValueError, pipeline.fit, X, y)
        bagging_regressor = BaggingRegressor(pipeline)
        assert_raises(ValueError, bagging_regressor.fit, X, y)
Example 10
Project: rampy   Author: charlesll   File: ml_regressor.py    License: GNU General Public License v2.0 5 votes vote down vote up
def fit(self):
        """Scale the data, build the selected model, train it and predict.

        The algorithm is not an argument of this method: it is read from
        ``self.algorithm`` and compared against "KernelRidge", "SVM",
        "Lasso", "ElasticNet", "LinearRegression", "NeuralNet" and
        "BaggingNeuralNet". Any other value leaves ``self.model`` untouched.

        Do not forget to tune the hyperparameters.

        Results are stored on the instance: the scaled copies of the
        train/test data (``X_train_sc``, ``y_train_sc``, ``X_test_sc``,
        ``y_test_sc``), the model, and ``prediction_train`` /
        ``prediction_test``.
        """
        x_scaler, y_scaler = self.X_scaler, self.Y_scaler
        x_scaler.fit(self.X_train)
        y_scaler.fit(self.y_train)

        # The scaled copies are always computed, even when the fit below
        # ends up using the raw data.
        self.X_train_sc = x_scaler.transform(self.X_train)
        self.y_train_sc = y_scaler.transform(self.y_train)
        self.X_test_sc = x_scaler.transform(self.X_test)
        self.y_test_sc = y_scaler.transform(self.y_test)

        algorithm = self.algorithm
        if algorithm == "KernelRidge":
            self.model = sklearn.model_selection.GridSearchCV(
                KernelRidge(kernel=self.user_kernel),
                cv=5, param_grid=self.param_kr)
        elif algorithm == "SVM":
            self.model = sklearn.model_selection.GridSearchCV(
                SVR(kernel=self.user_kernel),
                cv=5, param_grid=self.param_svm)
        elif algorithm == "Lasso":
            lasso = sklearn.linear_model.Lasso(
                alpha=0.1, random_state=self.rand_state)
            self.model = sklearn.model_selection.GridSearchCV(
                lasso, cv=5, param_grid={"alpha": np.logspace(-5, 5, 30)})
        elif algorithm == "ElasticNet":
            elastic_net = sklearn.linear_model.ElasticNet(
                alpha=0.1, l1_ratio=0.5, random_state=self.rand_state)
            self.model = sklearn.model_selection.GridSearchCV(
                elastic_net, cv=5,
                param_grid={"alpha": np.logspace(-5, 5, 30)})
        elif algorithm == "LinearRegression":
            self.model = sklearn.linear_model.LinearRegression()
        elif algorithm == "NeuralNet":
            self.model = MLPRegressor(**self.param_neurons)
        elif algorithm == "BaggingNeuralNet":
            self.model = BaggingRegressor(
                base_estimator=MLPRegressor(**self.param_neurons),
                **self.param_bag)

        model = self.model
        # The explicit comparison with True is kept as written; a plain
        # truthiness test would accept more values.
        if self.scaling == True:
            model.fit(self.X_train_sc, self.y_train_sc.reshape(-1,))

            def to_original_units(scaled_prediction):
                # The Y scaler is applied to a single-column 2-D array.
                return y_scaler.inverse_transform(
                    scaled_prediction.reshape(-1, 1))

            self.prediction_train = to_original_units(
                model.predict(self.X_train_sc))
            self.prediction_test = to_original_units(
                model.predict(self.X_test_sc))
        else:
            model.fit(self.X_train, self.y_train.reshape(-1,))
            self.prediction_train = model.predict(self.X_train)
            self.prediction_test = model.predict(self.X_test)
Example 11
Project: automl-phase-2   Author: jamesrobertlloyd   File: models.py    License: MIT License 5 votes vote down vote up
def __init__(self, info, verbose=True, debug_mode=False):
        """Select and instantiate a model from the dataset description.

        Parameters
        ----------
        info : dict
            Dataset description. The keys 'label_num', 'target_num', 'task'
            and 'metric' are always read; 'is_sparse' and 'has_categorical'
            are read depending on the branch taken.
        verbose : bool or int
            Forwarded as ``verbose`` to the chosen estimator.
        debug_mode : bool or int
            Values >= 2 select a ``RandomPredictor`` and skip the normal
            model selection.

        After construction ``self.name`` holds the model's label,
        ``self.model`` the estimator and ``self.predict_method`` the bound
        method to use for predictions.
        """
        self.label_num = info['label_num']
        self.target_num = info['target_num']
        self.task = info['task']
        self.metric = info['metric']
        # Post-processor intended to calibrate probabilities.
        self.postprocessor = MultiLabelEnsemble(LogisticRegression(), balance=False)
        if debug_mode >= 2:
            self.name = "RandomPredictor"
            self.model = RandomPredictor(self.target_num)
            self.predict_method = self.model.predict_proba
            return
        if info['task'] == 'regression':
            if info['is_sparse'] == True:
                self.name = "BaggingRidgeRegressor"
                # unfortunately, no warm start...
                self.model = BaggingRegressor(base_estimator=Ridge(), n_estimators=1, verbose=verbose)
            else:
                self.name = "GradientBoostingRegressor"
                self.model = GradientBoostingRegressor(n_estimators=1, verbose=verbose, warm_start=True)
            # Regression uses plain predict rather than predict_proba.
            self.predict_method = self.model.predict
        else:
            if info['has_categorical']:
                # Out of laziness, categorical variables are not converted.
                self.name = "RandomForestClassifier"
                # unfortunately, no warm start...
                self.model = RandomForestClassifier(n_estimators=1, verbose=verbose)
            elif info['is_sparse']:
                self.name = "BaggingNBClassifier"
                # unfortunately, no warm start...
                self.model = BaggingClassifier(base_estimator=BernoulliNB(), n_estimators=1, verbose=verbose)
            else:
                self.name = "GradientBoostingClassifier"
                # Built directly instead of through eval() on a concatenated
                # string: same estimator, without evaluating generated code.
                self.model = GradientBoostingClassifier(
                    n_estimators=1, verbose=verbose, min_samples_split=10,
                    random_state=1, warm_start=True)
            if info['task'] == 'multilabel.classification':
                self.model = MultiLabelEnsemble(self.model)
            self.predict_method = self.model.predict_proba
Example 12
Project: Supply-demand-forecasting   Author: LevinJ   File: baggingmodel.py    License: MIT License 5 votes vote down vote up
def setClf(self):
        """Install a ``BaggingRegressor`` on ``self.clf``.

        The ensemble is configured with 100 estimators, ``max_samples`` and
        ``max_features`` of 0.5, and ``verbose=100``.
        """
        bagging_options = {
            'n_estimators': 100,
            'max_samples': 0.5,
            'max_features': 0.5,
            'verbose': 100,
        }
        self.clf = BaggingRegressor(**bagging_options)
Example 13
Project: mltk-algo-contrib   Author: splunk   File: BaggingRegressor.py    License: Apache License 2.0 5 votes vote down vote up
def __init__(self, options):
        """Build the wrapped bagging regressor from the given options.

        ``options`` is first passed to ``self.handle_options``. Its optional
        'params' mapping (empty when absent) is then run through
        ``convert_params`` and the result is used as keyword arguments for
        ``_BaggingRegressor``, stored on ``self.estimator``.
        """
        self.handle_options(options)

        # Parameter names grouped by the type convert_params is asked for.
        float_names = ['max_samples', 'max_features']
        bool_names = ['bootstrap', 'bootstrap_features', 'oob_score', 'warm_start']
        int_names = ['n_estimators']

        estimator_kwargs = convert_params(
            options.get('params', {}),
            floats=float_names,
            bools=bool_names,
            ints=int_names,
        )
        self.estimator = _BaggingRegressor(**estimator_kwargs)
Example 14
Project: skoot   Author: tgsmith61591   File: impute.py    License: MIT License 5 votes vote down vote up
def __init__(self, cols=None, predictors=None, base_estimator=None,
                 n_estimators=10, max_samples=1.0, max_features=1.0,
                 bootstrap=True, bootstrap_features=False, n_jobs=1,
                 random_state=None, verbose=0, tmp_fill=-999., as_df=True):
        """Configure the imputer with ``BaggingRegressor`` as its model class.

        Every argument is handed unchanged, by keyword, to the parent
        initializer, which additionally receives
        ``imputer_class=BaggingRegressor``.
        """
        super(BaggedRegressorImputer, self).__init__(
            imputer_class=BaggingRegressor,
            cols=cols,
            predictors=predictors,
            base_estimator=base_estimator,
            n_estimators=n_estimators,
            max_samples=max_samples,
            max_features=max_features,
            bootstrap=bootstrap,
            bootstrap_features=bootstrap_features,
            n_jobs=n_jobs,
            random_state=random_state,
            verbose=verbose,
            tmp_fill=tmp_fill,
            as_df=as_df,
        )
Example 15
Project: AirBnbPricePrediction   Author: PouyaREZ   File: baselines.py    License: MIT License 5 votes vote down vote up
def get_ensemble_models():
    """Return the baseline ensemble regressors and their display names.

    Returns
    -------
    tuple of (list, list)
        The unfitted estimators (random forest, bagging, extra trees,
        AdaBoost, gradient boosting) and, in the same order, their names.
    """
    # Use about 80% of the cores (``n_cores`` is a module-level value), but
    # never fewer than one worker: int(0.8 * n_cores) is 0 on a single-core
    # machine, and n_jobs=0 is rejected by joblib when the estimator is fit.
    n_jobs = max(1, int(0.8 * n_cores))

    rf = RandomForestRegressor(
        n_estimators=51, min_samples_leaf=5, min_samples_split=3,
        random_state=42, n_jobs=n_jobs)
    bag = BaggingRegressor(n_estimators=51, random_state=42, n_jobs=n_jobs)
    extra = ExtraTreesRegressor(n_estimators=71, random_state=42, n_jobs=n_jobs)
    ada = AdaBoostRegressor(random_state=42)
    grad = GradientBoostingRegressor(n_estimators=101, random_state=42)

    classifier_list = [rf, bag, extra, ada, grad]
    classifier_name_list = ['Random Forests', 'Bagging',
                            'Extra Trees', 'AdaBoost', 'Gradient Boost']
    return classifier_list, classifier_name_list
Example 16
Project: pandas-ml   Author: pandas-ml   File: test_ensemble.py    License: BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def test_objectmapper(self):
        """Check that ``ModelFrame.ensemble`` exposes the ``ensemble`` classes.

        Each listed attribute of ``df.ensemble`` must be the very same object
        as the attribute of that name on the ``ensemble`` module.
        """
        df = pdml.ModelFrame([])
        estimator_names = [
            'AdaBoostClassifier',
            'AdaBoostRegressor',
            'BaggingClassifier',
            'BaggingRegressor',
            'ExtraTreesClassifier',
            'ExtraTreesRegressor',
            'GradientBoostingClassifier',
            'GradientBoostingRegressor',
            'IsolationForest',
            'RandomForestClassifier',
            'RandomTreesEmbedding',
            'RandomForestRegressor',
            'VotingClassifier',
        ]
        for name in estimator_names:
            self.assertIs(getattr(df.ensemble, name),
                          getattr(ensemble, name))
Example 17
Project: abu   Author: bbfamily   File: ABuMLCreater.py    License: GNU General Public License v3.0 5 votes vote down vote up
def bagging_regressor(self, assign=True, base_estimator=DecisionTreeRegressor(), **kwargs):
        """
        Supervised-learning regressor: instantiate a BaggingRegressor. By default uses:
            BaggingRegressor(base_estimator=base_estimator, n_estimators=200,
                             bootstrap=True, oob_score=True, random_state=1)

        Keyword arguments given through **kwargs are passed straight to
        BaggingRegressor together with base_estimator, i.e.:
            BaggingRegressor(base_estimator=base_estimator, **kwargs)

        NOTE: the default base_estimator instance is created once, when the
        function is defined, and is shared by every call that relies on it.

        :param base_estimator: defaults to DecisionTreeRegressor()
        :param assign: whether to keep the instantiated BaggingRegressor on the
                       object, default True, i.e. self.reg = reg
        :param kwargs: when given: BaggingRegressor(base_estimator=base_estimator, **kwargs)
                       when empty: BaggingRegressor(base_estimator=base_estimator, n_estimators=200,
                                                    bootstrap=True, oob_score=True, random_state=1)
        :return: the instantiated BaggingRegressor object
        """
        if not kwargs:
            # No overrides supplied: fall back to the documented defaults.
            kwargs = dict(n_estimators=200, bootstrap=True,
                          oob_score=True, random_state=1)
        kwargs.setdefault('base_estimator', base_estimator)
        reg = BaggingRegressor(**kwargs)

        if assign:
            self.reg = reg
        return reg
Example 18
Project: abu   Author: bbfamily   File: ABuMLCreater.py    License: GNU General Public License v3.0 5 votes vote down vote up
def bagging_regressor_best(self, x, y, param_grid=None, assign=True, n_jobs=-1, show=True):
        """
        Search for the best parameters of the BaggingRegressor constructor.
        The higher-level AbuML.bagging_regressor_best calls this directly with
        the x, y data held by AbuML.
        eg:
            calling bagging_regressor_best without param_grid:

            from abupy import AbuML, ml
            ttn_abu = AbuML.create_test_more_fiter()
            ttn_abu.bagging_regressor_best()

            calling bagging_regressor_best with param_grid:

            param_grid = {'max_samples': np.arange(1, 5), 'n_estimators': np.arange(100, 300, 50)}
            ttn_abu.bagging_regressor_best(param_grid=param_grid, n_jobs=-1)

            out: BaggingRegressor(max_samples=4, n_estimators=250)


        :param x: training-set x matrix, numpy matrix
        :param y: training-set y sequence, numpy sequence
        :param param_grid: dict of keyword parameters to search over,
                        eg:param_grid = {'max_samples': np.arange(1, 5), 'n_estimators': np.arange(100, 300, 50)}
        :param assign: whether to keep the learner built with the best parameters, default True
        :param n_jobs: number of parallel worker processes, default -1, i.e. as many processes as cpus
        :param show: whether to visualise the result of the parameter search
        :return: the BaggingRegressor object built with the best parameters
        """
        # Everything is delegated to the shared search helper, with the
        # bagging_regressor factory as the estimator builder.
        search_args = (x, y, param_grid, assign, n_jobs, show)
        return self._estimators_prarms_best(self.bagging_regressor, *search_args)
Example 19
Project: twitter-stock-recommendation   Author: alvarobartt   File: test_bagging.py    License: MIT License 5 votes vote down vote up
def test_bootstrap_samples():
    """Check that bootstrapping samples generates non-perfect base estimators."""
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(
        boston.data, boston.target, random_state=rng)

    # Reference: training-set score of a single tree fitted on all samples.
    reference_score = (DecisionTreeRegressor()
                       .fit(X_train, y_train)
                       .score(X_train, y_train))

    def bagged_training_score(bootstrap):
        """Fit bagged trees on full-size draws and score them on the training set."""
        ensemble = BaggingRegressor(base_estimator=DecisionTreeRegressor(),
                                    max_samples=1.0,
                                    bootstrap=bootstrap,
                                    random_state=rng)
        return ensemble.fit(X_train, y_train).score(X_train, y_train)

    # Without bootstrap, all trees are perfect on the training set.
    assert_equal(reference_score, bagged_training_score(False))

    # With bootstrap, trees are no longer perfect on the training set.
    assert_greater(reference_score, bagged_training_score(True))
Example 20
Project: twitter-stock-recommendation   Author: alvarobartt   File: test_bagging.py    License: MIT License 5 votes vote down vote up
def test_oob_score_regression():
    """Check that OOB prediction is a good estimate of the generalization error.

    A 50-tree ensemble must have an out-of-bag score within 0.1 of its score
    on the test split, and fitting a one-tree ensemble with ``oob_score``
    enabled is expected to emit a ``UserWarning``.
    """
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(
        boston.data, boston.target, random_state=rng)

    def make_ensemble(n_estimators):
        """Build an unfitted bagged-tree regressor with OOB scoring enabled."""
        return BaggingRegressor(base_estimator=DecisionTreeRegressor(),
                                n_estimators=n_estimators,
                                bootstrap=True,
                                oob_score=True,
                                random_state=rng)

    fitted = make_ensemble(50).fit(X_train, y_train)
    score_gap = abs(fitted.score(X_test, y_test) - fitted.oob_score_)
    assert_less(score_gap, 0.1)

    # Test with few estimators: the warning must come from fit itself.
    assert_warns(UserWarning, make_ensemble(1).fit, X_train, y_train)
Example 21
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_bagging.py    License: MIT License 4 votes vote down vote up
def test_sparse_regression():
    """Check regression for various parameter settings on sparse input.

    For CSC and CSR input and each parameter set, an ensemble trained on the
    sparse matrix must predict (almost) the same values as one trained on
    the dense data, and every base estimator must have been fitted on data
    of the sparse matrix type.
    """
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(boston.data[:50],
                                                        boston.target[:50],
                                                        random_state=rng)

    class CustomSVR(SVR):
        """SVR variant that records the type of the training set."""

        def fit(self, X, y):
            super().fit(X, y)
            self.data_type_ = type(X)
            return self

    parameter_sets = [
        {"max_samples": 0.5,
         "max_features": 2,
         "bootstrap": True,
         "bootstrap_features": True},
        {"max_samples": 1.0,
         "max_features": 4,
         "bootstrap": True,
         "bootstrap_features": True},
        {"max_features": 2,
         "bootstrap": False,
         "bootstrap_features": True},
        {"max_samples": 0.5,
         "bootstrap": True,
         "bootstrap_features": False},
    ]

    for sparse_format in [csc_matrix, csr_matrix]:
        X_train_sparse = sparse_format(X_train)
        X_test_sparse = sparse_format(X_test)
        sparse_type = type(X_train_sparse)
        for params in parameter_sets:

            # Trained on sparse format
            sparse_regressor = BaggingRegressor(
                base_estimator=CustomSVR(gamma='scale'),
                random_state=1,
                **params
            ).fit(X_train_sparse, y_train)
            sparse_results = sparse_regressor.predict(X_test_sparse)

            # Trained on dense format
            dense_results = BaggingRegressor(
                base_estimator=CustomSVR(gamma='scale'),
                random_state=1,
                **params
            ).fit(X_train, y_train).predict(X_test)

            # The comparison was previously asserted twice; once is enough.
            assert_array_almost_equal(sparse_results, dense_results)
            # The sparse format must reach the base estimators unchanged.
            assert all(estimator.data_type_ == sparse_type
                       for estimator in sparse_regressor.estimators_)
Example 22
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_bagging.py    License: MIT License 4 votes vote down vote up
def test_base_estimator():
    """Check ``base_estimator_`` for default and explicit base estimators.

    Passing ``None`` must resolve to a decision tree (classifier or
    regressor respectively); an explicit estimator must be reflected by an
    instance of its own class.
    """
    rng = check_random_state(0)

    def resolved_base(bagging_cls, base, X, y):
        """Fit a 3-job bagging ensemble and return its ``base_estimator_``."""
        ensemble = bagging_cls(base, n_jobs=3, random_state=0).fit(X, y)
        return ensemble.base_estimator_

    # Classification
    X_train, X_test, y_train, y_test = train_test_split(
        iris.data, iris.target, random_state=rng)
    classifier_cases = [
        (None, DecisionTreeClassifier),
        (DecisionTreeClassifier(), DecisionTreeClassifier),
        (Perceptron(tol=1e-3), Perceptron),
    ]
    for base, expected_type in classifier_cases:
        assert isinstance(
            resolved_base(BaggingClassifier, base, X_train, y_train),
            expected_type)

    # Regression
    X_train, X_test, y_train, y_test = train_test_split(
        boston.data, boston.target, random_state=rng)
    regressor_cases = [
        (None, DecisionTreeRegressor),
        (DecisionTreeRegressor(), DecisionTreeRegressor),
        (SVR(gamma='scale'), SVR),
    ]
    for base, expected_type in regressor_cases:
        assert isinstance(
            resolved_base(BaggingRegressor, base, X_train, y_train),
            expected_type)
Example 23
Project: AmusingPythonCodes   Author: IsaacChanghau   File: pca_regression.py    License: MIT License 4 votes vote down vote up
def lets_try(train, labels):
    """Cross-validate a fixed set of regressors and chart their mean R^2.

    Every candidate model is scored with 5-fold shuffled cross-validation
    (``random_state=45``) using an R^2 scorer; the mean of the fold scores
    is stored under the model's display name.  The scores are collected in
    a one-column DataFrame, drawn as a bar chart, and the y-axis of the
    current matplotlib axes is limited to [0.5, 1].

    Parameters
    ----------
    train
        Feature data, forwarded unchanged to ``cross_val_score``.
    labels
        Target values, forwarded unchanged to ``cross_val_score``.

    Returns
    -------
    pandas.DataFrame
        One row per model (in evaluation order) with a single
        "R Square Score" column.
    """

    def mean_r2(model):
        # A fresh splitter and scorer are created for every model.
        folds = KFold(n_splits=5, shuffle=True, random_state=45)
        scorer = make_scorer(r2_score)
        fold_scores = cross_val_score(model, train, labels,
                                      cv=folds, scoring=scorer)
        # Wrapped in a list so from_dict(orient='index') yields one column.
        return [fold_scores.mean()]

    # (display name, zero-argument factory) pairs; each model is only
    # instantiated right before it is evaluated.
    candidates = (
        ("Linear", linear_model.LinearRegression),
        ("Ridge", linear_model.Ridge),
        ("Bayesian Ridge", linear_model.BayesianRidge),
        ("Hubber", linear_model.HuberRegressor),
        ("Lasso", lambda: linear_model.Lasso(alpha=1e-4)),
        ("Bagging", BaggingRegressor),
        ("RandomForest", RandomForestRegressor),
        ("AdaBoost", AdaBoostRegressor),
        ("SVM RBF", svm.SVR),
        ("SVM Linear", lambda: svm.SVR(kernel="linear")),
    )

    scores_by_model = {}
    for label, build in candidates:
        scores_by_model[label] = mean_r2(build())

    results = pd.DataFrame.from_dict(scores_by_model, orient='index')
    results.columns = ["R Square Score"]
    # Rows stay in evaluation order; no sorting by score is applied.
    results.plot(kind="bar", title="Model Scores")
    plt.gca().set_ylim([0.5, 1])
    return results
Example 24
Project: twitter-stock-recommendation   Author: alvarobartt   File: test_bagging.py    License: MIT License 4 votes vote down vote up
def test_sparse_regression():
    """Check BaggingRegressor on sparse input for several parameter sets.

    For each sparse container (``csc_matrix``, ``csr_matrix``) and each
    parameter set, a bagging ensemble fitted and evaluated on sparse data
    must produce exactly the predictions of an identically configured
    ensemble fitted and evaluated on the dense data, and every fitted
    sub-estimator must have been trained on the sparse container type.
    """
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(boston.data[:50],
                                                        boston.target[:50],
                                                        random_state=rng)

    class CustomSVR(SVR):
        """SVR variant that records the type of the training set."""

        def fit(self, X, y):
            super(CustomSVR, self).fit(X, y)
            self.data_type_ = type(X)
            return self

    parameter_sets = [
        {"max_samples": 0.5,
         "max_features": 2,
         "bootstrap": True,
         "bootstrap_features": True},
        {"max_samples": 1.0,
         "max_features": 4,
         "bootstrap": True,
         "bootstrap_features": True},
        {"max_features": 2,
         "bootstrap": False,
         "bootstrap_features": True},
        {"max_samples": 0.5,
         "bootstrap": True,
         "bootstrap_features": False},
    ]

    for sparse_format in [csc_matrix, csr_matrix]:
        X_train_sparse = sparse_format(X_train)
        X_test_sparse = sparse_format(X_test)
        # Invariant for all parameter sets of this format.
        sparse_type = type(X_train_sparse)

        for params in parameter_sets:
            # Trained on sparse format
            sparse_regressor = BaggingRegressor(
                base_estimator=CustomSVR(),
                random_state=1,
                **params
            ).fit(X_train_sparse, y_train)
            sparse_results = sparse_regressor.predict(X_test_sparse)

            # Trained on dense format
            dense_results = BaggingRegressor(
                base_estimator=CustomSVR(),
                random_state=1,
                **params
            ).fit(X_train, y_train).predict(X_test)

            # Same seed and parameters: predictions must match exactly.
            assert_array_equal(sparse_results, dense_results)

            # Each sub-estimator recorded the container type it was fitted
            # on; all of them must have received the sparse type.
            assert all(est.data_type_ is sparse_type
                       for est in sparse_regressor.estimators_)
Example 25
Project: twitter-stock-recommendation   Author: alvarobartt   File: test_bagging.py    License: MIT License 4 votes vote down vote up
def test_base_estimator():
    """Check ``base_estimator_`` for explicit and default base estimators.

    For both ``BaggingClassifier`` and ``BaggingRegressor`` the fitted
    ensemble's ``base_estimator_`` must be an instance of the estimator
    class that was passed in.  When ``None`` is passed, it must be a
    ``DecisionTreeClassifier`` or ``DecisionTreeRegressor`` respectively.

    Plain ``assert`` statements are used instead of the legacy
    ``assert_true`` helper, so the test does not depend on that helper.
    """
    # The same rng object is handed to both splits, so the classification
    # split has to run before the regression split to keep the draws
    # unchanged.
    rng = check_random_state(0)

    # Classification
    X_train, _, y_train, _ = train_test_split(iris.data,
                                              iris.target,
                                              random_state=rng)

    classification_cases = [
        (None, DecisionTreeClassifier),
        (DecisionTreeClassifier(), DecisionTreeClassifier),
        (Perceptron(tol=1e-3), Perceptron),
    ]
    for estimator, expected_type in classification_cases:
        ensemble = BaggingClassifier(estimator,
                                     n_jobs=3,
                                     random_state=0).fit(X_train, y_train)
        assert isinstance(ensemble.base_estimator_, expected_type)

    # Regression
    X_train, _, y_train, _ = train_test_split(boston.data,
                                              boston.target,
                                              random_state=rng)

    regression_cases = [
        (None, DecisionTreeRegressor),
        (DecisionTreeRegressor(), DecisionTreeRegressor),
        (SVR(), SVR),
    ]
    for estimator, expected_type in regression_cases:
        ensemble = BaggingRegressor(estimator,
                                    n_jobs=3,
                                    random_state=0).fit(X_train, y_train)
        assert isinstance(ensemble.base_estimator_, expected_type)