Python sklearn.ensemble.ExtraTreesRegressor() Examples

The following are 19 code examples of sklearn.ensemble.ExtraTreesRegressor(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module sklearn.ensemble , or try the search function .
Example #1
Source File: RandomForest.py    From pyGPGO with MIT License 6 votes vote down vote up
def fit(self, X, y):
        """
        Fit a Random Forest model to data `X` and targets `y`.

        Parameters
        ----------
        X : array-like
            Input values.
        y: array-like
            Target values.
        """
        self.X = X
        self.y = y
        self.n = self.X.shape[0]
        self.model = ExtraTreesRegressor(**self.params)
        self.model.fit(X, y) 
Example #2
Source File: extra_trees.py    From mljar-supervised with MIT License 6 votes vote down vote up
def __init__(self, params):
        super(ExtraTreesRegressorAlgorithm, self).__init__(params)
        logger.debug("ExtraTreesRegressorAlgorithm.__init__")

        self.library_version = sklearn.__version__
        self.trees_in_step = regression_additional.get("trees_in_step", 100)
        self.max_steps = regression_additional.get("max_steps", 50)
        self.early_stopping_rounds = regression_additional.get(
            "early_stopping_rounds", 50
        )
        self.model = ExtraTreesRegressor(
            n_estimators=self.trees_in_step,
            criterion=params.get("criterion", "mse"),
            max_features=params.get("max_features", 0.8),
            min_samples_split=params.get("min_samples_split", 4),
            warm_start=True,
            n_jobs=-1,
            random_state=params.get("seed", 1),
        ) 
Example #3
Source File: machine_learning_setup.py    From OpenOA with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def __init__(self, algorithm, params = None):
        '''
        Initialize the class with a list of possible algorithms and 
        recommended hyperparameter ranges
        '''    
        if algorithm == 'etr': # Extra trees regressor
            from sklearn.ensemble import ExtraTreesRegressor
            self.hyper_range = {"max_depth": [4, 8, 12, 16, 20],
                                "min_samples_split": np.arange(2, 11),
                                "min_samples_leaf": np.arange(1, 11),
                                "n_estimators": np.arange(10,801,40)}
            self.algorithm = ExtraTreesRegressor()
        
        elif algorithm == 'gbm': # Gradient boosting model
            from sklearn.ensemble import GradientBoostingRegressor
            self.hyper_range = {"max_depth": [4, 8, 12, 16, 20],
                                "min_samples_split": np.arange(2, 11),
                                "min_samples_leaf": np.arange(1, 11),
                                "n_estimators": np.arange(10,801,40)}
            self.algorithm = GradientBoostingRegressor()
        
        elif algorithm == 'gam': # Generalized additive model
            from pygam import GAM
            self.hyper_range = {'n_splines': np.arange(5,40)}
            self.algorithm = GAM()
        
        # Set scorer as R2
        self.my_scorer = make_scorer(r2_score, greater_is_better = True) 
Example #4
Source File: test_forest.py    From twitter-stock-recommendation with MIT License 5 votes vote down vote up
def test_min_impurity_decrease():
    X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1)
    all_estimators = [RandomForestClassifier, RandomForestRegressor,
                      ExtraTreesClassifier, ExtraTreesRegressor]

    for Estimator in all_estimators:
        est = Estimator(min_impurity_decrease=0.1)
        est.fit(X, y)
        for tree in est.estimators_:
            # Simply check if the parameter is passed on correctly. Tree tests
            # will suffice for the actual working of this param
            assert_equal(tree.min_impurity_decrease, 0.1) 
Example #5
Source File: test_forest.py    From twitter-stock-recommendation with MIT License 5 votes vote down vote up
def test_min_impurity_split():
    # Test if min_impurity_split of base estimators is set
    # Regression test for #8006
    X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1)
    all_estimators = [RandomForestClassifier, RandomForestRegressor,
                      ExtraTreesClassifier, ExtraTreesRegressor]

    for Estimator in all_estimators:
        est = Estimator(min_impurity_split=0.1)
        est = assert_warns_message(DeprecationWarning, "min_impurity_decrease",
                                   est.fit, X, y)
        for tree in est.estimators_:
            assert_equal(tree.min_impurity_split, 0.1) 
Example #6
Source File: test_ensemble.py    From pandas-ml with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def test_objectmapper(self):
        df = pdml.ModelFrame([])
        self.assertIs(df.ensemble.AdaBoostClassifier,
                      ensemble.AdaBoostClassifier)
        self.assertIs(df.ensemble.AdaBoostRegressor,
                      ensemble.AdaBoostRegressor)
        self.assertIs(df.ensemble.BaggingClassifier,
                      ensemble.BaggingClassifier)
        self.assertIs(df.ensemble.BaggingRegressor,
                      ensemble.BaggingRegressor)
        self.assertIs(df.ensemble.ExtraTreesClassifier,
                      ensemble.ExtraTreesClassifier)
        self.assertIs(df.ensemble.ExtraTreesRegressor,
                      ensemble.ExtraTreesRegressor)

        self.assertIs(df.ensemble.GradientBoostingClassifier,
                      ensemble.GradientBoostingClassifier)
        self.assertIs(df.ensemble.GradientBoostingRegressor,
                      ensemble.GradientBoostingRegressor)

        self.assertIs(df.ensemble.IsolationForest,
                      ensemble.IsolationForest)

        self.assertIs(df.ensemble.RandomForestClassifier,
                      ensemble.RandomForestClassifier)
        self.assertIs(df.ensemble.RandomTreesEmbedding,
                      ensemble.RandomTreesEmbedding)
        self.assertIs(df.ensemble.RandomForestRegressor,
                      ensemble.RandomForestRegressor)

        self.assertIs(df.ensemble.VotingClassifier,
                      ensemble.VotingClassifier) 
Example #7
Source File: model.py    From numerox with GNU General Public License v3.0 5 votes vote down vote up
def fit_predict(self, dfit, dpre, tournament):
        clf = ETC(criterion='mse',
                  max_features=self.p['nfeatures'],
                  max_depth=self.p['depth'],
                  n_estimators=self.p['ntrees'],
                  random_state=self.p['seed'],
                  n_jobs=-1)
        clf.fit(dfit.x, dfit.y[tournament])
        yhat = clf.predict(dpre.x)
        return dpre.ids, yhat 
Example #8
Source File: baselines.py    From AirBnbPricePrediction with MIT License 5 votes vote down vote up
def get_ensemble_models():
    rf = RandomForestRegressor(
        n_estimators=51, min_samples_leaf=5, min_samples_split=3, random_state=42,
        n_jobs=int(0.8*n_cores))
    bag = BaggingRegressor(n_estimators=51, random_state=42, n_jobs=int(0.8*n_cores))
    extra = ExtraTreesRegressor(n_estimators=71, random_state=42, n_jobs=int(0.8*n_cores))
    ada = AdaBoostRegressor(random_state=42)
    grad = GradientBoostingRegressor(n_estimators=101, random_state=42)
    classifier_list = [rf, bag, extra, ada, grad]
    classifier_name_list = ['Random Forests', 'Bagging',
                            'Extra Trees', 'AdaBoost', 'Gradient Boost']
    return classifier_list, classifier_name_list 
Example #9
Source File: export_tests.py    From tpot with GNU Lesser General Public License v3.0 5 votes vote down vote up
def test_export_pipeline_5():
    """Assert that exported_pipeline() generated a compile source file as expected given a fixed simple pipeline with SelectFromModel."""
    pipeline_string = (
        'DecisionTreeRegressor(SelectFromModel(input_matrix, '
        'SelectFromModel__ExtraTreesRegressor__max_features=0.05, SelectFromModel__ExtraTreesRegressor__n_estimators=100, '
        'SelectFromModel__threshold=0.05), DecisionTreeRegressor__max_depth=8,'
        'DecisionTreeRegressor__min_samples_leaf=5, DecisionTreeRegressor__min_samples_split=5)'
    )
    pipeline = creator.Individual.from_string(pipeline_string, tpot_obj_reg._pset)
    expected_code = """import numpy as np
import pandas as pd
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.feature_selection import SelectFromModel
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.tree import DecisionTreeRegressor

# NOTE: Make sure that the outcome column is labeled 'target' in the data file
tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64)
features = tpot_data.drop('target', axis=1)
training_features, testing_features, training_target, testing_target = \\
            train_test_split(features, tpot_data['target'], random_state=None)

exported_pipeline = make_pipeline(
    SelectFromModel(estimator=ExtraTreesRegressor(max_features=0.05, n_estimators=100), threshold=0.05),
    DecisionTreeRegressor(max_depth=8, min_samples_leaf=5, min_samples_split=5)
)

exported_pipeline.fit(training_features, training_target)
results = exported_pipeline.predict(testing_features)
"""
    assert expected_code == export_pipeline(pipeline, tpot_obj_reg.operators, tpot_obj_reg._pset) 
Example #10
Source File: ExtraTreesRegressor.py    From mltk-algo-contrib with Apache License 2.0 5 votes vote down vote up
def __init__(self, options):
        self.handle_options(options)
        params = options.get('params', {})
        out_params = convert_params(
            params,
            floats=['max_samples', 'min_samples_split', 'min_samples_leaf', 'min_weight_fraction_leaf', 'max_features', 'min_impurity_split'],
            bools=['bootstrap', 'oob_score', 'warm_start'],
            ints=['n_estimators', 'max_depth', 'max_leaf_nodes', 'min_impurity_decrease'],
            strs=['criterion'],
        )

        self.estimator = _ExtraTreesRegressor(**out_params) 
Example #11
Source File: RandomForest.py    From pyGPGO with MIT License 5 votes vote down vote up
def __init__(self, **params):
        """
        Wrapper around sklearn's ExtraTreesRegressor implementation for pyGPGO.
        Random Forests can also be used for surrogate models in Bayesian Optimization.
        An estimate of 'posterior' variance can be obtained by using the `impurity`
        criterion value in each subtree.

        Parameters
        ----------
        params: tuple, optional
            Any parameters to pass to `RandomForestRegressor`. Defaults to sklearn's.

        """
        self.params = params 
Example #12
Source File: extra_trees.py    From driverlessai-recipes with Apache License 2.0 5 votes vote down vote up
def fit(self, X, y, sample_weight=None, eval_set=None, sample_weight_eval_set=None, **kwargs):
        orig_cols = list(X.names)
        if self.num_classes >= 2:
            lb = LabelEncoder()
            lb.fit(self.labels)
            y = lb.transform(y)
            model = ExtraTreesClassifier(**self.params)
        else:
            model = ExtraTreesRegressor(**self.params)

        # Replace missing values with a value smaller than all observed values
        self.min = dict()
        for col in X.names:
            XX = X[:, col]
            self.min[col] = XX.min1()
            if self.min[col] is None or np.isnan(self.min[col]):
                self.min[col] = -1e10
            else:
                self.min[col] -= 1
            XX.replace(None, self.min[col])
            X[:, col] = XX
            assert X[dt.isna(dt.f[col]), col].nrows == 0
        X = X.to_numpy()

        model.fit(X, y)
        importances = np.array(model.feature_importances_)
        self.set_model_properties(model=model,
                                  features=orig_cols,
                                  importances=importances.tolist(),
                                  iterations=self.params['n_estimators']) 
Example #13
Source File: test_forest.py    From Mastering-Elasticsearch-7.0 with MIT License 5 votes vote down vote up
def test_min_impurity_decrease():
    X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1)
    all_estimators = [RandomForestClassifier, RandomForestRegressor,
                      ExtraTreesClassifier, ExtraTreesRegressor]

    for Estimator in all_estimators:
        est = Estimator(min_impurity_decrease=0.1)
        est.fit(X, y)
        for tree in est.estimators_:
            # Simply check if the parameter is passed on correctly. Tree tests
            # will suffice for the actual working of this param
            assert_equal(tree.min_impurity_decrease, 0.1) 
Example #14
Source File: test_forest.py    From Mastering-Elasticsearch-7.0 with MIT License 5 votes vote down vote up
def test_min_impurity_split():
    # Test if min_impurity_split of base estimators is set
    # Regression test for #8006
    X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1)
    all_estimators = [RandomForestClassifier, RandomForestRegressor,
                      ExtraTreesClassifier, ExtraTreesRegressor]

    for Estimator in all_estimators:
        est = Estimator(min_impurity_split=0.1)
        est = assert_warns_message(DeprecationWarning, "min_impurity_decrease",
                                   est.fit, X, y)
        for tree in est.estimators_:
            assert_equal(tree.min_impurity_split, 0.1) 
Example #15
Source File: task.py    From kaggle-HomeDepot with MIT License 4 votes vote down vote up
def _get_learner(self):
        # xgboost
        if self.learner_name in ["reg_xgb_linear", "reg_xgb_tree", "reg_xgb_tree_best_single_model"]:
            return XGBRegressor(**self.param_dict)
        if self.learner_name in ["clf_xgb_linear", "clf_xgb_tree"]:
            return XGBClassifier(**self.param_dict)
        # sklearn
        if self.learner_name == "reg_skl_lasso":
            return Lasso(**self.param_dict)
        if self.learner_name == "reg_skl_ridge":
            return Ridge(**self.param_dict)
        if self.learner_name == "reg_skl_random_ridge":
            return RandomRidge(**self.param_dict)
        if self.learner_name == "reg_skl_bayesian_ridge":
            return BayesianRidge(**self.param_dict)
        if self.learner_name == "reg_skl_svr":
            return SVR(**self.param_dict)
        if self.learner_name == "reg_skl_lsvr":
            return LinearSVR(**self.param_dict)
        if self.learner_name == "reg_skl_knn":
            return KNNRegressor(**self.param_dict)
        if self.learner_name == "reg_skl_etr":
            return ExtraTreesRegressor(**self.param_dict)
        if self.learner_name == "reg_skl_rf":
            return RandomForestRegressor(**self.param_dict)
        if self.learner_name == "reg_skl_gbm":
            return GradientBoostingRegressor(**self.param_dict)
        if self.learner_name == "reg_skl_adaboost":
            return AdaBoostRegressor(**self.param_dict)
        # keras
        if self.learner_name == "reg_keras_dnn":
            try:
                return KerasDNNRegressor(**self.param_dict)
            except:
                return None
        # rgf
        if self.learner_name == "reg_rgf":
            return RGFRegressor(**self.param_dict)
        # ensemble
        if self.learner_name == "reg_ensemble":
            return EnsembleLearner(**self.param_dict)
            
        return None 
Example #16
Source File: treeinterpreter.py    From treeinterpreter with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
def predict(model, X, joint_contribution=False):
    """ Returns a triple (prediction, bias, feature_contributions), such
    that prediction ≈ bias + feature_contributions.
    Parameters
    ----------
    model : DecisionTreeRegressor, DecisionTreeClassifier,
        ExtraTreeRegressor, ExtraTreeClassifier,
        RandomForestRegressor, RandomForestClassifier,
        ExtraTreesRegressor, ExtraTreesClassifier
    Scikit-learn model on which the prediction should be decomposed.

    X : array-like, shape = (n_samples, n_features)
    Test samples.
    
    joint_contribution : boolean
    Specifies if contributions are given individually from each feature,
    or jointly over them

    Returns
    -------
    decomposed prediction : triple of
    * prediction, shape = (n_samples) for regression and (n_samples, n_classes)
        for classification
    * bias, shape = (n_samples) for regression and (n_samples, n_classes) for
        classification
    * contributions, If joint_contribution is False then returns and  array of 
        shape = (n_samples, n_features) for regression or
        shape = (n_samples, n_features, n_classes) for classification, denoting
        contribution from each feature.
        If joint_contribution is True, then shape is array of size n_samples,
        where each array element is a dict from a tuple of feature indices to
        to a value denoting the contribution from that feature tuple.
    """
    # Only single out response variable supported,
    if model.n_outputs_ > 1:
        raise ValueError("Multilabel classification trees not supported")

    if (isinstance(model, DecisionTreeClassifier) or
        isinstance(model, DecisionTreeRegressor)):
        return _predict_tree(model, X, joint_contribution=joint_contribution)
    elif (isinstance(model, RandomForestClassifier) or
          isinstance(model, ExtraTreesClassifier) or
          isinstance(model, RandomForestRegressor) or
          isinstance(model, ExtraTreesRegressor)):
        return _predict_forest(model, X, joint_contribution=joint_contribution)
    else:
        raise ValueError("Wrong model type. Base learner needs to be a "
                         "DecisionTreeClassifier or DecisionTreeRegressor.") 
Example #17
Source File: treeinterpreter.py    From treeinterpreter with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
def _predict_forest(model, X, joint_contribution=False):
    """
    For a given RandomForestRegressor, RandomForestClassifier,
    ExtraTreesRegressor, or ExtraTreesClassifier returns a triple of
    [prediction, bias and feature_contributions], such that prediction ≈ bias +
    feature_contributions.
    """

    if joint_contribution:
        biases = []
        contributions = []
        predictions = []
        
        for tree in model.estimators_:
            pred, bias, contribution = _predict_tree(tree, X, joint_contribution=joint_contribution)

            biases.append(bias)
            contributions.append(contribution)
            predictions.append(pred)
        
        
        total_contributions = []
        
        for i in range(len(X)):
            contr = {}
            for j, dct in enumerate(contributions):
                for k in set(dct[i]).union(set(contr.keys())):
                    contr[k] = (contr.get(k, 0)*j + dct[i].get(k,0) ) / (j+1)

            total_contributions.append(contr)    
            
        for i, item in enumerate(contribution):
            total_contributions[i]
            sm = sum([v for v in contribution[i].values()])
                

        
        return (np.mean(predictions, axis=0), np.mean(biases, axis=0),
            total_contributions)
    else:
        mean_pred = None
        mean_bias = None
        mean_contribution = None

        for i, tree in enumerate(model.estimators_):
            pred, bias, contribution = _predict_tree(tree, X)

            if i < 1: # first iteration
                mean_bias = bias
                mean_contribution = contribution
                mean_pred = pred
            else:
                mean_bias = _iterative_mean(i, mean_bias, bias)
                mean_contribution = _iterative_mean(i, mean_contribution, contribution)
                mean_pred = _iterative_mean(i, mean_pred, pred)

        return mean_pred, mean_bias, mean_contribution 
Example #18
Source File: test_forest.py    From twitter-stock-recommendation with MIT License 4 votes vote down vote up
def test_distribution():
    rng = check_random_state(12321)

    # Single variable with 4 values
    X = rng.randint(0, 4, size=(1000, 1))
    y = rng.rand(1000)
    n_trees = 500

    clf = ExtraTreesRegressor(n_estimators=n_trees, random_state=42).fit(X, y)

    uniques = defaultdict(int)
    for tree in clf.estimators_:
        tree = "".join(("%d,%d/" % (f, int(t)) if f >= 0 else "-")
                       for f, t in zip(tree.tree_.feature,
                                       tree.tree_.threshold))

        uniques[tree] += 1

    uniques = sorted([(1. * count / n_trees, tree)
                      for tree, count in uniques.items()])

    # On a single variable problem where X_0 has 4 equiprobable values, there
    # are 5 ways to build a random tree. The more compact (0,1/0,0/--0,2/--) of
    # them has probability 1/3 while the 4 others have probability 1/6.

    assert_equal(len(uniques), 5)
    assert_greater(0.20, uniques[0][0])  # Rough approximation of 1/6.
    assert_greater(0.20, uniques[1][0])
    assert_greater(0.20, uniques[2][0])
    assert_greater(0.20, uniques[3][0])
    assert_greater(uniques[4][0], 0.3)
    assert_equal(uniques[4][1], "0,1/0,0/--0,2/--")

    # Two variables, one with 2 values, one with 3 values
    X = np.empty((1000, 2))
    X[:, 0] = np.random.randint(0, 2, 1000)
    X[:, 1] = np.random.randint(0, 3, 1000)
    y = rng.rand(1000)

    clf = ExtraTreesRegressor(n_estimators=100, max_features=1,
                              random_state=1).fit(X, y)

    uniques = defaultdict(int)
    for tree in clf.estimators_:
        tree = "".join(("%d,%d/" % (f, int(t)) if f >= 0 else "-")
                       for f, t in zip(tree.tree_.feature,
                                       tree.tree_.threshold))

        uniques[tree] += 1

    uniques = [(count, tree) for tree, count in uniques.items()]
    assert_equal(len(uniques), 8) 
Example #19
Source File: test_forest.py    From Mastering-Elasticsearch-7.0 with MIT License 4 votes vote down vote up
def test_distribution():
    rng = check_random_state(12321)

    # Single variable with 4 values
    X = rng.randint(0, 4, size=(1000, 1))
    y = rng.rand(1000)
    n_trees = 500

    clf = ExtraTreesRegressor(n_estimators=n_trees, random_state=42).fit(X, y)

    uniques = defaultdict(int)
    for tree in clf.estimators_:
        tree = "".join(("%d,%d/" % (f, int(t)) if f >= 0 else "-")
                       for f, t in zip(tree.tree_.feature,
                                       tree.tree_.threshold))

        uniques[tree] += 1

    uniques = sorted([(1. * count / n_trees, tree)
                      for tree, count in uniques.items()])

    # On a single variable problem where X_0 has 4 equiprobable values, there
    # are 5 ways to build a random tree. The more compact (0,1/0,0/--0,2/--) of
    # them has probability 1/3 while the 4 others have probability 1/6.

    assert_equal(len(uniques), 5)
    assert_greater(0.20, uniques[0][0])  # Rough approximation of 1/6.
    assert_greater(0.20, uniques[1][0])
    assert_greater(0.20, uniques[2][0])
    assert_greater(0.20, uniques[3][0])
    assert_greater(uniques[4][0], 0.3)
    assert_equal(uniques[4][1], "0,1/0,0/--0,2/--")

    # Two variables, one with 2 values, one with 3 values
    X = np.empty((1000, 2))
    X[:, 0] = np.random.randint(0, 2, 1000)
    X[:, 1] = np.random.randint(0, 3, 1000)
    y = rng.rand(1000)

    clf = ExtraTreesRegressor(n_estimators=100, max_features=1,
                              random_state=1).fit(X, y)

    uniques = defaultdict(int)
    for tree in clf.estimators_:
        tree = "".join(("%d,%d/" % (f, int(t)) if f >= 0 else "-")
                       for f, t in zip(tree.tree_.feature,
                                       tree.tree_.threshold))

        uniques[tree] += 1

    uniques = [(count, tree) for tree, count in uniques.items()]
    assert_equal(len(uniques), 8)