Python sklearn.ensemble.RandomForestRegressor() Examples

The following are 30 code examples of sklearn.ensemble.RandomForestRegressor(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module sklearn.ensemble, or try the search function.
Example #1
Source File: friedman_scores.py    From mlens with MIT License 7 votes vote down vote up
def build_ensemble(**kwargs):
    """Build the SuperLearner ensemble.

    All keyword arguments are forwarded unchanged to ``SuperLearner``.  One
    layer is added from a mapping of named preprocessing cases to the
    estimators attached to each case, followed by a
    ``GradientBoostingRegressor`` added with ``meta=True``.

    Returns:
        The configured (unfitted) ensemble.
    """
    ensemble = SuperLearner(**kwargs)

    # Preprocessing pipelines keyed by case name; the empty list is the case
    # that has no transformer attached.
    preprocessing = {
        'Standard Scaling': [StandardScaler()],
        'Min Max Scaling': [MinMaxScaler()],
        'No Preprocessing': [],
    }

    # Base learners, keyed by the same case names as ``preprocessing``.
    estimators = {
        'Standard Scaling': [ElasticNet(), Lasso(), KNeighborsRegressor()],
        'Min Max Scaling': [SVR()],
        'No Preprocessing': [
            RandomForestRegressor(random_state=SEED),
            GradientBoostingRegressor(),
        ],
    }

    ensemble.add(estimators, preprocessing)
    ensemble.add(GradientBoostingRegressor(), meta=True)
    return ensemble
Example #2
Source File: test_prediction.py    From Pyspatialml with GNU General Public License v3.0 7 votes vote down vote up
def test_regression(self):
        """Check single- and multi-target regression predictions on the meuse stack."""
        target_names = ["zinc", "cadmium", "copper", "lead"]

        points = gpd.read_file(ms.meuse)
        training = self.stack_meuse.extract_vector(gdf=points)
        # Copy the target columns from the point layer onto the extracted values.
        for target in target_names:
            training[target] = points[target]
        training = training.dropna()

        model = RandomForestRegressor(n_estimators=50)
        predictors = training.loc[:, self.stack_meuse.names]

        # single target regression
        model.fit(predictors, training["zinc"])
        single_pred = self.stack_meuse.predict(model)
        self.assertIsInstance(single_pred, Raster)
        self.assertEqual(single_pred.count, 1)

        # multi-target regression: the same estimator is refitted on all four targets
        model.fit(predictors, training.loc[:, target_names])
        multi_pred = self.stack_meuse.predict(model)
        self.assertIsInstance(multi_pred, Raster)
        self.assertEqual(multi_pred.count, 4)
Example #3
Source File: pm25_RF_Regression.py    From Machine-Learning-for-Beginner-by-Python3 with MIT License 6 votes vote down vote up
def Train(data, treecount, tezh, yanzhgdata):
    """Fit the model built by ``RF`` and report training and validation MSE.

    Both arrays are indexed as 2-D: the last column is used as the target and
    all preceding columns as the features.

    Args:
        data: training array.
        treecount: forwarded to ``RF`` as ``n_estimators``.
        tezh: forwarded to ``RF`` as ``max_features``.
        yanzhgdata: validation array with the same column layout as ``data``.

    Returns:
        Tuple ``(train_mse, add_mse)``: the error on the training data and on
        the validation data.  Both values are also printed.
    """
    train_x, train_y = data[:, :-1], data[:, -1]
    forest = RF(n_estimators=treecount, max_features=tezh)
    forest.fit(train_x, train_y)

    # MSE of the predictions on the training data
    train_mse = mse(train_y, forest.predict(train_x))

    # MSE of the predictions on the validation data
    valid_x, valid_y = yanzhgdata[:, :-1], yanzhgdata[:, -1]
    add_mse = mse(valid_y, forest.predict(valid_x))

    print(train_mse, add_mse)
    return train_mse, add_mse

# Function that determines the final combination
Example #4
Source File: models.py    From jh-kaggle-util with Apache License 2.0 6 votes vote down vote up
def run_sklearn():
  """Train every scikit-learn model in the list and print the elapsed time.

  Each ``[name, estimator]`` pair is handed to
  ``jhkaggle.train_sklearn.TrainSKLearn`` and run in turn; the total
  wall-clock time is printed at the end.
  """
  # https://www.analyticsvidhya.com/blog/2015/06/tuning-random-forest-model/
  alg_list = [
      ['lreg',LinearRegression()],
      ['rforest',RandomForestRegressor(n_estimators=1000, n_jobs=-1, max_depth=3)],
      # NOTE(review): a classifier in a list that otherwise holds regressors - confirm this is intended.
      ['extree',ExtraTreesClassifier(n_estimators = 1000,max_depth=2)],
      # ``base_estimator`` is left at its default (None was the default value);
      # spelling it out breaks on scikit-learn releases where the keyword was renamed.
      ['adaboost',AdaBoostRegressor(n_estimators=600, learning_rate=1.0)],
      ['knn', sklearn.neighbors.KNeighborsRegressor(n_neighbors=5)]
  ]

  start_time = time.time()
  for name,alg in alg_list:
    # The trainer is not bound to a name, so it is released before the next model starts.
    jhkaggle.train_sklearn.TrainSKLearn("1",name,alg,False).run()
  elapsed_time = time.time() - start_time
  print("Elapsed time: {}".format(jhkaggle.util.hms_string(elapsed_time)))
Example #5
Source File: ensemble_glm.py    From jh-kaggle-util with Apache License 2.0 6 votes vote down vote up
def fit_ensemble(x,y):
    """Fit the blending model for the configured fit type.

    An ``SGDClassifier`` is used when ``jhkaggle.jhkaggle_config['FIT_TYPE']``
    is binary classification; every other fit type uses ``LassoLarsCV``.

    Args:
        x: training inputs passed to the blender's ``fit``.
        y: training targets passed to the blender's ``fit``.

    Returns:
        The fitted blender.
    """
    fit_type = jhkaggle.jhkaggle_config['FIT_TYPE']
    if fit_type == jhkaggle.const.FIT_TYPE_BINARY_CLASSIFICATION:
        blend = SGDClassifier(loss="log", penalty="elasticnet")
    else:
        # Alternatives previously tried here: SGDRegressor, LinearRegression,
        # RandomForestRegressor (mae criterion) and ElasticNetCV.
        blend = LassoLarsCV(normalize=True)
    blend.fit(x, y)
    return blend
Example #6
Source File: classifier.py    From Semantic-Texual-Similarity-Toolkits with MIT License 6 votes vote down vote up
def train_model(self, train_file_path, model_path):
        """Train a random forest regressor and pickle it together with its scaler.

        Args:
            train_file_path: path handed to ``self.load_file`` to obtain the
                training features and targets.
            model_path: destination of the pickled regressor.  The scaler is
                written next to it with ``.pkl`` replaced by ``.scaler.pkl``.

        Returns:
            The fitted regressor.
        """
        print("==> Load the data ...")
        X_train, Y_train = self.load_file(train_file_path)
        print(train_file_path, shape(X_train))

        print("==> Train the model ...")
        scaler = preprocessing.MaxAbsScaler()
        X_train_scaled = scaler.fit_transform(X_train)
        clf = RandomForestRegressor(n_estimators=self.n_estimators)
        # .toarray() is called on the scaled data, so it is presumably a
        # sparse matrix - confirm against load_file.
        clf.fit(X_train_scaled.toarray(), Y_train)

        print("==> Save the model ...")
        # Context managers make sure both files are flushed and closed.
        with open(model_path, 'wb') as model_file:
            pickle.dump(clf, model_file)

        # NOTE(review): when model_path contains no '.pkl' this equals
        # model_path and the scaler overwrites the model file.
        scaler_path = model_path.replace('.pkl', '.scaler.pkl')
        with open(scaler_path, 'wb') as scaler_file:
            pickle.dump(scaler, scaler_file)
        return clf
Example #7
Source File: strategy.py    From bitpredict with MIT License 6 votes vote down vote up
def fit_and_trade(data, cols, split, threshold):
    '''
    Fits a random forest on the first `split` rows and backtests a
    theoretical trading strategy on the remaining rows.

    Only rows with a positive `width` are kept; `cols` selects the feature
    columns and `mid30` is the target.  `threshold` is passed through to
    `trade` together with the held-out data and the fitted model.
    '''
    usable = data[data.width > 0]
    features, target = usable[cols], usable.mid30

    model = RandomForestRegressor(
        n_estimators=100,
        min_samples_leaf=500,
        random_state=42,
        n_jobs=-1,
    )
    model.fit(features.iloc[:split].values, target.iloc[:split].values)
    trade(features.iloc[split:].values, target.iloc[split:].values,
          model, threshold)
Example #8
Source File: scorer.py    From scan with GNU Affero General Public License v3.0 6 votes vote down vote up
def __init__(self, text, scores):
        """Store the training data, choose the forest model and fit the features.

        A ``RandomForestRegressor`` is the default model.  It is swapped for a
        ``RandomForestClassifier`` with the same settings when the number of
        distinct values in ``scores`` does not exceed
        ``self.classification_max``.
        """
        self.text = text
        self.scores = scores
        self.feature_generator = FeatureGenerator()

        # Both model types share the same hyper-parameters.
        forest_options = dict(
            n_estimators=100,
            min_samples_split=4,
            min_samples_leaf=3,
            random_state=1,
        )
        self.classifier = RandomForestRegressor(**forest_options)
        if len(set(scores)) <= self.classification_max:
            self.classifier = RandomForestClassifier(**forest_options)

        self.fit_feats()
        self.fit_done = False
Example #9
Source File: random_forest.py    From mljar-supervised with MIT License 6 votes vote down vote up
def __init__(self, params):
        """Set up the warm-started random forest regressor.

        Step settings are read from ``regression_additional``; the forest's
        hyper-parameters are read from ``params`` with the defaults shown
        below.
        """
        super(RandomForestRegressorAlgorithm, self).__init__(params)
        logger.debug("RandomForestRegressorAlgorithm.__init__")

        self.library_version = sklearn.__version__

        additional = regression_additional
        self.trees_in_step = additional.get("trees_in_step", 5)
        self.max_steps = additional.get("max_steps", 3)
        self.early_stopping_rounds = additional.get("early_stopping_rounds", 50)

        # NOTE(review): "mse" is the legacy spelling of the squared-error
        # criterion - confirm against the pinned scikit-learn version.
        forest_options = {
            "n_estimators": self.trees_in_step,
            "criterion": params.get("criterion", "mse"),
            "max_features": params.get("max_features", 0.8),
            "min_samples_split": params.get("min_samples_split", 4),
            "warm_start": True,
            "n_jobs": -1,
            "random_state": params.get("seed", 1),
        }
        self.model = RandomForestRegressor(**forest_options)
Example #10
Source File: test_stacking.py    From civisml-extensions with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def test_integration_regression(regression_test_data, n_jobs):
    """Fit a stacked regressor and its base estimators on the same data and
    check that the stacked model reaches the lowest test error.
    """
    xtrain, ytrain = regression_test_data['x'], regression_test_data['y']
    xtest, ytest = regression_test_data['xtest'], regression_test_data['ytest']

    base_and_meta = [
        ('rf', RandomForestRegressor(random_state=7, n_estimators=10)),
        ('lr', LinearRegression()),
        ('metalr', NonNegativeLinearRegression()),
    ]
    stacked = StackedRegressor(base_and_meta, n_jobs=n_jobs)
    forest = RandomForestRegressor(random_state=7, n_estimators=10)
    linear = LinearRegression()

    def measure(model):
        # Same train/test split for every model under comparison.
        return fit_predict_measure_reg(model, xtrain, ytrain, xtest, ytest)

    sr_mse = measure(stacked)
    rf_mse = measure(forest)
    lr_mse = measure(linear)

    # Stacked regressor should perform better than its base estimators on this
    # data.
    assert sr_mse < rf_mse
    assert sr_mse < lr_mse
    assert sr_mse < 1.5    # Sanity check
Example #11
Source File: vanilla_model.py    From OpenChem with MIT License 6 votes vote down vote up
def __init__(self, model_type='classifier', feature_type='fingerprints',
                 n_estimators=100, n_ensemble=5):
        """Create an ensemble of ``n_ensemble`` random forests.

        Args:
            model_type: ``'classifier'`` builds ``RFC`` models and
                ``'regressor'`` builds ``RFR`` models.
            feature_type: stored on the instance; ``'descriptors'``
                additionally creates a descriptor ``Calculator`` and a
                per-model ``desc_mean`` list initialised to zeros.
            n_estimators: number of trees passed to each forest.
            n_ensemble: number of forests in the ensemble.

        Raises:
            ValueError: if ``model_type`` is neither of the two values above.
        """
        super(RandomForestQSAR, self).__init__()
        self.n_estimators = n_estimators
        self.n_ensemble = n_ensemble
        self.model = []
        self.model_type = model_type

        if self.model_type == 'classifier':
            forest_cls = RFC
        elif self.model_type == 'regressor':
            forest_cls = RFR
        else:
            raise ValueError('invalid value for argument')
        self.model.extend(
            forest_cls(n_estimators=n_estimators) for _ in range(n_ensemble)
        )

        self.feature_type = feature_type
        if self.feature_type == 'descriptors':
            self.calc = Calculator(descriptors, ignore_3D=True)
            self.desc_mean = [0]*self.n_ensemble
Example #12
Source File: test_stacking.py    From civisml-extensions with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def test_smoke_multiout_regression_methods(n_jobs):
    """Smoke-test fit, predict and score of a stacked regressor on a
    two-target regression problem; ``predict_proba`` must not be available.
    """
    X, y = make_regression(random_state=7, n_samples=100, n_features=10,
                           n_informative=4, n_targets=2)

    rng = np.random.RandomState(17)
    estimators = [
        ('lr', LinearRegression()),
        ('rf', RandomForestRegressor(random_state=rng, n_estimators=10)),
        ('metalr', LinearRegression()),
    ]
    stacked = StackedRegressor(estimators, n_jobs=n_jobs)

    stacked.fit(X, y)
    stacked.predict(X)
    stacked.score(X, y)

    with pytest.raises(AttributeError):
        stacked.predict_proba(X)
Example #13
Source File: test_overfit.py    From deepchem with MIT License 6 votes vote down vote up
def test_sklearn_regression_overfit(self):
    """Test that sklearn models can overfit simple regression datasets."""
    n_samples, n_features, n_tasks = 10, 3, 1

    # Dummy dataset.  The seed is set first and X is drawn before y, so the
    # random values are reproducible.
    np.random.seed(123)
    sample_ids = np.arange(n_samples)
    features = np.random.rand(n_samples, n_features)
    labels = np.random.rand(n_samples, n_tasks)
    weights = np.ones((n_samples, n_tasks))
    dataset = dc.data.NumpyDataset(features, labels, weights, sample_ids)

    r2_metric = dc.metrics.Metric(dc.metrics.r2_score)
    model = dc.models.SklearnModel(RandomForestRegressor())

    # Fit and save the model
    model.fit(dataset)
    model.save()

    # Evaluate on the training data itself
    scores = model.evaluate(dataset, [r2_metric])
    assert scores[r2_metric.name] > .7
Example #14
Source File: test_stacking.py    From civisml-extensions with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def test_smoke_regression_methods(regression_test_data, n_jobs):
    """Smoke-test fit, predict and score of a stacked regressor on the
    regression fixture; ``predict_proba`` must not be available.
    """
    xtrain, ytrain = regression_test_data['x'], regression_test_data['y']

    rng = np.random.RandomState(17)
    estimators = [
        ('lr', LinearRegression()),
        ('rf', RandomForestRegressor(random_state=rng, n_estimators=10)),
        ('nnls', NonNegativeLinearRegression()),
    ]
    stacked = StackedRegressor(estimators, n_jobs=n_jobs)

    stacked.fit(xtrain, ytrain)
    stacked.predict(xtrain)
    stacked.score(xtrain, ytrain)

    with pytest.raises(AttributeError):
        stacked.predict_proba(xtrain)
Example #15
Source File: test_random_forest_regression_numeric.py    From coremltools with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def _train_convert_evaluate_assert(self, **scikit_params):
        """
        Train a scikit-learn random forest with ``scikit_params``, convert it
        and, on macOS 10.13 or newer, evaluate the converted spec against the
        scikit-learn predictions.
        """
        scikit_model = RandomForestRegressor(random_state=1, **scikit_params)
        scikit_model.fit(self.X, self.target)

        # Convert the model
        spec = skl_converter.convert(scikit_model, self.feature_names, self.output_name)

        # The evaluation below only runs on macOS 10.13+; elsewhere the test
        # stops after the conversion.
        if not (_is_macos() and _macos_version() >= (10, 13)):
            return

        # Reference predictions from scikit-learn
        expected = pd.DataFrame(self.X, columns=self.feature_names)
        expected["prediction"] = scikit_model.predict(self.X)

        # Compare the converted model against the reference
        metrics = evaluate_regressor(spec, expected, verbose=False)
        self._check_metrics(metrics, scikit_params)
Example #16
Source File: test_ensemble.py    From m2cgen with MIT License 6 votes vote down vote up
def test_single_condition():
    """Assemble a two-tree forest fitted on two samples and compare the AST."""
    estimator = ensemble.RandomForestRegressor(n_estimators=2, random_state=1)
    estimator.fit([[1], [2]], [1, 2])

    actual = assemblers.RandomForestModelAssembler(estimator).assemble()

    # One tree is the constant 1.0; the other splits feature 0 at 1.5.
    split_tree = ast.IfExpr(
        ast.CompExpr(ast.FeatureRef(0), ast.NumVal(1.5), ast.CompOpType.LTE),
        ast.NumVal(1.0),
        ast.NumVal(2.0))
    tree_sum = ast.BinNumExpr(
        ast.NumVal(1.0), split_tree, ast.BinNumOpType.ADD)
    # The sum of both trees is multiplied by 0.5.
    expected = ast.BinNumExpr(
        tree_sum, ast.NumVal(0.5), ast.BinNumOpType.MUL)

    assert utils.cmp_exprs(actual, expected)
Example #17
Source File: regression_randomForest.py    From practicalDataAnalysisCookbook with GNU General Public License v2.0 6 votes vote down vote up
def regression_rf(x,y):
    '''
        Fit a random forest regressor with fixed settings on x and y
        and return the fitted estimator
    '''
    # fixed hyper-parameters of the forest
    forest_settings = {
        'min_samples_split': 80,
        'random_state': 666,
        'max_depth': 5,
        'n_estimators': 10,
    }
    forest = en.RandomForestRegressor(**forest_settings)

    # estimate the model and hand the estimator back
    forest.fit(x,y)
    return forest

# the file name of the dataset 
Example #18
Source File: test_io_types.py    From coremltools with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def test_random_forest_regressor(self):
        """Compare scikit-learn and converted-model predictions for each dtype.

        For every key of ``self.number_data_type`` the data and target are
        cast to that dtype, a random forest is set up through
        ``_sklearn_setup`` and the first sample is predicted with both models.
        A ``RuntimeError`` during the comparison is reported as an unsupported
        dtype instead of failing the test.
        """
        for dtype in self.number_data_type.keys():
            features = self.scikit_data["data"].astype(dtype)
            labels = self.scikit_data["target"].astype(dtype)
            scikit_model, spec = self._sklearn_setup(
                RandomForestRegressor(random_state=1), dtype, features, labels
            )
            sample = features[0].reshape(1, -1)
            self._check_tree_model(spec, "multiArrayType", "doubleType", 1)
            coreml_model = create_model(spec)

            def sklearn_value():
                # First (and only) prediction of the scikit-learn model.
                return scikit_model.predict(sample)[0]

            def coreml_value():
                # Prediction of the converted model for the same sample.
                return coreml_model.predict({"data": sample})["target"]

            try:
                self.assertEqual(sklearn_value().dtype, type(coreml_value()))
                self.assertAlmostEqual(
                    sklearn_value(),
                    coreml_value(),
                    msg="{} != {} for Dtype: {}".format(
                        sklearn_value(), coreml_value(), dtype
                    ),
                )
            except RuntimeError:
                print("{} not supported. ".format(dtype))
Example #19
Source File: arif.py    From HpBandSter with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def extend_partial(self, obs_losses, num_steps, config=None):
        """Extrapolate a partially observed loss curve by ``num_steps`` steps.

        The observed losses are differenced with ``self.apply_differencing``.
        Each step feeds the last ``self.order`` differenced values, joined with
        ``config``, to ``self.rfr.predict`` and appends the prediction.  The
        trailing ``num_steps`` values are then passed, together with the
        observed losses, to ``self.invert_differencing``.

        Args:
            obs_losses: observed losses.
            num_steps: number of steps to predict.
            config: extra feature values appended to every input row;
                ``None`` means no extra features.

        Returns:
            Whatever ``self.invert_differencing`` returns for the predicted
            differences.
        """
        # TODO: add variance predictions
        config = [] if config is None else config

        d_losses = self.apply_differencing(obs_losses)
        for _ in range(num_steps):
            features = np.hstack([d_losses[-self.order:], config])
            next_diff = self.rfr.predict([features])
            d_losses = np.hstack([d_losses, next_diff])

        # NOTE: for num_steps == 0 the slice [-0:] selects the whole array.
        return self.invert_differencing(obs_losses, d_losses[-num_steps:])
Example #20
Source File: arif.py    From HpBandSter with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def fit(self, losses, configs=None):
        """Fit the random forest on windows of differenced learning curves.

        Every curve in ``losses`` is differenced with
        ``self.apply_differencing``.  Each run of ``self.order`` consecutive
        differenced values, joined with the curve's config, becomes one input
        row; the value following the run is its target.

        Args:
            losses: iterable of learning curves.
            configs: one sequence of extra feature values per curve.  ``None``
                means no extra features for any curve.

        Side effects:
            Sets ``self.X``, ``self.y`` and the fitted model ``self.rfr``.
        """
        if configs is None:
            # One empty config per curve.  The original sized this list with
            # an undefined name, so the default path raised NameError.
            configs = [[] for _ in losses]

        # convert learning curves into X and y data
        X = []
        y = []
        for curve, config in zip(losses, configs):
            diffs = self.apply_differencing(curve)
            for i in range(self.order, len(diffs)):
                X.append(np.hstack([diffs[i-self.order:i], config]))
                y.append(diffs[i])

        self.X = np.array(X)
        self.y = np.array(y)

        self.rfr = rfr().fit(self.X,self.y)
Example #21
Source File: test_regression_tests.py    From drifter_ml with MIT License 6 votes vote down vote up
def generate_regression_data_and_models():
    """Create a random regression dataset and fit two tree-based models on it.

    The dataset has 1000 rows with normally distributed columns ``A``, ``B``
    and ``C`` and a ``target`` column equal to their sum.

    Returns:
        Tuple ``(df, column_names, target_name, reg1, reg2)`` where ``reg1``
        is a fitted ``DecisionTreeRegressor`` and ``reg2`` a fitted
        ``RandomForestRegressor``.
    """
    # Collect the rows first and build the frame once: appending to a
    # DataFrame row by row copies the frame every time, and
    # DataFrame.append no longer exists in current pandas.
    rows = []
    for _ in range(1000):
        # Draw order a, b, c is kept so a seeded run yields the same data.
        a = np.random.normal(0, 1)
        b = np.random.normal(0, 3)
        c = np.random.normal(12, 4)
        rows.append({"A": a, "B": b, "C": c, "target": a + b + c})
    df = pd.DataFrame(rows)

    reg1 = tree.DecisionTreeRegressor()
    reg2 = ensemble.RandomForestRegressor()
    column_names = ["A", "B", "C"]
    target_name = "target"
    X = df[column_names]
    reg1.fit(X, df[target_name])
    reg2.fit(X, df[target_name])
    return df, column_names, target_name, reg1, reg2
Example #22
Source File: RandomForest.py    From pyGPGO with MIT License 6 votes vote down vote up
def fit(self, X, y):
        """
        Fit a Random Forest model to data `X` and targets `y`.

        The inputs are stored on the instance, together with the number of
        rows of `X`, before a new regressor is built from `self.params`.

        Parameters
        ----------
        X : array-like
            Input values.
        y: array-like
            Target values.
        """
        self.X, self.y = X, y
        self.n = self.X.shape[0]
        # The model is assigned before fitting, so it is set even if fit fails.
        forest = RandomForestRegressor(**self.params)
        self.model = forest
        forest.fit(X, y)
Example #23
Source File: ranking.py    From news-popularity-prediction with Apache License 2.0 6 votes vote down vote up
def get_regressor_fitted(file_path,
                         X_train,
                         X_test,
                         y_train,
                         y_test):
    """Return a fitted regressor, loading it from ``file_path`` when possible.

    If ``file_path`` exists the stored model is loaded; an ``EOFError`` while
    loading prints the path and is re-raised.  Otherwise a random forest is
    fitted on ``X_train``/``y_train``, stored at ``file_path`` and returned.
    ``X_test`` and ``y_test`` are not used.
    """
    if not os.path.exists(file_path):
        # No cached model yet: train one and cache it.
        forest = RandomForestRegressor(n_estimators=50,
                                       criterion="mse",
                                       max_features="auto",
                                       n_jobs=get_threads_number())
        fitted = forest.fit(X_train, y_train)
        store_sklearn_model(file_path, fitted)
        return fitted

    try:
        return load_sklearn_model(file_path)
    except EOFError as e:
        # Show which file could not be read before propagating the error.
        print(file_path)
        raise e
Example #24
Source File: random_forest_regressor.py    From Python with MIT License 5 votes vote down vote up
def main():
    """
    Random Forest Regressor Example using sklearn function.
    Boston house price dataset is used to demonstrate the algorithm.
    """
    # NOTE(review): load_boston is not available in recent scikit-learn
    # releases - confirm the pinned version.
    boston = load_boston()
    print(boston.keys())

    # 70/30 train/test split of features and target
    x_train, x_test, y_train, y_test = train_test_split(
        boston["data"], boston["target"], test_size=0.3, random_state=1
    )

    # Fit the forest on the training part
    rand_for = RandomForestRegressor(random_state=42, n_estimators=300)
    rand_for.fit(x_train, y_train)

    # Predictions for the test part, reshaped into a single column
    raw_predictions = rand_for.predict(x_test)
    predictions = raw_predictions.reshape(len(raw_predictions), 1)

    # Error printing
    mae = mean_absolute_error(y_test, predictions)
    print(f"Mean Absolute Error:\t {mae}")
    mse = mean_squared_error(y_test, predictions)
    print(f"Mean Square Error  :\t {mse}")
Example #25
Source File: model_wrapper.py    From AMPL with MIT License 5 votes vote down vote up
def __init__(self, params, featurizer, ds_client):
        """Initializes DCRFModelWrapper object.

        The best-model directory is created under ``self.output_dir`` and is
        also used as the model and baseline model directory.  A random forest
        regressor is built when ``params.prediction_type`` is 'regression',
        otherwise a random forest classifier, and wrapped in a DeepChem
        ``SklearnModel``.

        Args:
            params (Namespace object): contains all parameter information.
            featurizer (Featurization): Object managing the featurization of compounds
            ds_client: datastore client.
        """
        super().__init__(params, featurizer, ds_client)

        best_dir = os.path.join(self.output_dir, 'best_model')
        self.best_model_dir = best_dir
        self.model_dir = best_dir
        self.baseline_model_dir = best_dir
        os.makedirs(best_dir, exist_ok=True)

        # Regressor and classifier take identical settings.
        if self.params.prediction_type == 'regression':
            forest_cls = RandomForestRegressor
        else:
            forest_cls = RandomForestClassifier
        rf_model = forest_cls(n_estimators=self.params.rf_estimators,
                              max_features=self.params.rf_max_features,
                              max_depth=self.params.rf_max_depth,
                              n_jobs=-1)

        self.model = dc.models.sklearn_models.SklearnModel(rf_model, model_dir=self.best_model_dir)

    # **************************************************************************************** 
Example #26
Source File: random_forest.py    From driverlessai-recipes with Apache License 2.0 5 votes vote down vote up
def fit(self, X, y, sample_weight=None, eval_set=None, sample_weight_eval_set=None, **kwargs):
        """Fit a random forest on ``X`` after filling in missing values.

        A classifier is used (with label-encoded ``y``) when
        ``self.num_classes`` is at least 2, otherwise a regressor.  The fill
        value of every column is recorded in ``self.min``.  After fitting, the
        model, feature names, importances and ``n_estimators`` are handed to
        ``self.set_model_properties``.  The remaining arguments are accepted
        but not used.
        """
        orig_cols = list(X.names)
        if self.num_classes >= 2:
            encoder = LabelEncoder()
            encoder.fit(self.labels)
            y = encoder.transform(y)
            model = RandomForestClassifier(**self.params)
        else:
            model = RandomForestRegressor(**self.params)

        # Replace missing values with a value smaller than all observed values
        self.min = {}
        for col in X.names:
            column = X[:, col]
            col_min = column.min1()
            if col_min is None or np.isnan(col_min):
                # No usable minimum: fall back to a very small constant.
                fill_value = -1e10
            else:
                fill_value = col_min - 1
            self.min[col] = fill_value
            column.replace(None, fill_value)
            X[:, col] = column
            assert X[dt.isna(dt.f[col]), col].nrows == 0
        X = X.to_numpy()

        model.fit(X, y)
        importances = np.array(model.feature_importances_).tolist()
        self.set_model_properties(model=model,
                                  features=orig_cols,
                                  importances=importances,
                                  iterations=self.params['n_estimators'])
Example #27
Source File: test_validation.py    From Mastering-Elasticsearch-7.0 with MIT License 5 votes vote down vote up
def test_has_fit_parameter():
    """Check has_fit_parameter on classes, an instance and a deprecated fit."""
    assert not has_fit_parameter(KNeighborsClassifier, "sample_weight")
    # Estimator classes and an estimator instance are both checked.
    for estimator in (RandomForestRegressor, SVR, SVR()):
        assert has_fit_parameter(estimator, "sample_weight")

    class TestClassWithDeprecatedFitMethod:
        @deprecated("Deprecated for the purpose of testing has_fit_parameter")
        def fit(self, X, y, sample_weight=None):
            pass

    found = has_fit_parameter(TestClassWithDeprecatedFitMethod,
                              "sample_weight")
    assert found, \
        "has_fit_parameter fails for class with deprecated fit method."
Example #28
Source File: model_wrapper.py    From AMPL with MIT License 5 votes vote down vote up
def reload_model(self, reload_dir):
        """Loads a saved random forest model from the specified directory. For regression
        models with transformers enabled, also reloads the transformers that were saved
        with it, from the datastore or from a local pickle file.

        Args:
            reload_dir (str): Directory where saved model is located.

        Side effects:
            Resets the value of model and, when transformers are reloaded, of
            transformers and transformers_x

        """
        if self.params.prediction_type == 'regression':
            rf_model = RandomForestRegressor(n_estimators=self.params.rf_estimators,
                                             max_features=self.params.rf_max_features,
                                             max_depth=self.params.rf_max_depth,
                                             n_jobs=-1)
            if self.params.transformers:
                self.log.info("Reloading transformers from file %s" % self.params.transformer_key)
                if self.params.datastore:
                    self.transformers, self.transformers_x = dsf.retrieve_dataset_by_datasetkey(
                        dataset_key=self.params.transformer_key,
                        bucket=self.params.transformer_bucket,
                        client=self.ds_client)
                else:
                    # NOTE(review): unpickling executes code from the file - only
                    # load transformer files from trusted locations.
                    # The context manager closes the file, which the bare
                    # open() call used before never did.
                    with open(self.params.transformer_key, 'rb') as transformer_file:
                        self.transformers, self.transformers_x = pickle.load(transformer_file)
                # TODO: We shouldn't be reloading the transformers here - that should only happen when we load
                # TODO: a previously trained model to run predictions on a new dataset.
        else:
            rf_model = RandomForestClassifier(n_estimators=self.params.rf_estimators,
                                              max_features=self.params.rf_max_features,
                                              max_depth=self.params.rf_max_depth,
                                              n_jobs=-1)

        self.model = dc.models.sklearn_models.SklearnModel(rf_model, model_dir=reload_dir)
        self.model.reload()

    # **************************************************************************************** 
Example #29
Source File: features.py    From lumin with Apache License 2.0 5 votes vote down vote up
def get_rf_feat_importance(rf:Union[RandomForestRegressor,RandomForestClassifier], inputs:pd.DataFrame, targets:np.ndarray, weights:Optional[np.ndarray]=None) -> pd.DataFrame:
    r'''
    Compute feature importance for a Random Forest model using rfpimp.
    Every column of `inputs` is scored.

    Arguments:
        rf: trained Random Forest model
        inputs: input data as Pandas DataFrame
        targets: target data as Numpy array
        weights: Optional data weights as Numpy array

    Returns:
        Importances with the index reset into a regular column
    '''

    scores = importances(rf, inputs, targets, features=inputs.columns, sample_weights=weights)
    return scores.reset_index()
Example #30
Source File: test_forest.py    From Mastering-Elasticsearch-7.0 with MIT License 5 votes vote down vote up
def test_min_impurity_decrease():
    """Check that min_impurity_decrease reaches every tree of each forest."""
    X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1)

    forest_types = (RandomForestClassifier, RandomForestRegressor,
                    ExtraTreesClassifier, ExtraTreesRegressor)
    for Estimator in forest_types:
        forest = Estimator(min_impurity_decrease=0.1)
        forest.fit(X, y)
        # Simply check if the parameter is passed on correctly. Tree tests
        # will suffice for the actual working of this param
        for fitted_tree in forest.estimators_:
            assert_equal(fitted_tree.min_impurity_decrease, 0.1)