Python sklearn.ensemble.RandomForestRegressor() Examples

The following are 30 code examples showing how to use sklearn.ensemble.RandomForestRegressor(). These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example.

You may check out the related API usage on the sidebar.

You may also want to check out all available functions/classes of the module sklearn.ensemble, or try the search function.

Example 1
Project: OpenChem   Author: Mariewelt   File: vanilla_model.py    License: MIT License 6 votes vote down vote up
def __init__(self, model_type='classifier', feature_type='fingerprints',
                 n_estimators=100, n_ensemble=5):
        """Create an ensemble of random-forest models.

        Parameters
        ----------
        model_type : str
            'classifier' builds RFC models, 'regressor' builds RFR models.
        feature_type : str
            Stored on the instance; when it equals 'descriptors' a
            descriptor Calculator and a per-model mean buffer are created.
        n_estimators : int
            Forwarded to every forest as ``n_estimators``.
        n_ensemble : int
            Number of independent forests held in ``self.model``.

        Raises
        ------
        ValueError
            If ``model_type`` is neither 'classifier' nor 'regressor'.
        """
        super(RandomForestQSAR, self).__init__()
        self.n_estimators = n_estimators
        self.n_ensemble = n_ensemble
        self.model = []
        self.model_type = model_type

        # Choose the forest class once, then instantiate it n_ensemble times.
        if self.model_type == 'classifier':
            forest_cls = RFC
        elif self.model_type == 'regressor':
            forest_cls = RFR
        else:
            raise ValueError('invalid value for argument')
        self.model.extend(
            forest_cls(n_estimators=n_estimators) for _ in range(n_ensemble)
        )

        self.feature_type = feature_type
        if self.feature_type == 'descriptors':
            self.calc = Calculator(descriptors, ignore_3D=True)
            # One slot per ensemble member, initialised to zero.
            self.desc_mean = [0] * self.n_ensemble
Example 2
Project: deepchem   Author: deepchem   File: test_overfit.py    License: MIT License 6 votes vote down vote up
def test_sklearn_regression_overfit(self):
    """Check that a wrapped sklearn forest can overfit a tiny regression set."""
    num_rows, num_cols, num_targets = 10, 3, 1

    # Seed first so the random features and targets are reproducible.
    np.random.seed(123)
    row_ids = np.arange(num_rows)
    features = np.random.rand(num_rows, num_cols)
    targets = np.random.rand(num_rows, num_targets)
    weights = np.ones((num_rows, num_targets))
    train_set = dc.data.NumpyDataset(features, targets, weights, row_ids)

    r2_metric = dc.metrics.Metric(dc.metrics.r2_score)
    wrapped = dc.models.SklearnModel(RandomForestRegressor())

    # Train the model, then persist it.
    wrapped.fit(train_set)
    wrapped.save()

    # Score on the training data itself: the model is expected to overfit.
    train_scores = wrapped.evaluate(train_set, [r2_metric])
    assert train_scores[r2_metric.name] > .7
Example 3
Project: news-popularity-prediction   Author: MKLab-ITI   File: ranking.py    License: Apache License 2.0 6 votes vote down vote up
def get_regressor_fitted(file_path,
                         X_train,
                         X_test,
                         y_train,
                         y_test):
    """Return a fitted random forest, loading it from `file_path` if present.

    When `file_path` does not exist, a new regressor is fitted on
    `X_train`/`y_train`, stored at `file_path` and returned. `X_test` and
    `y_test` are accepted but not used by this function.

    Raises:
        EOFError: re-raised (after printing `file_path`) when loading the
            stored model fails with it.
    """
    if not os.path.exists(file_path):
        forest = RandomForestRegressor(n_estimators=50,
                                       criterion="mse",
                                       max_features="auto",
                                       n_jobs=get_threads_number())
        fitted = forest.fit(X_train, y_train)
        store_sklearn_model(file_path, fitted)
        return fitted

    try:
        return load_sklearn_model(file_path)
    except EOFError as e:
        # Print the offending path before propagating the error.
        print(file_path)
        raise e
Example 4
def Train(data, treecount, tezh, yanzhgdata):
    """Fit a forest on `data` and report train/validation MSE.

    The last column of `data` and of `yanzhgdata` is used as the target and
    all preceding columns as features. `treecount` is passed as
    `n_estimators` and `tezh` as `max_features`.

    Returns the pair (train_mse, add_mse); the same pair is also printed.
    """
    forest = RF(n_estimators=treecount, max_features=tezh)
    forest.fit(data[:, :-1], data[:, -1])

    # Predictions and MSE on the training data
    fitted_values = forest.predict(data[:, :-1])
    train_mse = mse(data[:, -1], fitted_values)

    # Predictions and MSE on the validation data
    validation_values = forest.predict(yanzhgdata[:, :-1])
    add_mse = mse(yanzhgdata[:, -1], validation_values)

    print(train_mse, add_mse)
    return train_mse, add_mse

# Function that determines the final combination
Example 5
Project: mlens   Author: flennerhag   File: friedman_scores.py    License: MIT License 6 votes vote down vote up
def build_ensemble(**kwargs):
    """Build a SuperLearner with three preprocessing pipelines.

    All keyword arguments are forwarded to ``SuperLearner``. The returned
    ensemble has one layer of base estimators keyed by preprocessing case
    and a gradient-boosting meta learner.
    """
    ensemble = SuperLearner(**kwargs)

    # Preprocessing steps per case; the keys must match `estimators` below.
    preprocessing = {
        'Standard Scaling': [StandardScaler()],
        'Min Max Scaling': [MinMaxScaler()],
        'No Preprocessing': [],
    }

    estimators = {
        'Standard Scaling': [ElasticNet(), Lasso(), KNeighborsRegressor()],
        'Min Max Scaling': [SVR()],
        'No Preprocessing': [
            RandomForestRegressor(random_state=SEED),
            GradientBoostingRegressor(),
        ],
    }

    ensemble.add(estimators, preprocessing)
    ensemble.add(GradientBoostingRegressor(), meta=True)
    return ensemble
Example 6
def regression_rf(x,y):
    '''
        Fit a random forest regressor on (x, y) and return it
    '''
    # fixed hyper-parameters for the forest
    forest_options = dict(
        min_samples_split=80, random_state=666,
        max_depth=5, n_estimators=10)
    forest = en.RandomForestRegressor(**forest_options)

    # train on the supplied data
    forest.fit(x, y)

    return forest

# the file name of the dataset 
Example 7
Project: m2cgen   Author: BayesWitnesses   File: test_ensemble.py    License: MIT License 6 votes vote down vote up
def test_single_condition():
    """A two-tree forest on two points assembles to (1.0 + if-expr) * 0.5."""
    forest = ensemble.RandomForestRegressor(n_estimators=2, random_state=1)
    forest.fit([[1], [2]], [1, 2])

    actual = assemblers.RandomForestModelAssembler(forest).assemble()

    # Build the expected expression from the inside out.
    split_test = ast.CompExpr(
        ast.FeatureRef(0),
        ast.NumVal(1.5),
        ast.CompOpType.LTE)
    branching_tree = ast.IfExpr(
        split_test,
        ast.NumVal(1.0),
        ast.NumVal(2.0))
    tree_sum = ast.BinNumExpr(
        ast.NumVal(1.0),
        branching_tree,
        ast.BinNumOpType.ADD)
    expected = ast.BinNumExpr(
        tree_sum,
        ast.NumVal(0.5),
        ast.BinNumOpType.MUL)

    assert utils.cmp_exprs(actual, expected)
Example 8
Project: drifter_ml   Author: EricSchles   File: test_regression_tests.py    License: MIT License 6 votes vote down vote up
def generate_regression_data_and_models():
    """Create a synthetic regression frame and two models fitted on it.

    Draws 1000 rows with columns "A" ~ N(0, 1), "B" ~ N(0, 3) and
    "C" ~ N(12, 4), with "target" = A + B + C, then fits a decision tree
    and a random forest regressor on the three feature columns.

    Returns:
        (df, column_names, target_name, reg1, reg2) where reg1 is the
        fitted DecisionTreeRegressor and reg2 the fitted
        RandomForestRegressor.
    """
    # Collect rows in a list and build the frame once: DataFrame.append
    # copied the whole frame on every call and was removed in pandas 2.0.
    rows = []
    for _ in range(1000):
        a = np.random.normal(0, 1)
        b = np.random.normal(0, 3)
        c = np.random.normal(12, 4)
        rows.append({"A": a, "B": b, "C": c, "target": a + b + c})
    df = pd.DataFrame(rows, columns=["A", "B", "C", "target"])

    reg1 = tree.DecisionTreeRegressor()
    reg2 = ensemble.RandomForestRegressor()
    column_names = ["A", "B", "C"]
    target_name = "target"
    X = df[column_names]
    reg1.fit(X, df[target_name])
    reg2.fit(X, df[target_name])
    return df, column_names, target_name, reg1, reg2
Example 9
Project: pyGPGO   Author: josejimenezluna   File: RandomForest.py    License: MIT License 6 votes vote down vote up
def fit(self, X, y):
        """
        Train a new Random Forest on inputs `X` with targets `y`.

        The data, the number of rows of `X` and the fitted model are kept
        on the instance as `X`, `y`, `n` and `model`.

        Parameters
        ----------
        X : array-like
            Input values.
        y: array-like
            Target values.
        """
        self.X, self.y = X, y
        self.n = self.X.shape[0]
        # A fresh estimator is built from the stored params on every call.
        self.model = RandomForestRegressor(**self.params)
        self.model.fit(X, y)
Example 10
Project: Pyspatialml   Author: stevenpawley   File: test_prediction.py    License: GNU General Public License v3.0 6 votes vote down vote up
def test_regression(self):
        """Predict single- and multi-target regressions onto the raster stack."""
        target_columns = ["zinc", "cadmium", "copper", "lead"]

        points = gpd.read_file(ms.meuse)
        samples = self.stack_meuse.extract_vector(gdf=points)
        # Attach the measured targets to the extracted raster values.
        for column in target_columns:
            samples[column] = points[column]
        samples = samples.dropna()

        # single target regression
        forest = RandomForestRegressor(n_estimators=50)
        predictors = samples.loc[:, self.stack_meuse.names]
        forest.fit(predictors, samples["zinc"])

        single_result = self.stack_meuse.predict(forest)
        self.assertIsInstance(single_result, Raster)
        self.assertEqual(single_result.count, 1)

        # multi-target regression
        forest.fit(predictors, samples.loc[:, target_columns])
        multi_result = self.stack_meuse.predict(forest)
        self.assertIsInstance(multi_result, Raster)
        self.assertEqual(multi_result.count, 4)
Example 11
Project: HpBandSter   Author: automl   File: arif.py    License: BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def fit(self, losses, configs=None):
        """Fit the forest on windows taken from differenced loss curves.

        Each curve in `losses` is passed through `self.apply_differencing`.
        For every position `i >= self.order`, the preceding `self.order`
        values joined with the curve's config form one feature row and the
        value at `i` is its target. The resulting arrays are stored as
        `self.X` / `self.y` and the fitted model as `self.rfr`.

        Parameters
        ----------
        losses : sequence of sequences
            One loss curve per entry.
        configs : sequence of sequences, optional
            One feature vector per curve, appended to every window of that
            curve. Defaults to an empty vector for every curve.
        """
        if configs is None:
            # One (shared, never mutated) empty config per loss curve.
            # The original used len(times), an undefined name that raised
            # NameError whenever configs was omitted.
            configs = [[]] * len(losses)

        # convert learning curves into X and y data
        X = []
        y = []

        for curve, config in zip(losses, configs):
            diffed = self.apply_differencing(curve)

            for i in range(self.order, len(diffed)):
                X.append(np.hstack([diffed[i - self.order:i], config]))
                y.append(diffed[i])

        self.X = np.array(X)
        self.y = np.array(y)

        self.rfr = rfr().fit(self.X, self.y)
Example 12
Project: HpBandSter   Author: automl   File: arif.py    License: BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def extend_partial(self, obs_losses, num_steps, config=None):
        """Extrapolate `obs_losses` by `num_steps` further values.

        The observed curve is differenced, the fitted forest is applied
        `num_steps` times to the last `self.order` differenced values (plus
        `config`), each prediction being appended before the next step, and
        the predicted differences are mapped back with
        `self.invert_differencing`.
        """
        # TODO: add variance predictions
        extra_features = [] if config is None else config

        diffed = self.apply_differencing(obs_losses)

        for _ in range(num_steps):
            window = diffed[-self.order:]
            features = np.hstack([window, extra_features])
            next_value = self.rfr.predict([features])
            # Feed the prediction back in as the newest observation.
            diffed = np.hstack([diffed, next_value])

        predicted_diffs = diffed[-num_steps:]
        return self.invert_differencing(obs_losses, predicted_diffs)
Example 13
Project: coremltools   Author: apple   File: test_io_types.py    License: BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def test_random_forest_regressor(self):
        """Compare sklearn and converted-model predictions for every dtype."""
        for dtype in self.number_data_type.keys():
            forest = RandomForestRegressor(random_state=1)
            features = self.scikit_data["data"].astype(dtype)
            labels = self.scikit_data["target"].astype(dtype)
            forest, spec = self._sklearn_setup(forest, dtype, features, labels)
            # A single row, reshaped to 2-D, is used as the probe input.
            sample = features[0].reshape(1, -1)
            self._check_tree_model(spec, "multiArrayType", "doubleType", 1)
            converted = create_model(spec)
            try:
                # The output types must agree ...
                self.assertEqual(
                    forest.predict(sample)[0].dtype,
                    type(converted.predict({"data": sample})["target"]),
                )
                # ... and so must the predicted values.
                self.assertAlmostEqual(
                    forest.predict(sample)[0],
                    converted.predict({"data": sample})["target"],
                    msg="{} != {} for Dtype: {}".format(
                        forest.predict(sample)[0],
                        converted.predict({"data": sample})["target"],
                        dtype,
                    ),
                )
            except RuntimeError:
                # A RuntimeError is reported as an unsupported dtype.
                print("{} not supported. ".format(dtype))
Example 14
def _train_convert_evaluate_assert(self, **scikit_params):
        """
        Fit a scikit-learn forest with the given parameters, convert it, and
        (on macOS 10.13 or newer only) evaluate the converted spec against
        the scikit-learn predictions.
        """
        forest = RandomForestRegressor(random_state=1, **scikit_params)
        forest.fit(self.X, self.target)

        # Conversion always runs, regardless of platform.
        spec = skl_converter.convert(forest, self.feature_names, self.output_name)

        # Evaluation is skipped unless running on macOS >= 10.13.
        if not (_is_macos() and _macos_version() >= (10, 13)):
            return

        # Reference predictions from the scikit-learn model.
        frame = pd.DataFrame(self.X, columns=self.feature_names)
        frame["prediction"] = forest.predict(self.X)

        metrics = evaluate_regressor(spec, frame, verbose=False)
        self._check_metrics(metrics, scikit_params)
Example 15
Project: civisml-extensions   Author: civisanalytics   File: test_stacking.py    License: BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def test_smoke_regression_methods(regression_test_data, n_jobs):
    """Smoke test: a stacked regressor fits, predicts and scores on the
    fixture data, and has no `predict_proba`.
    """
    features = regression_test_data['x']
    targets = regression_test_data['y']

    seeded_rng = np.random.RandomState(17)
    stages = [
        ('lr', LinearRegression()),
        ('rf', RandomForestRegressor(random_state=seeded_rng,
                                     n_estimators=10)),
        ('nnls', NonNegativeLinearRegression()),
    ]
    stacker = StackedRegressor(stages, n_jobs=n_jobs)
    stacker.fit(features, targets)
    stacker.predict(features)
    stacker.score(features, targets)

    # Accessing predict_proba on the stack must raise AttributeError.
    with pytest.raises(AttributeError):
        stacker.predict_proba(features)
Example 16
Project: civisml-extensions   Author: civisanalytics   File: test_stacking.py    License: BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def test_smoke_multiout_regression_methods(n_jobs):
    """Smoke test: a stacked regressor fits, predicts and scores on a
    generated two-target problem, and has no `predict_proba`.
    """
    features, targets = make_regression(
        random_state=7, n_samples=100, n_features=10,
        n_informative=4, n_targets=2)

    seeded_rng = np.random.RandomState(17)
    stages = [
        ('lr', LinearRegression()),
        ('rf', RandomForestRegressor(random_state=seeded_rng,
                                     n_estimators=10)),
        ('metalr', LinearRegression()),
    ]
    stacker = StackedRegressor(stages, n_jobs=n_jobs)
    stacker.fit(features, targets)
    stacker.predict(features)
    stacker.score(features, targets)

    # Accessing predict_proba on the stack must raise AttributeError.
    with pytest.raises(AttributeError):
        stacker.predict_proba(features)
Example 17
Project: civisml-extensions   Author: civisanalytics   File: test_stacking.py    License: BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def test_integration_regression(regression_test_data, n_jobs):
    """Fit a stacked regressor and its two base estimators on the fixture
    data and check that the stack has the lowest test error.
    """
    train_x = regression_test_data['x']
    train_y = regression_test_data['y']
    test_x = regression_test_data['xtest']
    test_y = regression_test_data['ytest']

    stages = [
        ('rf', RandomForestRegressor(random_state=7, n_estimators=10)),
        ('lr', LinearRegression()),
        ('metalr', NonNegativeLinearRegression()),
    ]
    stacked = StackedRegressor(stages, n_jobs=n_jobs)
    forest = RandomForestRegressor(random_state=7, n_estimators=10)
    linear = LinearRegression()

    stacked_mse = fit_predict_measure_reg(stacked, train_x, train_y,
                                          test_x, test_y)
    forest_mse = fit_predict_measure_reg(forest, train_x, train_y,
                                         test_x, test_y)
    linear_mse = fit_predict_measure_reg(linear, train_x, train_y,
                                         test_x, test_y)

    # On this data the stack is expected to beat both base estimators.
    assert stacked_mse < forest_mse
    assert stacked_mse < linear_mse
    assert stacked_mse < 1.5    # Sanity check
Example 18
Project: mljar-supervised   Author: mljar   File: random_forest.py    License: MIT License 6 votes vote down vote up
def __init__(self, params):
        """Configure a warm-start random forest from `params`.

        Step sizes and early-stopping settings are read from the
        module-level `regression_additional` mapping; forest
        hyper-parameters come from `params`, with the defaults shown below.
        """
        super(RandomForestRegressorAlgorithm, self).__init__(params)
        logger.debug("RandomForestRegressorAlgorithm.__init__")

        self.library_version = sklearn.__version__

        extra = regression_additional
        self.trees_in_step = extra.get("trees_in_step", 5)
        self.max_steps = extra.get("max_steps", 3)
        self.early_stopping_rounds = extra.get("early_stopping_rounds", 50)

        forest_options = {
            # The forest starts with one step's worth of trees.
            "n_estimators": self.trees_in_step,
            "criterion": params.get("criterion", "mse"),
            "max_features": params.get("max_features", 0.8),
            "min_samples_split": params.get("min_samples_split", 4),
            "warm_start": True,
            "n_jobs": -1,
            "random_state": params.get("seed", 1),
        }
        self.model = RandomForestRegressor(**forest_options)
Example 19
Project: scan   Author: VikParuchuri   File: scorer.py    License: GNU Affero General Public License v3.0 6 votes vote down vote up
def __init__(self, text, scores):
        """Store the data, pick a forest model and run feature fitting.

        A regressor is used by default; when the number of distinct values
        in `scores` does not exceed `self.classification_max`, a classifier
        with the same hyper-parameters replaces it.
        """
        self.text = text
        self.scores = scores
        self.feature_generator = FeatureGenerator()

        # Both model kinds share exactly the same hyper-parameters.
        forest_options = dict(
            n_estimators=100,
            min_samples_split=4,
            min_samples_leaf=3,
            random_state=1,
        )
        self.classifier = RandomForestRegressor(**forest_options)

        distinct_scores = set(scores)
        if len(distinct_scores) <= self.classification_max:
            self.classifier = RandomForestClassifier(**forest_options)

        self.fit_feats()
        self.fit_done = False
Example 20
Project: Semantic-Texual-Similarity-Toolkits   Author: rgtjf   File: classifier.py    License: MIT License 6 votes vote down vote up
def train_model(self, train_file_path, model_path):
        """Train a random forest on a feature file and pickle the results.

        The features loaded from `train_file_path` are scaled with a
        MaxAbsScaler, converted with `toarray()` and used to fit a
        RandomForestRegressor with `self.n_estimators` trees. The model is
        pickled to `model_path` and the scaler to the same path with
        '.pkl' replaced by '.scaler.pkl'.

        Returns the fitted regressor.
        """
        print("==> Load the data ...")
        X_train, Y_train = self.load_file(train_file_path)
        print(train_file_path, shape(X_train))

        print("==> Train the model ...")
        min_max_scaler = preprocessing.MaxAbsScaler()
        X_train_minmax = min_max_scaler.fit_transform(X_train)
        clf = RandomForestRegressor(n_estimators=self.n_estimators)
        clf.fit(X_train_minmax.toarray(), Y_train)

        print("==> Save the model ...")
        # Context managers ensure both files are flushed and closed; the
        # original left the handles for the garbage collector to close.
        with open(model_path, 'wb') as model_file:
            pickle.dump(clf, model_file)

        # NOTE(review): if model_path does not contain '.pkl', scaler_path
        # equals model_path and the scaler overwrites the model file.
        scaler_path = model_path.replace('.pkl', '.scaler.pkl')
        with open(scaler_path, 'wb') as scaler_file:
            pickle.dump(min_max_scaler, scaler_file)
        return clf
Example 21
Project: jh-kaggle-util   Author: jeffheaton   File: ensemble_glm.py    License: Apache License 2.0 6 votes vote down vote up
def fit_ensemble(x,y):
    """Fit and return the blending model for the configured fit type.

    For binary classification an elastic-net SGDClassifier with log loss
    is used; for every other fit type a LassoLarsCV with normalization.

    The original wrapped this in ``if 1: ... else: ...``; the ``else``
    branch (a plain LogisticRegression) could never run and was removed.
    """
    fit_type = jhkaggle.jhkaggle_config['FIT_TYPE']
    if fit_type == jhkaggle.const.FIT_TYPE_BINARY_CLASSIFICATION:
        blend = SGDClassifier(loss="log", penalty="elasticnet")  # LogisticRegression()
    else:
        # Alternatives left commented out by the author: SGDRegressor(),
        # LinearRegression(), LinearRegression(normalize=True),
        # ElasticNetCV(normalize=True),
        # RandomForestRegressor(n_estimators=10, n_jobs=-1, max_depth=5,
        #                       criterion='mae')
        blend = LassoLarsCV(normalize=True)
    blend.fit(x, y)
    return blend
Example 22
Project: jh-kaggle-util   Author: jeffheaton   File: models.py    License: Apache License 2.0 6 votes vote down vote up
def run_sklearn():
  """Train every configured sklearn model and print the total elapsed time.

  Each (name, estimator) pair is handed to jhkaggle's TrainSKLearn with
  the fixed run id "1" and run in sequence.
  """
  # https://www.analyticsvidhya.com/blog/2015/06/tuning-random-forest-model/
  # NOTE(review): 'extree' is a classifier while every other entry is a
  # regressor -- confirm whether ExtraTreesRegressor was intended.
  alg_list = [
      ['lreg',LinearRegression()],
      ['rforest',RandomForestRegressor(n_estimators=1000, n_jobs=-1, max_depth=3)],
      ['extree',ExtraTreesClassifier(n_estimators = 1000,max_depth=2)],
      ['adaboost',AdaBoostRegressor(base_estimator=None, n_estimators=600, learning_rate=1.0)],
      ['knn', sklearn.neighbors.KNeighborsRegressor(n_neighbors=5)]
  ]

  start_time = time.time()
  for name, alg in alg_list:
    # The trainer is not bound to a name, so it is released as soon as the
    # run finishes (the original reset `train = None` for the same effect).
    jhkaggle.train_sklearn.TrainSKLearn("1", name, alg, False).run()
  elapsed_time = time.time() - start_time
  print("Elapsed time: {}".format(jhkaggle.util.hms_string(elapsed_time)))
Example 23
Project: bitpredict   Author: cbyn   File: strategy.py    License: MIT License 6 votes vote down vote up
def fit_and_trade(data, cols, split, threshold):
    '''
    Fit a random forest on the first `split` rows and backtest a
    theoretical trading strategy on the remaining rows.

    Only rows with a positive `width` are used; `cols` selects the feature
    columns and `mid30` is the target.
    '''
    valid = data[data.width > 0]
    features = valid[cols]
    target = valid.mid30

    # Positional split: rows before `split` train, the rest are held out.
    train_X, test_X = features.iloc[:split], features.iloc[split:]
    train_y, test_y = target.iloc[:split], target.iloc[split:]

    forest = RandomForestRegressor(n_estimators=100,
                                   min_samples_leaf=500,
                                   random_state=42,
                                   n_jobs=-1)
    forest.fit(train_X.values, train_y.values)
    trade(test_X.values, test_y.values, forest, threshold)
Example 24
Project: interpret-text   Author: interpretml   File: common_utils.py    License: MIT License 5 votes vote down vote up
def create_sklearn_random_forest_regressor(X, y):
    """Fit a depth-4 random forest regressor on (X, y) and return the result of `fit`."""
    forest_options = {"max_depth": 4, "random_state": 777}
    return ensemble.RandomForestRegressor(**forest_options).fit(X, y)
Example 25
Project: kaggle-code   Author: CNuge   File: hockey_front_to_back.py    License: MIT License 5 votes vote down vote up
def fit(self, X, y):
		"""Store the training data, build the three regressors and fit each."""
		self.X, self.y = X, y

		# Instantiate every model from its stored parameter set.
		self.opt_XGBoost_reg = xgb.XGBRegressor(**self.opt_xgb_params)
		self.opt_forest_reg = RandomForestRegressor(**self.opt_rf_params)
		self.opt_svm_reg = SVR(**self.opt_svm_params)

		# Fit the models in the same order they were created.
		fitted_in_order = (
			self.opt_XGBoost_reg,
			self.opt_forest_reg,
			self.opt_svm_reg,
		)
		for regressor in fitted_in_order:
			regressor.fit(self.X, self.y)
Example 26
Project: missingpy   Author: epsilon-machine   File: test_missforest.py    License: GNU General Public License v3.0 5 votes vote down vote up
def test_missforest_numerical_single():
    """MissForest with one missing numeric value matches a manual forest.

    The expected imputed value is obtained by fitting a
    RandomForestRegressor (same n_estimators and random_state as the
    imputer) on the complete rows and predicting the incomplete one.
    """
    # Test imputation with default parameter values

    # Test with a single missing value
    df = np.array([
        [1,      0,      0,      1],
        [2,      1,      2,      2],
        [3,      2,      3,      2],
        [np.nan, 4,      5,      5],
        [6,      7,      6,      7],
        [8,      8,      8,      8],
        [16,     15,     18,    19],
    ])
    statistics_mean = np.nanmean(df, axis=0)

    y = df[:, 0]
    X = df[:, 1:]
    good_rows = np.where(~np.isnan(y))[0]
    bad_rows = np.where(np.isnan(y))[0]

    rf = RandomForestRegressor(n_estimators=10, random_state=1337)
    rf.fit(X=X[good_rows], y=y[good_rows])
    # Exactly one row is missing, so take the single prediction as a
    # scalar. Embedding the length-1 array itself as a cell of the literal
    # below depends on NumPy's legacy size-1-array handling, which newer
    # NumPy releases treat as a ragged nested sequence.
    pred_val = rf.predict(X[bad_rows])[0]

    df_imputed = np.array([
        [1,         0,      0,      1],
        [2,         1,      2,      2],
        [3,         2,      3,      2],
        [pred_val,  4,      5,      5],
        [6,         7,      6,      7],
        [8,         8,      8,      8],
        [16,        15,     18,    19],
    ])

    imputer = MissForest(n_estimators=10, random_state=1337)
    assert_array_equal(imputer.fit_transform(df), df_imputed)
    assert_array_equal(imputer.statistics_.get('col_means'), statistics_mean)
Example 27
Project: deepchem   Author: deepchem   File: nci_rf.py    License: MIT License 5 votes vote down vote up
def model_builder(model_dir):
  """Return a SklearnModel wrapping a 500-tree forest, given `model_dir`."""
  return SklearnModel(RandomForestRegressor(n_estimators=500), model_dir)
Example 28
Project: deepchem   Author: deepchem   File: KINASE_rf_model.py    License: MIT License 5 votes vote down vote up
def task_model_builder(model_dir):
  """Return a SklearnModel wrapping a 100-tree forest, given `model_dir`.

  Each split considers int(num_features/3) features, where `num_features`
  is read from the enclosing scope.
  """
  forest_options = {
      "n_estimators": 100,
      "max_features": int(num_features/3),
      "min_samples_split": 5,
      "n_jobs": -1,
  }
  forest = RandomForestRegressor(**forest_options)
  return dc.models.SklearnModel(forest, model_dir)
Example 29
Project: deepchem   Author: deepchem   File: UV_rf_model.py    License: MIT License 5 votes vote down vote up
def task_model_builder(model_dir):
  """Return a SklearnModel wrapping a 100-tree forest, given `model_dir`.

  `num_features` is read from the enclosing scope; a third of it
  (truncated to int) is used as `max_features`.
  """
  features_per_split = int(num_features / 3)
  return dc.models.SklearnModel(
      RandomForestRegressor(
          n_estimators=100,
          max_features=features_per_split,
          min_samples_split=5,
          n_jobs=-1),
      model_dir)
Example 30
Project: deepchem   Author: deepchem   File: FACTORS_rf_model.py    License: MIT License 5 votes vote down vote up
def task_model_builder(model_dir):
  """Return a SklearnModel wrapping a 100-tree forest, given `model_dir`.

  `max_features` is int(num_features/3), with `num_features` taken from
  the enclosing scope.
  """
  forest_options = dict(
      n_estimators=100,
      max_features=int(num_features/3),
      min_samples_split=5,
      n_jobs=-1,
  )
  forest = RandomForestRegressor(**forest_options)
  return dc.models.SklearnModel(forest, model_dir)