Python sklearn.ensemble.RandomForestRegressor() Examples
The following are 30 code examples showing how to use sklearn.ensemble.RandomForestRegressor(). These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example.
You may check out the related API usage on the sidebar.
You may also want to check out all available functions/classes of the module sklearn.ensemble, or try the search function.
Example 1
Project: OpenChem Author: Mariewelt File: vanilla_model.py License: MIT License | 6 votes |
def __init__(self, model_type='classifier', feature_type='fingerprints',
             n_estimators=100, n_ensemble=5):
    """Set up an ensemble of random-forest estimators.

    Args:
        model_type: ``'classifier'`` fills the ensemble with ``RFC``
            instances, ``'regressor'`` with ``RFR`` instances.
        feature_type: Stored on the instance; the value ``'descriptors'``
            additionally creates a ``Calculator`` and per-member mean slots.
        n_estimators: Passed as ``n_estimators`` to every ensemble member.
        n_ensemble: Number of estimators placed in ``self.model``.

    Raises:
        ValueError: If ``model_type`` is neither supported value.
    """
    super(RandomForestQSAR, self).__init__()
    self.n_estimators = n_estimators
    self.n_ensemble = n_ensemble
    self.model = []
    self.model_type = model_type

    # Resolve the estimator class first so the ensemble is built in one place.
    if model_type == 'classifier':
        estimator_cls = RFC
    elif model_type == 'regressor':
        estimator_cls = RFR
    else:
        raise ValueError('invalid value for argument')
    self.model.extend(
        estimator_cls(n_estimators=n_estimators) for _ in range(n_ensemble)
    )

    self.feature_type = feature_type
    # NOTE(review): block structure was reconstructed from a flattened
    # listing; desc_mean is assumed to belong to the descriptors branch.
    if feature_type == 'descriptors':
        self.calc = Calculator(descriptors, ignore_3D=True)
        self.desc_mean = [0] * n_ensemble
Example 2
Project: deepchem Author: deepchem File: test_overfit.py License: MIT License | 6 votes |
def test_sklearn_regression_overfit(self):
    """Check that a wrapped sklearn forest can overfit a tiny regression set."""
    n_samples, n_features, n_tasks = 10, 3, 1

    # Build a small random dataset; seeding keeps the draws reproducible.
    np.random.seed(123)
    sample_ids = np.arange(n_samples)
    features = np.random.rand(n_samples, n_features)
    targets = np.random.rand(n_samples, n_tasks)
    weights = np.ones((n_samples, n_tasks))
    dataset = dc.data.NumpyDataset(features, targets, weights, sample_ids)

    r2_metric = dc.metrics.Metric(dc.metrics.r2_score)
    model = dc.models.SklearnModel(RandomForestRegressor())

    # Train the model, then save it.
    model.fit(dataset)
    model.save()

    # Score on the very data the model was trained on.
    train_scores = model.evaluate(dataset, [r2_metric])
    assert train_scores[r2_metric.name] > .7
Example 3
Project: news-popularity-prediction Author: MKLab-ITI File: ranking.py License: Apache License 2.0 | 6 votes |
def get_regressor_fitted(file_path, X_train, X_test, y_train, y_test):
    """Return a fitted forest, loading it from ``file_path`` when it exists.

    When ``file_path`` does not exist, a 50-tree regressor is fitted on
    ``(X_train, y_train)``, stored at ``file_path`` and returned.
    ``X_test`` and ``y_test`` are accepted but not used here.

    Raises:
        EOFError: Re-raised after printing ``file_path`` when loading the
            stored model fails with this error.
    """
    if not os.path.exists(file_path):
        # NOTE(review): criterion="mse" and max_features="auto" are
        # deprecated/removed in newer scikit-learn releases -- confirm the
        # version this project targets before upgrading.
        forest = RandomForestRegressor(n_estimators=50,
                                       criterion="mse",
                                       max_features="auto",
                                       n_jobs=get_threads_number())
        fitted = forest.fit(X_train, y_train)
        store_sklearn_model(file_path, fitted)
        return fitted

    try:
        return load_sklearn_model(file_path)
    except EOFError as e:
        # Print the offending path before propagating the error.
        print(file_path)
        raise e
Example 4
Project: Machine-Learning-for-Beginner-by-Python3 Author: Anfany File: pm25_RF_Regression.py License: MIT License | 6 votes |
def Train(data, treecount, tezh, yanzhgdata):
    """Fit a forest on ``data`` and report training and validation MSE.

    In both arrays the last column is used as the target and all preceding
    columns as features.

    Args:
        data: Training array.
        treecount: Passed as ``n_estimators``.
        tezh: Passed as ``max_features``.
        yanzhgdata: Validation array.

    Returns:
        Tuple ``(train_mse, add_mse)``; the pair is also printed.
    """
    train_x, train_y = data[:, :-1], data[:, -1]
    forest = RF(n_estimators=treecount, max_features=tezh)
    forest.fit(train_x, train_y)

    # Predictions and MSE on the training data.
    train_mse = mse(train_y, forest.predict(train_x))

    # Predictions and MSE on the validation data.
    valid_x, valid_y = yanzhgdata[:, :-1], yanzhgdata[:, -1]
    add_mse = mse(valid_y, forest.predict(valid_x))

    print(train_mse, add_mse)
    return train_mse, add_mse

# Function that determines the final combination
Example 5
Project: mlens Author: flennerhag File: friedman_scores.py License: MIT License | 6 votes |
def build_ensemble(**kwargs):
    """Build a SuperLearner with three preprocessing cases and a meta layer.

    ``kwargs`` are forwarded unchanged to ``SuperLearner``.
    """
    ensemble = SuperLearner(**kwargs)

    # One row per case: (case name, preprocessing steps, estimators).
    cases = (
        ('Standard Scaling',
         [StandardScaler()],
         [ElasticNet(), Lasso(), KNeighborsRegressor()]),
        ('Min Max Scaling',
         [MinMaxScaler()],
         [SVR()]),
        ('No Preprocessing',
         [],
         [RandomForestRegressor(random_state=SEED),
          GradientBoostingRegressor()]),
    )
    preprocessing = {name: steps for name, steps, _ in cases}
    estimators = {name: models for name, _, models in cases}

    ensemble.add(estimators, preprocessing)
    ensemble.add(GradientBoostingRegressor(), meta=True)
    return ensemble
Example 6
Project: practicalDataAnalysisCookbook Author: drabastomek File: regression_randomForest.py License: GNU General Public License v2.0 | 6 votes |
def regression_rf(x, y):
    '''
        Fit a random forest regressor with fixed
        hyperparameters on (x, y) and return it
    '''
    hyperparameters = {
        'min_samples_split': 80,
        'random_state': 666,
        'max_depth': 5,
        'n_estimators': 10,
    }
    forest = en.RandomForestRegressor(**hyperparameters)

    # train, then hand back the estimator object itself
    forest.fit(x, y)
    return forest

# the file name of the dataset
Example 7
Project: m2cgen Author: BayesWitnesses File: test_ensemble.py License: MIT License | 6 votes |
def test_single_condition():
    """Assemble a two-tree forest and compare it with the expected AST.

    The expected expression adds the constant 1.0 to an if-expression on
    feature 0 (threshold 1.5) and multiplies the sum by 0.5.
    """
    estimator = ensemble.RandomForestRegressor(n_estimators=2, random_state=1)
    estimator.fit([[1], [2]], [1, 2])
    actual = assemblers.RandomForestModelAssembler(estimator).assemble()

    # Build the expected tree bottom-up instead of as one nested literal.
    condition = ast.CompExpr(
        ast.FeatureRef(0), ast.NumVal(1.5), ast.CompOpType.LTE)
    branch = ast.IfExpr(condition, ast.NumVal(1.0), ast.NumVal(2.0))
    total = ast.BinNumExpr(ast.NumVal(1.0), branch, ast.BinNumOpType.ADD)
    expected = ast.BinNumExpr(total, ast.NumVal(0.5), ast.BinNumOpType.MUL)

    assert utils.cmp_exprs(actual, expected)
Example 8
Project: drifter_ml Author: EricSchles File: test_regression_tests.py License: MIT License | 6 votes |
def generate_regression_data_and_models():
    """Create a synthetic regression frame and two regressors fitted on it.

    Draws 1000 rows where ``A``, ``B`` and ``C`` are normal samples and
    ``target`` is their sum, then fits a decision tree and a random forest
    on the three feature columns.

    Returns:
        Tuple ``(df, column_names, target_name, reg1, reg2)``.
    """
    column_names = ["A", "B", "C"]
    target_name = "target"

    # Collect rows first and build the frame once: DataFrame.append copies
    # the whole frame on every call and was removed in pandas 2.0.
    rows = []
    for _ in range(1000):
        a = np.random.normal(0, 1)
        b = np.random.normal(0, 3)
        c = np.random.normal(12, 4)
        rows.append({"A": a, "B": b, "C": c, target_name: a + b + c})
    df = pd.DataFrame(rows, columns=column_names + [target_name])

    reg1 = tree.DecisionTreeRegressor()
    reg2 = ensemble.RandomForestRegressor()
    X = df[column_names]
    reg1.fit(X, df[target_name])
    reg2.fit(X, df[target_name])
    return df, column_names, target_name, reg1, reg2
Example 9
Project: pyGPGO Author: josejimenezluna File: RandomForest.py License: MIT License | 6 votes |
def fit(self, X, y):
    """
    Train a fresh Random Forest regressor on inputs `X` and targets `y`.

    The data, its row count (`self.n`) and the fitted model are stored on
    the instance.

    Parameters
    ----------
    X : array-like
        Input values.
    y : array-like
        Target values.
    """
    self.X, self.y = X, y
    self.n = X.shape[0]
    # The estimator is built from the keyword arguments kept in self.params.
    self.model = RandomForestRegressor(**self.params)
    self.model.fit(X, y)
Example 10
Project: Pyspatialml Author: stevenpawley File: test_prediction.py License: GNU General Public License v3.0 | 6 votes |
def test_regression(self):
    """Predict rasters from single- and multi-target forest regressions."""
    targets = ["zinc", "cadmium", "copper", "lead"]

    training_pt = gpd.read_file(ms.meuse)
    training = self.stack_meuse.extract_vector(gdf=training_pt)
    for target in targets:
        training[target] = training_pt[target]
    training = training.dropna()

    regr = RandomForestRegressor(n_estimators=50)
    X = training.loc[:, self.stack_meuse.names]

    # single target regression
    regr.fit(X, training["zinc"])
    single_regr = self.stack_meuse.predict(regr)
    self.assertIsInstance(single_regr, Raster)
    self.assertEqual(single_regr.count, 1)

    # multi-target regression: one output band expected per target column
    regr.fit(X, training.loc[:, targets])
    multi_regr = self.stack_meuse.predict(regr)
    self.assertIsInstance(multi_regr, Raster)
    self.assertEqual(multi_regr.count, 4)
Example 11
Project: HpBandSter Author: automl File: arif.py License: BSD 3-Clause "New" or "Revised" License | 6 votes |
def fit(self, losses, configs=None):
    """Fit the forest on sliding windows of the differenced loss curves.

    For every curve, each window of ``self.order`` consecutive differenced
    values, concatenated with that curve's config vector, becomes one
    feature row; the value following the window is the target.

    Args:
        losses: Sequence of loss curves.
        configs: Optional sequence of config vectors, one per curve.
            Defaults to an empty vector for every curve.
    """
    if configs is None:
        # One independent empty config per curve. This was previously sized
        # with the undefined name `times`, which raised NameError.
        configs = [[] for _ in range(len(losses))]

    # convert learning curves into X and y data
    X = []
    y = []
    for curve, config in zip(losses, configs):
        diffed = self.apply_differencing(curve)
        for i in range(self.order, len(diffed)):
            X.append(np.hstack([diffed[i - self.order:i], config]))
            y.append(diffed[i])

    self.X = np.array(X)
    self.y = np.array(y)
    self.rfr = rfr().fit(self.X, self.y)
Example 12
Project: HpBandSter Author: automl File: arif.py License: BSD 3-Clause "New" or "Revised" License | 6 votes |
def extend_partial(self, obs_losses, num_steps, config=None):
    """Extrapolate a partially observed loss curve by ``num_steps`` steps.

    Each step feeds the last ``self.order`` differenced values (plus
    ``config``) to the fitted forest and appends its prediction, so later
    steps build on earlier predictions.

    Args:
        obs_losses: Observed losses so far.
        num_steps: Number of future steps to predict.
        config: Optional config vector; defaults to an empty one.

    Returns:
        The result of ``invert_differencing`` applied to the observed
        losses and the last ``num_steps`` differenced values.
    """
    # TODO: add variance predictions
    config = [] if config is None else config

    d_losses = self.apply_differencing(obs_losses)
    for _ in range(num_steps):
        window = d_losses[-self.order:]
        next_value = self.rfr.predict([np.hstack([window, config])])
        d_losses = np.hstack([d_losses, next_value])

    return self.invert_differencing(obs_losses, d_losses[-num_steps:])
Example 13
Project: coremltools Author: apple File: test_io_types.py License: BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_random_forest_regressor(self):
    """Compare sklearn and converted-model predictions for each dtype.

    A dtype whose check raises ``RuntimeError`` is reported as unsupported
    instead of failing the test.
    """
    for dtype in self.number_data_type.keys():
        features = self.scikit_data["data"].astype(dtype)
        labels = self.scikit_data["target"].astype(dtype)
        scikit_model, spec = self._sklearn_setup(
            RandomForestRegressor(random_state=1), dtype, features, labels
        )
        test_data = features[0].reshape(1, -1)
        self._check_tree_model(spec, "multiArrayType", "doubleType", 1)
        coreml_model = create_model(spec)
        try:
            # Evaluate each prediction once and reuse it in both assertions.
            expected = scikit_model.predict(test_data)[0]
            actual = coreml_model.predict({"data": test_data})["target"]
            self.assertEqual(expected.dtype, type(actual))
            self.assertAlmostEqual(
                expected,
                actual,
                msg="{} != {} for Dtype: {}".format(expected, actual, dtype),
            )
        except RuntimeError:
            print("{} not supported. ".format(dtype))
Example 14
Project: coremltools Author: apple File: test_random_forest_regression_numeric.py License: BSD 3-Clause "New" or "Revised" License | 6 votes |
def _train_convert_evaluate_assert(self, **scikit_params):
    """
    Fit a scikit-learn forest with ``scikit_params``, convert it, and --
    only on macOS 10.13 or newer -- evaluate the converted spec and check
    the resulting metrics.
    """
    scikit_model = RandomForestRegressor(random_state=1, **scikit_params)
    scikit_model.fit(self.X, self.target)

    # Conversion always runs; evaluation is platform-gated below.
    spec = skl_converter.convert(
        scikit_model, self.feature_names, self.output_name)

    can_evaluate = _is_macos() and _macos_version() >= (10, 13)
    if not can_evaluate:
        return

    # Reference predictions from scikit-learn.
    df = pd.DataFrame(self.X, columns=self.feature_names)
    df["prediction"] = scikit_model.predict(self.X)

    metrics = evaluate_regressor(spec, df, verbose=False)
    self._check_metrics(metrics, scikit_params)
Example 15
Project: civisml-extensions Author: civisanalytics File: test_stacking.py License: BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_smoke_regression_methods(regression_test_data, n_jobs):
    """Smoke-test fit, predict and score of a stacked regressor.

    Also checks that ``predict_proba`` raises ``AttributeError``.
    """
    features = regression_test_data['x']
    targets = regression_test_data['y']

    rng = np.random.RandomState(17)
    forest = RandomForestRegressor(random_state=rng, n_estimators=10)
    estimators = [
        ('lr', LinearRegression()),
        ('rf', forest),
        ('nnls', NonNegativeLinearRegression()),
    ]
    stack = StackedRegressor(estimators, n_jobs=n_jobs)

    stack.fit(features, targets)
    stack.predict(features)
    stack.score(features, targets)

    with pytest.raises(AttributeError):
        stack.predict_proba(features)
Example 16
Project: civisml-extensions Author: civisanalytics File: test_stacking.py License: BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_smoke_multiout_regression_methods(n_jobs):
    """Smoke-test a stacked regressor on a generated two-target problem.

    Also checks that ``predict_proba`` raises ``AttributeError``.
    """
    features, targets = make_regression(random_state=7, n_samples=100,
                                        n_features=10, n_informative=4,
                                        n_targets=2)

    rng = np.random.RandomState(17)
    forest = RandomForestRegressor(random_state=rng, n_estimators=10)
    estimators = [
        ('lr', LinearRegression()),
        ('rf', forest),
        ('metalr', LinearRegression()),
    ]
    stack = StackedRegressor(estimators, n_jobs=n_jobs)

    stack.fit(features, targets)
    stack.predict(features)
    stack.score(features, targets)

    with pytest.raises(AttributeError):
        stack.predict_proba(features)
Example 17
Project: civisml-extensions Author: civisanalytics File: test_stacking.py License: BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_integration_regression(regression_test_data, n_jobs):
    """Check that the stacked model beats its own base estimators.

    The stacked regressor, a standalone forest and a standalone linear
    regression are each measured with ``fit_predict_measure_reg`` on the
    same train/test split.
    """
    split = tuple(regression_test_data[key]
                  for key in ('x', 'y', 'xtest', 'ytest'))

    sr = StackedRegressor(
        [('rf', RandomForestRegressor(random_state=7, n_estimators=10)),
         ('lr', LinearRegression()),
         ('metalr', NonNegativeLinearRegression())],
        n_jobs=n_jobs)
    rf = RandomForestRegressor(random_state=7, n_estimators=10)
    lr = LinearRegression()

    sr_mse, rf_mse, lr_mse = [fit_predict_measure_reg(model, *split)
                              for model in (sr, rf, lr)]

    # Stacked regressor should perform better than its base estimators on
    # this data.
    assert sr_mse < rf_mse
    assert sr_mse < lr_mse
    assert sr_mse < 1.5  # Sanity check
Example 18
Project: mljar-supervised Author: mljar File: random_forest.py License: MIT License | 6 votes |
def __init__(self, params):
    """Create the warm-start forest and read the step settings.

    Args:
        params: Mapping read for ``criterion``, ``max_features``,
            ``min_samples_split`` and ``seed``; missing keys fall back to
            the defaults written below.
    """
    super(RandomForestRegressorAlgorithm, self).__init__(params)
    logger.debug("RandomForestRegressorAlgorithm.__init__")

    self.library_version = sklearn.__version__

    # Step settings come from regression_additional, not from params.
    extra = regression_additional
    self.trees_in_step = extra.get("trees_in_step", 5)
    self.max_steps = extra.get("max_steps", 3)
    self.early_stopping_rounds = extra.get("early_stopping_rounds", 50)

    forest_kwargs = {
        "n_estimators": self.trees_in_step,
        "criterion": params.get("criterion", "mse"),
        "max_features": params.get("max_features", 0.8),
        "min_samples_split": params.get("min_samples_split", 4),
        "warm_start": True,
        "n_jobs": -1,
        "random_state": params.get("seed", 1),
    }
    self.model = RandomForestRegressor(**forest_kwargs)
Example 19
Project: scan Author: VikParuchuri File: scorer.py License: GNU Affero General Public License v3.0 | 6 votes |
def __init__(self, text, scores):
    """Store the inputs, pick an estimator and run ``fit_feats``.

    A classifier is used when the number of distinct scores is at most
    ``self.classification_max``; otherwise a regressor is used. Both share
    the same hyperparameters.
    """
    self.text = text
    self.scores = scores
    self.feature_generator = FeatureGenerator()

    forest_kwargs = dict(
        n_estimators=100,
        min_samples_split=4,
        min_samples_leaf=3,
        random_state=1,
    )
    few_distinct_scores = len(set(scores)) <= self.classification_max
    if few_distinct_scores:
        estimator_cls = RandomForestClassifier
    else:
        estimator_cls = RandomForestRegressor
    self.classifier = estimator_cls(**forest_kwargs)

    self.fit_feats()
    self.fit_done = False
Example 20
Project: Semantic-Texual-Similarity-Toolkits Author: rgtjf File: classifier.py License: MIT License | 6 votes |
def train_model(self, train_file_path, model_path):
    """Train a forest on scaled features and pickle both model and scaler.

    Args:
        train_file_path: Passed to ``self.load_file`` to obtain the data.
        model_path: Destination of the pickled regressor. The scaler path
            is derived by replacing ``'.pkl'`` with ``'.scaler.pkl'``.

    Returns:
        The fitted regressor.
    """
    print("==> Load the data ...")
    X_train, Y_train = self.load_file(train_file_path)
    print(train_file_path, shape(X_train))

    print("==> Train the model ...")
    min_max_scaler = preprocessing.MaxAbsScaler()
    X_train_minmax = min_max_scaler.fit_transform(X_train)

    clf = RandomForestRegressor(n_estimators=self.n_estimators)
    clf.fit(X_train_minmax.toarray(), Y_train)

    print("==> Save the model ...")
    # Context managers close each file once it has been written.
    with open(model_path, 'wb') as model_file:
        pickle.dump(clf, model_file)

    # NOTE(review): when model_path contains no '.pkl', replace() returns it
    # unchanged and the scaler is written over the model file.
    scaler_path = model_path.replace('.pkl', '.scaler.pkl')
    with open(scaler_path, 'wb') as scaler_file:
        pickle.dump(min_max_scaler, scaler_file)
    return clf
Example 21
Project: jh-kaggle-util Author: jeffheaton File: ensemble_glm.py License: Apache License 2.0 | 6 votes |
def fit_ensemble(x, y):
    """Fit and return the blending model for the configured fit type.

    Binary classification uses an elastic-net ``SGDClassifier`` with log
    loss; every other fit type uses ``LassoLarsCV(normalize=True)``.

    NOTE(review): reconstructed from a flattened listing. The original
    wrapped this logic in ``if 1:`` with an unreachable ``else`` branch and
    several commented-out alternatives; both were removed, and the executed
    path is unchanged.
    """
    fit_type = jhkaggle.jhkaggle_config['FIT_TYPE']
    if fit_type == jhkaggle.const.FIT_TYPE_BINARY_CLASSIFICATION:
        blend = SGDClassifier(loss="log", penalty="elasticnet")
    else:
        blend = LassoLarsCV(normalize=True)
    blend.fit(x, y)
    return blend
Example 22
Project: jh-kaggle-util Author: jeffheaton File: models.py License: Apache License 2.0 | 6 votes |
def run_sklearn():
    """Train each configured scikit-learn model and print the elapsed time.

    Every ``(name, estimator)`` pair is handed to ``TrainSKLearn`` and run
    in list order.
    """
    # https://www.analyticsvidhya.com/blog/2015/06/tuning-random-forest-model/
    # NOTE(review): 'extree' is a classifier while the other entries are
    # regressors -- confirm this is intended.
    alg_list = [
        ('lreg', LinearRegression()),
        ('rforest', RandomForestRegressor(n_estimators=1000, n_jobs=-1,
                                          max_depth=3)),
        ('extree', ExtraTreesClassifier(n_estimators=1000, max_depth=2)),
        ('adaboost', AdaBoostRegressor(base_estimator=None, n_estimators=600,
                                       learning_rate=1.0)),
        ('knn', sklearn.neighbors.KNeighborsRegressor(n_neighbors=5)),
    ]

    start_time = time.time()
    for name, alg in alg_list:
        # The trainer is not bound to a name, so nothing keeps it alive
        # after run() returns.
        jhkaggle.train_sklearn.TrainSKLearn("1", name, alg, False).run()

    elapsed_time = time.time() - start_time
    print("Elapsed time: {}".format(jhkaggle.util.hms_string(elapsed_time)))
Example 23
Project: bitpredict Author: cbyn File: strategy.py License: MIT License | 6 votes |
def fit_and_trade(data, cols, split, threshold):
    '''
    Train a forest on the rows before `split` and hand the remaining
    rows, the model and `threshold` to `trade`
    '''
    # Only rows with a positive width are used.
    usable = data[data.width > 0]
    features, target = usable[cols], usable.mid30

    X_train, X_test = features.iloc[:split], features.iloc[split:]
    y_train, y_test = target.iloc[:split], target.iloc[split:]

    regressor = RandomForestRegressor(n_estimators=100,
                                      min_samples_leaf=500,
                                      random_state=42,
                                      n_jobs=-1)
    regressor.fit(X_train.values, y_train.values)
    trade(X_test.values, y_test.values, regressor, threshold)
Example 24
Project: interpret-text Author: interpretml File: common_utils.py License: MIT License | 5 votes |
def create_sklearn_random_forest_regressor(X, y):
    """Return the result of fitting a depth-4 forest regressor on (X, y)."""
    forest = ensemble.RandomForestRegressor(max_depth=4, random_state=777)
    return forest.fit(X, y)
Example 25
Project: kaggle-code Author: CNuge File: hockey_front_to_back.py License: MIT License | 5 votes |
def fit(self, X, y):
    """Store the data, build the three regressors and fit each of them."""
    self.X, self.y = X, y

    # All three models are constructed before any of them is trained.
    self.opt_XGBoost_reg = xgb.XGBRegressor(**self.opt_xgb_params)
    self.opt_forest_reg = RandomForestRegressor(**self.opt_rf_params)
    self.opt_svm_reg = SVR(**self.opt_svm_params)

    # fit the models
    for regressor in (self.opt_XGBoost_reg,
                      self.opt_forest_reg,
                      self.opt_svm_reg):
        regressor.fit(self.X, self.y)
Example 26
Project: missingpy Author: epsilon-machine File: test_missforest.py License: GNU General Public License v3.0 | 5 votes |
def test_missforest_numerical_single():
    """Check MissForest imputation of a single missing numeric value.

    The expected value for the missing cell is the prediction of a forest
    trained on the complete rows, using the same ``n_estimators`` and
    ``random_state`` as the imputer.
    """
    # Test imputation with default parameter values
    # Test with a single missing value
    df = np.array([
        [1, 0, 0, 1],
        [2, 1, 2, 2],
        [3, 2, 3, 2],
        [np.nan, 4, 5, 5],
        [6, 7, 6, 7],
        [8, 8, 8, 8],
        [16, 15, 18, 19],
    ])
    statistics_mean = np.nanmean(df, axis=0)

    y = df[:, 0]
    X = df[:, 1:]
    good_rows = np.where(~np.isnan(y))[0]
    bad_rows = np.where(np.isnan(y))[0]

    rf = RandomForestRegressor(n_estimators=10, random_state=1337)
    rf.fit(X=X[good_rows], y=y[good_rows])
    pred_val = rf.predict(X[bad_rows])

    # Derive the expected matrix from the input instead of repeating the
    # literal. This also avoids nesting the length-1 prediction array inside
    # a row of scalars, a ragged construction that newer NumPy versions
    # reject.
    df_imputed = df.copy()
    df_imputed[bad_rows, 0] = pred_val

    imputer = MissForest(n_estimators=10, random_state=1337)
    assert_array_equal(imputer.fit_transform(df), df_imputed)
    assert_array_equal(imputer.statistics_.get('col_means'), statistics_mean)
Example 27
Project: deepchem Author: deepchem File: nci_rf.py License: MIT License | 5 votes |
def model_builder(model_dir):
    """Wrap a 500-tree forest regressor in a SklearnModel with ``model_dir``."""
    return SklearnModel(RandomForestRegressor(n_estimators=500), model_dir)
Example 28
Project: deepchem Author: deepchem File: KINASE_rf_model.py License: MIT License | 5 votes |
def task_model_builder(model_dir):
    """Wrap a forest regressor in a SklearnModel created with ``model_dir``.

    ``max_features`` is a third of ``num_features`` truncated to an int;
    ``num_features`` is read from the enclosing scope.
    """
    forest_kwargs = dict(
        n_estimators=100,
        max_features=int(num_features / 3),
        min_samples_split=5,
        n_jobs=-1,
    )
    return dc.models.SklearnModel(
        RandomForestRegressor(**forest_kwargs), model_dir)
Example 29
Project: deepchem Author: deepchem File: UV_rf_model.py License: MIT License | 5 votes |
def task_model_builder(model_dir):
    """Wrap a forest regressor in a SklearnModel created with ``model_dir``.

    ``max_features`` is a third of ``num_features`` truncated to an int;
    ``num_features`` is read from the enclosing scope.
    """
    forest_kwargs = dict(
        n_estimators=100,
        max_features=int(num_features / 3),
        min_samples_split=5,
        n_jobs=-1,
    )
    return dc.models.SklearnModel(
        RandomForestRegressor(**forest_kwargs), model_dir)
Example 30
Project: deepchem Author: deepchem File: FACTORS_rf_model.py License: MIT License | 5 votes |
def task_model_builder(model_dir):
    """Wrap a forest regressor in a SklearnModel created with ``model_dir``.

    ``max_features`` is a third of ``num_features`` truncated to an int;
    ``num_features`` is read from the enclosing scope.
    """
    forest_kwargs = dict(
        n_estimators=100,
        max_features=int(num_features / 3),
        min_samples_split=5,
        n_jobs=-1,
    )
    return dc.models.SklearnModel(
        RandomForestRegressor(**forest_kwargs), model_dir)