Python sklearn.tree.DecisionTreeRegressor() Examples
The following are 30 code examples showing how to use sklearn.tree.DecisionTreeRegressor(). These examples are extracted from open source projects.
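Before the project examples, here is a minimal, self-contained sketch of the basic DecisionTreeRegressor workflow (fit, predict, inspect feature importances). The toy data and parameter values below are illustrative assumptions, not taken from any of the projects listed here.

import numpy as np
from sklearn.tree import DecisionTreeRegressor

# Toy 1-D regression problem: y = x**2 plus Gaussian noise (illustrative only)
rng = np.random.RandomState(0)
X = np.sort(rng.uniform(0, 5, size=(80, 1)), axis=0)
y = X.ravel() ** 2 + rng.normal(scale=0.5, size=80)

# A shallow tree; max_depth limits how finely the tree partitions the input space
reg = DecisionTreeRegressor(max_depth=3, random_state=0)
reg.fit(X, y)

# Predict on a dense grid and inspect how much each feature contributed to the splits
X_grid = np.linspace(0, 5, 20).reshape(-1, 1)
print(reg.predict(X_grid)[:5])
print(reg.feature_importances_)  # single feature, so this is simply [1.0]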
Example 1
Project: Mastering-Elasticsearch-7.0 Author: PacktPublishing File: test_bagging.py License: MIT License
def test_regression():
    # Check regression for various parameter settings.
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(boston.data[:50],
                                                        boston.target[:50],
                                                        random_state=rng)

    grid = ParameterGrid({"max_samples": [0.5, 1.0],
                          "max_features": [0.5, 1.0],
                          "bootstrap": [True, False],
                          "bootstrap_features": [True, False]})

    for base_estimator in [None,
                           DummyRegressor(),
                           DecisionTreeRegressor(),
                           KNeighborsRegressor(),
                           SVR(gamma='scale')]:
        for params in grid:
            BaggingRegressor(base_estimator=base_estimator,
                             random_state=rng,
                             **params).fit(X_train, y_train).predict(X_test)
Example 2
Project: LearningX Author: ankonzoid File: BaggedTrees.py License: MIT License
def fit(self, X, y):
    """
    Method:
      1) Create n_estimator tree estimators with greedy splitting
      2) For each estimator i, sample (X, y) randomly N times with replacement
         and train the estimator on this sampled dataset (X_i, y_i)
      3) Predict by taking the mean predictions of each estimator
    """
    self.models = []
    N = len(X)
    for i in range(self.n_estimators):
        # Create tree with greedy splits
        model = DecisionTreeRegressor(max_depth=self.max_depth)
        # Bagging procedure: sample N indices with replacement
        idx_sample = np.random.choice(N, N)
        model.fit(X[idx_sample], y[idx_sample])
        self.models.append(model)
Example 3
Project: LearningX Author: ankonzoid File: RandomForest.py License: MIT License
def fit(self, X, y):
    """
    Method:
      1) Create n_estimator tree estimators with random splitting on d/3 max features
      2) For each estimator i, sample (X, y) randomly N times with replacement
         and train the estimator on this sampled dataset (X_i, y_i)
      3) Predict by taking the mean predictions of each estimator
    """
    self.models = []
    N, d = X.shape
    for i in range(self.n_estimators):
        # Create tree with random splitting on roughly d/3 max features
        # (pass an integer so sklearn treats it as a feature count, not a fraction)
        model = DecisionTreeRegressor(max_depth=self.max_depth,
                                      min_samples_split=self.min_samples_split,
                                      min_samples_leaf=self.min_samples_leaf,
                                      splitter="random",
                                      max_features=max(1, d // 3))
        # Bagging procedure
        idx_sample = np.random.choice(N, N)  # random sampling of length N
        model.fit(X[idx_sample], y[idx_sample])  # fit on random sampling
        self.models.append(model)
Example 4
Project: Machine-Learning-for-Beginner-by-Python3 Author: Anfany File: AdaBoost_Regression.py License: MIT License
def Train(data, modelcount, censhu, yanzhgdata):
    model = AdaBoostRegressor(DecisionTreeRegressor(max_depth=censhu),
                              n_estimators=modelcount, learning_rate=0.8)

    model.fit(data[:, :-1], data[:, -1])
    # Predictions on the training data
    train_out = model.predict(data[:, :-1])
    # Compute the training MSE
    train_mse = mse(data[:, -1], train_out)

    # Predictions on the validation data
    add_yan = model.predict(yanzhgdata[:, :-1])
    # Compute the validation MSE
    add_mse = mse(yanzhgdata[:, -1], add_yan)
    print(train_mse, add_mse)
    return train_mse, add_mse

# Function that determines the final combination
Example 5
Project: practicalDataAnalysisCookbook Author: drabastomek File: regression_cart.py License: GNU General Public License v2.0
def regression_cart(x, y):
    '''
        Estimate a CART regressor
    '''
    # create the regressor object
    cart = sk.DecisionTreeRegressor(min_samples_split=80,
                                    max_features="auto",
                                    random_state=66666,
                                    max_depth=5)

    # estimate the model
    cart.fit(x, y)

    # return the object
    return cart

# the file name of the dataset
Example 6
Project: m2cgen Author: BayesWitnesses File: test_meta.py License: MIT License
def test_ransac_custom_base_estimator():
    base_estimator = DecisionTreeRegressor()
    estimator = linear_model.RANSACRegressor(
        base_estimator=base_estimator,
        random_state=1)
    estimator.fit([[1], [2], [3]], [1, 2, 3])

    assembler = assemblers.RANSACModelAssembler(estimator)
    actual = assembler.assemble()

    expected = ast.IfExpr(
        ast.CompExpr(
            ast.FeatureRef(0),
            ast.NumVal(2.5),
            ast.CompOpType.LTE),
        ast.NumVal(2.0),
        ast.NumVal(3.0))

    assert utils.cmp_exprs(actual, expected)
Example 7
Project: m2cgen Author: BayesWitnesses File: test_tree.py License: MIT License
def test_single_condition():
    estimator = tree.DecisionTreeRegressor()
    estimator.fit([[1], [2]], [1, 2])

    assembler = assemblers.TreeModelAssembler(estimator)
    actual = assembler.assemble()

    expected = ast.IfExpr(
        ast.CompExpr(
            ast.FeatureRef(0),
            ast.NumVal(1.5),
            ast.CompOpType.LTE),
        ast.NumVal(1.0),
        ast.NumVal(2.0))

    assert utils.cmp_exprs(actual, expected)
Example 8
Project: m2cgen Author: BayesWitnesses File: test_tree.py License: MIT License
def test_two_conditions():
    estimator = tree.DecisionTreeRegressor()
    estimator.fit([[1], [2], [3]], [1, 2, 3])

    assembler = assemblers.TreeModelAssembler(estimator)
    actual = assembler.assemble()

    expected = ast.IfExpr(
        ast.CompExpr(
            ast.FeatureRef(0),
            ast.NumVal(1.5),
            ast.CompOpType.LTE),
        ast.NumVal(1.0),
        ast.IfExpr(
            ast.CompExpr(
                ast.FeatureRef(0),
                ast.NumVal(2.5),
                ast.CompOpType.LTE),
            ast.NumVal(2.0),
            ast.NumVal(3.0)))

    assert utils.cmp_exprs(actual, expected)
Example 9
Project: Mastering-Elasticsearch-7.0 Author: PacktPublishing File: test_bagging.py License: MIT License
def test_bootstrap_features():
    # Test that bootstrapping features may generate duplicate features.
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(boston.data,
                                                        boston.target,
                                                        random_state=rng)

    ensemble = BaggingRegressor(base_estimator=DecisionTreeRegressor(),
                                max_features=1.0,
                                bootstrap_features=False,
                                random_state=rng).fit(X_train, y_train)

    for features in ensemble.estimators_features_:
        assert_equal(boston.data.shape[1], np.unique(features).shape[0])

    ensemble = BaggingRegressor(base_estimator=DecisionTreeRegressor(),
                                max_features=1.0,
                                bootstrap_features=True,
                                random_state=rng).fit(X_train, y_train)

    for features in ensemble.estimators_features_:
        assert_greater(boston.data.shape[1], np.unique(features).shape[0])
Example 10
Project: Mastering-Elasticsearch-7.0 Author: PacktPublishing File: test_bagging.py License: MIT License
def test_parallel_regression():
    # Check parallel regression.
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(boston.data,
                                                        boston.target,
                                                        random_state=rng)

    ensemble = BaggingRegressor(DecisionTreeRegressor(),
                                n_jobs=3,
                                random_state=0).fit(X_train, y_train)

    ensemble.set_params(n_jobs=1)
    y1 = ensemble.predict(X_test)
    ensemble.set_params(n_jobs=2)
    y2 = ensemble.predict(X_test)
    assert_array_almost_equal(y1, y2)

    ensemble = BaggingRegressor(DecisionTreeRegressor(),
                                n_jobs=1,
                                random_state=0).fit(X_train, y_train)

    y3 = ensemble.predict(X_test)
    assert_array_almost_equal(y1, y3)
Example 11
Project: Mastering-Elasticsearch-7.0 Author: PacktPublishing File: test_weight_boosting.py License: MIT License
def test_gridsearch():
    # Check that base trees can be grid-searched.
    # AdaBoost classification
    boost = AdaBoostClassifier(base_estimator=DecisionTreeClassifier())
    parameters = {'n_estimators': (1, 2),
                  'base_estimator__max_depth': (1, 2),
                  'algorithm': ('SAMME', 'SAMME.R')}
    clf = GridSearchCV(boost, parameters)
    clf.fit(iris.data, iris.target)

    # AdaBoost regression
    boost = AdaBoostRegressor(base_estimator=DecisionTreeRegressor(),
                              random_state=0)
    parameters = {'n_estimators': (1, 2),
                  'base_estimator__max_depth': (1, 2)}
    clf = GridSearchCV(boost, parameters)
    clf.fit(boston.data, boston.target)
Example 12
Project: Mastering-Elasticsearch-7.0 Author: PacktPublishing File: test_tree.py License: MIT License
def test_importances_gini_equal_mse():
    # Check that gini is equivalent to mse for binary output variable

    X, y = datasets.make_classification(n_samples=2000,
                                        n_features=10,
                                        n_informative=3,
                                        n_redundant=0,
                                        n_repeated=0,
                                        shuffle=False,
                                        random_state=0)

    # The gini index and the mean square error (variance) might differ due
    # to numerical instability. Since those instabilities mainly occur at
    # high tree depth, we restrict this maximal depth.
    clf = DecisionTreeClassifier(criterion="gini", max_depth=5,
                                 random_state=0).fit(X, y)
    reg = DecisionTreeRegressor(criterion="mse", max_depth=5,
                                random_state=0).fit(X, y)

    assert_almost_equal(clf.feature_importances_, reg.feature_importances_)
    assert_array_equal(clf.tree_.feature, reg.tree_.feature)
    assert_array_equal(clf.tree_.children_left, reg.tree_.children_left)
    assert_array_equal(clf.tree_.children_right, reg.tree_.children_right)
    assert_array_equal(clf.tree_.n_node_samples, reg.tree_.n_node_samples)
Example 13
Project: Mastering-Elasticsearch-7.0 Author: PacktPublishing File: test_impute.py License: MIT License
def test_imputation_pipeline_grid_search():
    # Test imputation within a pipeline + gridsearch.
    X = sparse_random_matrix(100, 100, density=0.10)
    missing_values = X.data[0]

    pipeline = Pipeline([('imputer',
                          SimpleImputer(missing_values=missing_values)),
                         ('tree',
                          tree.DecisionTreeRegressor(random_state=0))])

    parameters = {
        'imputer__strategy': ["mean", "median", "most_frequent"]
    }

    Y = sparse_random_matrix(100, 1, density=0.10).toarray()
    gs = GridSearchCV(pipeline, parameters)
    gs.fit(X, Y)
Example 14
Project: Mastering-Elasticsearch-7.0 Author: PacktPublishing File: test_multiclass.py License: MIT License
def test_ovr_ovo_regressor():
    # test that ovr and ovo work on regressors which don't have a
    # decision_function
    ovr = OneVsRestClassifier(DecisionTreeRegressor())
    pred = ovr.fit(iris.data, iris.target).predict(iris.data)
    assert_equal(len(ovr.estimators_), n_classes)
    assert_array_equal(np.unique(pred), [0, 1, 2])
    # we are doing something sensible
    assert_greater(np.mean(pred == iris.target), .9)

    ovr = OneVsOneClassifier(DecisionTreeRegressor())
    pred = ovr.fit(iris.data, iris.target).predict(iris.data)
    assert_equal(len(ovr.estimators_), n_classes * (n_classes - 1) / 2)
    assert_array_equal(np.unique(pred), [0, 1, 2])
    # we are doing something sensible
    assert_greater(np.mean(pred == iris.target), .9)
Example 15
Project: drifter_ml Author: EricSchles File: test_regression_tests.py License: MIT License
def generate_regression_data_and_models():
    df = pd.DataFrame()
    for _ in range(1000):
        a = np.random.normal(0, 1)
        b = np.random.normal(0, 3)
        c = np.random.normal(12, 4)
        target = a + b + c
        df = df.append({
            "A": a,
            "B": b,
            "C": c,
            "target": target
        }, ignore_index=True)

    reg1 = tree.DecisionTreeRegressor()
    reg2 = ensemble.RandomForestRegressor()
    column_names = ["A", "B", "C"]
    target_name = "target"
    X = df[column_names]
    reg1.fit(X, df[target_name])
    reg2.fit(X, df[target_name])
    return df, column_names, target_name, reg1, reg2
Example 16
Project: coremltools Author: apple File: test_io_types.py License: BSD 3-Clause "New" or "Revised" License
def test_tree_regressor(self):
    for dtype in self.number_data_type.keys():
        scikit_model = DecisionTreeRegressor(random_state=1)
        data = self.scikit_data["data"].astype(dtype)
        target = self.scikit_data["target"].astype(dtype)
        scikit_model, spec = self._sklearn_setup(scikit_model, dtype, data, target)
        test_data = data[0].reshape(1, -1)
        self._check_tree_model(spec, "multiArrayType", "doubleType", 1)
        coreml_model = create_model(spec)
        try:
            self.assertEqual(
                scikit_model.predict(test_data)[0].dtype,
                type(coreml_model.predict({"data": test_data})["target"]),
            )
            self.assertEqual(
                scikit_model.predict(test_data)[0],
                coreml_model.predict({"data": test_data})["target"],
                msg="{} != {} for Dtype: {}".format(
                    scikit_model.predict(test_data)[0],
                    coreml_model.predict({"data": test_data})["target"],
                    dtype,
                ),
            )
        except RuntimeError:
            print("{} not supported. ".format(dtype))
Example 17
Project: Malware-GAN Author: yanminglai File: MalGAN__v3.py License: GNU General Public License v3.0
def build_blackbox_detector(self):
    # Use '==' for string comparison ('is' checks identity, not equality)
    if self.blackbox == 'RF':
        blackbox_detector = RandomForestClassifier(n_estimators=100, max_depth=3, random_state=1)
    elif self.blackbox == 'SVM':
        blackbox_detector = svm.SVC()
    elif self.blackbox == 'LR':
        blackbox_detector = linear_model.LogisticRegression()
    elif self.blackbox == 'DT':
        blackbox_detector = tree.DecisionTreeRegressor()
    elif self.blackbox == 'MLP':
        blackbox_detector = MLPClassifier(hidden_layer_sizes=(50,), max_iter=10, alpha=1e-4,
                                          solver='sgd', verbose=0, tol=1e-4, random_state=1,
                                          learning_rate_init=.1)
    elif self.blackbox == 'VOTE':
        blackbox_detector = VOTEClassifier()

    return blackbox_detector
Example 18
Project: tpot Author: EpistasisLab File: export_tests.py License: GNU Lesser General Public License v3.0
def test_set_param_recursive_2():
    """Assert that set_param_recursive sets \"random_state\" to 42 in nested estimator in SelectFromModel."""
    pipeline_string = (
        'DecisionTreeRegressor(SelectFromModel(input_matrix, '
        'SelectFromModel__ExtraTreesRegressor__max_features=0.05, SelectFromModel__ExtraTreesRegressor__n_estimators=100, '
        'SelectFromModel__threshold=0.05), DecisionTreeRegressor__max_depth=8,'
        'DecisionTreeRegressor__min_samples_leaf=5, DecisionTreeRegressor__min_samples_split=5)'
    )
    tpot_obj = TPOTRegressor()
    tpot_obj._fit_init()
    deap_pipeline = creator.Individual.from_string(pipeline_string, tpot_obj._pset)
    sklearn_pipeline = tpot_obj._toolbox.compile(expr=deap_pipeline)
    set_param_recursive(sklearn_pipeline.steps, 'random_state', 42)

    assert getattr(getattr(sklearn_pipeline.steps[0][1], 'estimator'), 'random_state') == 42
    assert getattr(sklearn_pipeline.steps[1][1], 'random_state') == 42
Example 19
Project: pycobra Author: bhargavvader File: ewa.py License: MIT License
def load_default(self, machine_list=['lasso', 'tree', 'ridge', 'random_forest', 'svm']):
    """
    Loads 4 different scikit-learn regressors by default.

    Parameters
    ----------
    machine_list: optional, list of strings
        List of default machine names to be loaded.
    """
    for machine in machine_list:
        try:
            if machine == 'lasso':
                self.estimators_['lasso'] = linear_model.LassoCV(random_state=self.random_state).fit(self.X_k_, self.y_k_)
            if machine == 'tree':
                self.estimators_['tree'] = DecisionTreeRegressor(random_state=self.random_state).fit(self.X_k_, self.y_k_)
            if machine == 'ridge':
                self.estimators_['ridge'] = linear_model.RidgeCV().fit(self.X_k_, self.y_k_)
            if machine == 'random_forest':
                self.estimators_['random_forest'] = RandomForestRegressor(random_state=self.random_state).fit(self.X_k_, self.y_k_)
            if machine == 'svm':
                self.estimators_['svm'] = SVR().fit(self.X_k_, self.y_k_)
        except ValueError:
            continue
Example 20
Project: CausalDiscoveryToolbox Author: FenTechSolutions File: FSRegression.py License: MIT License
def predict_features(self, df_features, df_target, idx=0, **kwargs):
    """For one variable, predict its neighbouring nodes.

    Args:
        df_features (pandas.DataFrame):
        df_target (pandas.Series):
        idx (int): (optional) for printing purposes
        kwargs (dict): additional options for algorithms

    Returns:
        list: scores of each feature relative to the target
    """
    X = df_features.values
    y = df_target.values
    regressor = DecisionTreeRegressor()
    regressor.fit(X, y)

    return regressor.feature_importances_
Example 21
Project: LearningX Author: ankonzoid File: DT_sklearn_regr.py License: MIT License
def __init__(self, max_depth=20, min_samples_leaf=10):
    from sklearn.tree import DecisionTreeRegressor
    self.model = DecisionTreeRegressor(max_depth=max_depth,
                                       min_samples_leaf=min_samples_leaf,
                                       criterion="mse")
Example 22
Project: LearningX Author: ankonzoid File: GradientBoostedTree.py License: MIT License
def fit(self, X, y):
    """
    Method:
      1) Train tree with greedy splitting on dataset (X, y)
      2) Recursively (n_estimator times) compute the residual between the
         truth and prediction values (res = y - y_pred), and use the residual
         as the next estimator's training y (keep X the same)
      3) The prediction at each estimator is the trained prediction plus the
         previous trained prediction, making the full prediction of the final
         model the sum of the predictions of each model.
    """
    self.models = []
    y_i = y
    y_pred_i = np.zeros(y.shape)
    for i in range(self.n_estimators):
        # Create tree with greedy splits
        model = DecisionTreeRegressor(max_depth=self.max_depth,
                                      min_samples_split=self.min_samples_split,
                                      min_samples_leaf=self.min_samples_leaf)
        model.fit(X, y_i)

        # Boosting procedure
        y_pred = model.predict(X) + y_pred_i  # add previous prediction
        res = y - y_pred                      # compute residual
        y_i = res                             # set training label as residual
        y_pred_i = y_pred                     # update prediction value

        self.models.append(model)
Example 23
Project: Machine-Learning-for-Beginner-by-Python3 Author: Anfany File: Blending_Regression_pm25.py License: MIT License
def Adaboost_First(self, data, max_depth=5, n_estimators=320):
    model = AdaBoostRegressor(DecisionTreeRegressor(max_depth=max_depth),
                              n_estimators=n_estimators, learning_rate=0.8)
    model.fit(data['train'][:, :-1], data['train'][:, -1])
    # Note the difference between storing validation-set results and prediction-set results
    # Predictions on the training set
    xul = model.predict(data['train'][:, :-1])
    # Predictions on the validation set
    yanre = model.predict(data['test'][:, :-1])
    # Predictions on the prediction set
    prer = model.predict(data['predict'][:, :-1])
    # Store the results
    self.yanzhneg_pr.append(yanre)
    self.predi.append(prer)
    # Compute the training, validation and prediction errors separately
    # (after each fold, the errors on all three datasets are computed)
    xx = self.RMSE(xul, data['train'][:, -1])
    yy = self.RMSE(yanre, data['test'][:, -1])
    pp = self.RMSE(prer, data['predict'][:, -1])
    # Store the errors
    self.error_dict['AdaBoost'] = [xx, yy, pp]
    # True outputs of the validation set
    self.yanzhneg_real = data['test'][:, -1]
    # True outputs of the prediction set
    self.preal = data['predict'][:, -1]
    return print('AdaBoost in layer 1 finished running')

# GBDT
Example 24
Project: Machine-Learning-for-Beginner-by-Python3 Author: Anfany File: AdaBoost_Regression.py License: MIT License
def recspre(exstr, predata, datadict, zhe, count=100):
    tree, te = exstr.split('-')
    model = AdaBoostRegressor(DecisionTreeRegressor(max_depth=int(te)),
                              n_estimators=int(tree), learning_rate=0.8)
    model.fit(datadict[zhe]['train'][:, :-1], datadict[zhe]['train'][:, -1])

    # Predict
    yucede = model.predict(predata[:, :-1])
    # For easier display, select 100 samples to plot
    zongleng = np.arange(len(yucede))
    randomnum = np.random.choice(zongleng, count, replace=False)

    yucede_se = list(np.array(yucede)[randomnum])
    yuce_re = list(np.array(predata[:, -1])[randomnum])

    # Compare predicted and actual values
    plt.figure(figsize=(17, 9))
    plt.subplot(2, 1, 1)
    plt.plot(list(range(len(yucede_se))), yucede_se, 'r--', label='Predicted', lw=2)
    plt.scatter(list(range(len(yuce_re))), yuce_re, c='b', marker='.', label='Actual', lw=2)
    plt.xlim(-1, count + 1)
    plt.legend()
    plt.title('Predicted vs. actual values [max number of trees: %d]' % int(tree))

    plt.subplot(2, 1, 2)
    plt.plot(list(range(len(yucede_se))), np.array(yuce_re) - np.array(yucede_se),
             'k--', marker='s', label='Actual - Predicted', lw=2)
    plt.legend()
    plt.title('Relative error between predicted and actual values')
    plt.savefig(r'C:\Users\GWT9\Desktop\duibi.jpg')
    return 'Prediction vs. actual comparison complete'

# Main function
Example 25
Project: defragTrees Author: sato9hara File: Baselines.py License: MIT License
def fit(self, X, y, featurename=[]):
    self.dim_ = X.shape[1]
    self.setfeaturename(featurename)
    self.setdefaultpred(y)
    param_grid = {"max_depth": self.max_depth_,
                  "min_samples_leaf": self.min_samples_leaf_}
    if self.modeltype_ == 'regression':
        mdl = tree.DecisionTreeRegressor()
    elif self.modeltype_ == 'classification':
        mdl = tree.DecisionTreeClassifier()
    grid_search = GridSearchCV(mdl, param_grid=param_grid, cv=self.cv_)
    grid_search.fit(X, y)
    mdl = grid_search.best_estimator_
    self.__parseTree(mdl)
    self.weight_ = np.ones(len(self.rule_))
Example 26
Project: kaggle-HomeDepot Author: ChenglongChen File: skl_utils.py License: MIT License
def __init__(self, base_estimator=None, n_estimators=50, max_features=1.0,
             max_depth=6, learning_rate=1.0, loss='linear', random_state=None):
    if base_estimator and base_estimator == 'etr':
        base_estimator = ExtraTreeRegressor(max_depth=max_depth,
                                            max_features=max_features)
    else:
        base_estimator = DecisionTreeRegressor(max_depth=max_depth,
                                               max_features=max_features)
    self.model = sklearn.ensemble.AdaBoostRegressor(
        base_estimator=base_estimator,
        n_estimators=n_estimators,
        learning_rate=learning_rate,
        random_state=random_state,
        loss=loss)
Example 27
Project: Mastering-Elasticsearch-7.0 Author: PacktPublishing File: test_bagging.py License: MIT License
def test_bootstrap_samples():
    # Test that bootstrapping samples generate non-perfect base estimators.
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(boston.data,
                                                        boston.target,
                                                        random_state=rng)

    base_estimator = DecisionTreeRegressor().fit(X_train, y_train)

    # without bootstrap, all trees are perfect on the training set
    ensemble = BaggingRegressor(base_estimator=DecisionTreeRegressor(),
                                max_samples=1.0,
                                bootstrap=False,
                                random_state=rng).fit(X_train, y_train)

    assert_equal(base_estimator.score(X_train, y_train),
                 ensemble.score(X_train, y_train))

    # with bootstrap, trees are no longer perfect on the training set
    ensemble = BaggingRegressor(base_estimator=DecisionTreeRegressor(),
                                max_samples=1.0,
                                bootstrap=True,
                                random_state=rng).fit(X_train, y_train)

    assert_greater(base_estimator.score(X_train, y_train),
                   ensemble.score(X_train, y_train))

    # check that each sampling correspond to a complete bootstrap resample.
    # the size of each bootstrap should be the same as the input data but
    # the data should be different (checked using the hash of the data).
    ensemble = BaggingRegressor(base_estimator=DummySizeEstimator(),
                                bootstrap=True).fit(X_train, y_train)
    training_hash = []
    for estimator in ensemble.estimators_:
        assert estimator.training_size_ == X_train.shape[0]
        training_hash.append(estimator.training_hash_)
    assert len(set(training_hash)) == len(training_hash)
Example 28
Project: Mastering-Elasticsearch-7.0 Author: PacktPublishing File: test_bagging.py License: MIT License
def test_oob_score_regression():
    # Check that oob prediction is a good estimation of the generalization
    # error.
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(boston.data,
                                                        boston.target,
                                                        random_state=rng)

    clf = BaggingRegressor(base_estimator=DecisionTreeRegressor(),
                           n_estimators=50,
                           bootstrap=True,
                           oob_score=True,
                           random_state=rng).fit(X_train, y_train)

    test_score = clf.score(X_test, y_test)

    assert_less(abs(test_score - clf.oob_score_), 0.1)

    # Test with few estimators
    assert_warns(UserWarning,
                 BaggingRegressor(base_estimator=DecisionTreeRegressor(),
                                  n_estimators=1,
                                  bootstrap=True,
                                  oob_score=True,
                                  random_state=rng).fit,
                 X_train,
                 y_train)
Example 29
Project: Mastering-Elasticsearch-7.0 Author: PacktPublishing File: test_bagging.py License: MIT License
def test_bagging_regressor_with_missing_inputs():
    # Check that BaggingRegressor can accept X with missing/infinite data
    X = np.array([
        [1, 3, 5],
        [2, None, 6],
        [2, np.nan, 6],
        [2, np.inf, 6],
        [2, np.NINF, 6],
    ])
    y_values = [
        np.array([2, 3, 3, 3, 3]),
        np.array([
            [2, 1, 9],
            [3, 6, 8],
            [3, 6, 8],
            [3, 6, 8],
            [3, 6, 8],
        ])
    ]
    for y in y_values:
        regressor = DecisionTreeRegressor()
        pipeline = make_pipeline(
            FunctionTransformer(replace, validate=False),
            regressor
        )
        pipeline.fit(X, y).predict(X)
        bagging_regressor = BaggingRegressor(pipeline)
        y_hat = bagging_regressor.fit(X, y).predict(X)
        assert_equal(y.shape, y_hat.shape)

        # Verify that exceptions can be raised by wrapper regressor
        regressor = DecisionTreeRegressor()
        pipeline = make_pipeline(regressor)
        assert_raises(ValueError, pipeline.fit, X, y)
        bagging_regressor = BaggingRegressor(pipeline)
        assert_raises(ValueError, bagging_regressor.fit, X, y)
Example 30
Project: Mastering-Elasticsearch-7.0 Author: PacktPublishing File: test_export.py License: MIT License
def test_friedman_mse_in_graphviz():
    clf = DecisionTreeRegressor(criterion="friedman_mse", random_state=0)
    clf.fit(X, y)
    dot_data = StringIO()
    export_graphviz(clf, out_file=dot_data)

    clf = GradientBoostingClassifier(n_estimators=2, random_state=0)
    clf.fit(X, y)
    for estimator in clf.estimators_:
        export_graphviz(estimator[0], out_file=dot_data)

    for finding in finditer(r"\[.*?samples.*?\]", dot_data.getvalue()):
        assert_in("friedman_mse", finding.group())