Python sklearn.tree.DecisionTreeClassifier() Examples
The following are 30 code examples of sklearn.tree.DecisionTreeClassifier(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module sklearn.tree, or try the search function.
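Before the project examples, here is a minimal, self-contained sketch of the basic DecisionTreeClassifier workflow (fit, predict, predict_proba, score) on the bundled iris dataset. The dataset and parameter values are illustrative choices only and are not taken from any of the projects below.

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

# Load a small toy dataset and hold out a test split.
iris = load_iris()
X_train, X_test, y_train, y_test = train_test_split(
    iris.data, iris.target, random_state=0)

# max_depth=3 and criterion="gini" are illustrative settings, not tuned values.
clf = DecisionTreeClassifier(criterion="gini", max_depth=3, random_state=0)
clf.fit(X_train, y_train)

print(clf.predict(X_test[:5]))        # hard class labels
print(clf.predict_proba(X_test[:5]))  # per-class probabilities
print(clf.score(X_test, y_test))      # mean accuracy on the test split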

Example #1
Source File: sasma.py From unmixing with MIT License | 6 votes |
def predict(self, fit=None, features=None, probabilities=False):
    '''
    Predict the class labels (e.g., endmember types) based on an existing
    tree fit and new predictive features.
    Arguments:
        fit         The result of tree.DecisionTreeClassifier.fit(); uses
                    the last fit model if None.
        features    The new X array/ new predictive features to use;
                    should be (p x n), n samples with p features.
    '''
    if fit is None:
        fit = self.last_fit

    if features is None:
        features = self.x_features_array

    if probabilities:
        # Return per-class probabilities, one raster plane per label
        shp = self.y_raster.shape
        return fit.predict_proba(features.T).T.reshape(
            (self.n_labels, shp[1], shp[2]))

    return fit.predict(features.T).reshape(self.y_raster.shape)
Example #2
Source File: AdaBoost_Classify.py From Machine-Learning-for-Beginner-by-Python3 with MIT License | 6 votes |
def Train(data, modelcount, censhu, yanzhgdata):
    model = AdaBoostClassifier(DecisionTreeClassifier(max_depth=censhu),
                               algorithm="SAMME",
                               n_estimators=modelcount,
                               learning_rate=0.8)

    model.fit(data[:, :-1], data[:, -1])
    # Predictions on the training data
    train_out = model.predict(data[:, :-1])
    # Compute the MSE on the training data
    train_mse = fmse(data[:, -1], train_out)[0]

    # Predictions on the validation data
    add_yan = model.predict(yanzhgdata[:, :-1])
    # Compute the F1 measure on the validation data
    add_mse = fmse(yanzhgdata[:, -1], add_yan)[0]

    print(train_mse, add_mse)
    return train_mse, add_mse

# Function that determines the final model combination
Example #3
Source File: AdaBoost_Classify.py From Machine-Learning-for-Beginner-by-Python3 with MIT License | 6 votes |
def recspre(estrs, predata, datadict, zhe):
    mo, ze = estrs.split('-')
    model = AdaBoostClassifier(DecisionTreeClassifier(max_depth=int(ze)),
                               algorithm="SAMME",
                               n_estimators=int(mo),
                               learning_rate=0.8)

    model.fit(datadict[zhe]['train'][:, :-1], datadict[zhe]['train'][:, -1])

    # Predict
    yucede = model.predict(predata[:, :-1])
    # Compute the confusion matrix
    print(ConfuseMatrix(predata[:, -1], yucede))

    return fmse(predata[:, -1], yucede)

# Main function
Example #4
Source File: learn_nse.py From scikit-multiflow with BSD 3-Clause "New" or "Revised" License | 6 votes |
def __init__(self,
             base_estimator=DecisionTreeClassifier(),
             window_size=250,
             slope=0.5,
             crossing_point=10,
             n_estimators=15,
             pruning=None):
    super().__init__()
    self.ensemble = []
    self.ensemble_weights = []
    self.bkts = []
    self.wkts = []
    self.buffer = []
    self.window_size = window_size
    self.slope = slope
    self.crossing_point = crossing_point
    self.n_estimators = n_estimators
    self.pruning = pruning
    self.X_batch = []
    self.y_batch = []
    self.instance_weights = []
    self.base_estimator = cp.deepcopy(base_estimator)
    self.classes = None
Example #5
Source File: learn_pp.py From scikit-multiflow with BSD 3-Clause "New" or "Revised" License | 6 votes |
def __init__(self,
             base_estimator=DecisionTreeClassifier(),
             error_threshold=0.5,
             n_estimators=30,
             n_ensembles=10,
             window_size=100,
             random_state=None):
    super().__init__()
    self.base_estimator = base_estimator
    self.n_estimators = n_estimators
    self.ensembles = []
    self.ensemble_weights = []
    self.classes = None
    self.n_ensembles = n_ensembles
    self.random = check_random_state(random_state)
    self.random_state = random_state
    self.error_threshold = error_threshold
    self.X_batch = []
    self.y_batch = []
    self.window_size = window_size
Example #6
Source File: utils.py From m2cgen with MIT License | 6 votes |
def __call__(self, estimator):
    fitted_estimator = estimator.fit(self.X_train, self.y_train)

    if isinstance(estimator, (LinearClassifierMixin, SVC, NuSVC,
                              LightBaseClassifier)):
        y_pred = estimator.decision_function(self.X_test)
    elif isinstance(estimator, DecisionTreeClassifier):
        y_pred = estimator.predict_proba(self.X_test.astype(np.float32))
    elif isinstance(
            estimator,
            (ForestClassifier, XGBClassifier, LGBMClassifier)):
        y_pred = estimator.predict_proba(self.X_test)
    else:
        y_pred = estimator.predict(self.X_test)

    return self.X_test, y_pred, fitted_estimator
Example #7
Source File: test_bagging.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_classification():
    # Check classification for various parameter settings.
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(iris.data,
                                                        iris.target,
                                                        random_state=rng)
    grid = ParameterGrid({"max_samples": [0.5, 1.0],
                          "max_features": [1, 2, 4],
                          "bootstrap": [True, False],
                          "bootstrap_features": [True, False]})

    for base_estimator in [None,
                           DummyClassifier(),
                           Perceptron(tol=1e-3),
                           DecisionTreeClassifier(),
                           KNeighborsClassifier(),
                           SVC(gamma="scale")]:
        for params in grid:
            BaggingClassifier(base_estimator=base_estimator,
                              random_state=rng,
                              **params).fit(X_train, y_train).predict(X_test)
Example #8
Source File: test_weight_boosting.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_gridsearch():
    # Check that base trees can be grid-searched.
    # AdaBoost classification
    boost = AdaBoostClassifier(base_estimator=DecisionTreeClassifier())
    parameters = {'n_estimators': (1, 2),
                  'base_estimator__max_depth': (1, 2),
                  'algorithm': ('SAMME', 'SAMME.R')}
    clf = GridSearchCV(boost, parameters)
    clf.fit(iris.data, iris.target)

    # AdaBoost regression
    boost = AdaBoostRegressor(base_estimator=DecisionTreeRegressor(),
                              random_state=0)
    parameters = {'n_estimators': (1, 2),
                  'base_estimator__max_depth': (1, 2)}
    clf = GridSearchCV(boost, parameters)
    clf.fit(boston.data, boston.target)
Example #9
Source File: test_export.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_plot_tree(pyplot):
    # mostly smoke tests
    # Check correctness of export_graphviz
    clf = DecisionTreeClassifier(max_depth=3,
                                 min_samples_split=2,
                                 criterion="gini",
                                 random_state=2)
    clf.fit(X, y)

    # Test export code
    feature_names = ['first feat', 'sepal_width']
    nodes = plot_tree(clf, feature_names=feature_names)
    assert len(nodes) == 3
    assert nodes[0].get_text() == ("first feat <= 0.0\nentropy = 0.5\n"
                                   "samples = 6\nvalue = [3, 3]")
    assert nodes[1].get_text() == "entropy = 0.0\nsamples = 3\nvalue = [3, 0]"
    assert nodes[2].get_text() == "entropy = 0.0\nsamples = 3\nvalue = [0, 3]"
Example #10
Source File: test_tree.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_probability():
    # Predict probabilities using DecisionTreeClassifier.
    for name, Tree in CLF_TREES.items():
        clf = Tree(max_depth=1, max_features=1, random_state=42)
        clf.fit(iris.data, iris.target)

        prob_predict = clf.predict_proba(iris.data)
        assert_array_almost_equal(np.sum(prob_predict, 1),
                                  np.ones(iris.data.shape[0]),
                                  err_msg="Failed with {0}".format(name))
        assert_array_equal(np.argmax(prob_predict, 1),
                           clf.predict(iris.data),
                           err_msg="Failed with {0}".format(name))
        assert_almost_equal(clf.predict_proba(iris.data),
                            np.exp(clf.predict_log_proba(iris.data)), 8,
                            err_msg="Failed with {0}".format(name))
Example #11
Source File: test_tree.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_importances_gini_equal_mse():
    # Check that gini is equivalent to mse for binary output variable

    X, y = datasets.make_classification(n_samples=2000,
                                        n_features=10,
                                        n_informative=3,
                                        n_redundant=0,
                                        n_repeated=0,
                                        shuffle=False,
                                        random_state=0)

    # The gini index and the mean square error (variance) might differ due
    # to numerical instability. Since those instabilities mainly occur at
    # high tree depth, we restrict this maximal depth.
    clf = DecisionTreeClassifier(criterion="gini", max_depth=5,
                                 random_state=0).fit(X, y)
    reg = DecisionTreeRegressor(criterion="mse", max_depth=5,
                                random_state=0).fit(X, y)

    assert_almost_equal(clf.feature_importances_, reg.feature_importances_)
    assert_array_equal(clf.tree_.feature, reg.tree_.feature)
    assert_array_equal(clf.tree_.children_left, reg.tree_.children_left)
    assert_array_equal(clf.tree_.children_right, reg.tree_.children_right)
    assert_array_equal(clf.tree_.n_node_samples, reg.tree_.n_node_samples)
Example #12
Source File: test_tree.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_sample_weight_invalid():
    # Check sample weighting raises errors.
    X = np.arange(100)[:, np.newaxis]
    y = np.ones(100)
    y[:50] = 0.0

    clf = DecisionTreeClassifier(random_state=0)

    sample_weight = np.random.rand(100, 1)
    assert_raises(ValueError, clf.fit, X, y, sample_weight=sample_weight)

    sample_weight = np.array(0)
    assert_raises(ValueError, clf.fit, X, y, sample_weight=sample_weight)

    sample_weight = np.ones(101)
    assert_raises(ValueError, clf.fit, X, y, sample_weight=sample_weight)

    sample_weight = np.ones(99)
    assert_raises(ValueError, clf.fit, X, y, sample_weight=sample_weight)
Example #13
Source File: test_tree.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_huge_allocations():
    n_bits = 8 * struct.calcsize("P")

    X = np.random.randn(10, 2)
    y = np.random.randint(0, 2, 10)

    # Sanity check: we cannot request more memory than the size of the address
    # space. Currently raises OverflowError.
    huge = 2 ** (n_bits + 1)
    clf = DecisionTreeClassifier(splitter='best', max_leaf_nodes=huge)
    assert_raises(Exception, clf.fit, X, y)

    # Non-regression test: MemoryError used to be dropped by Cython
    # because of missing "except *".
    huge = 2 ** (n_bits - 1) - 1
    clf = DecisionTreeClassifier(splitter='best', max_leaf_nodes=huge)
    assert_raises(MemoryError, clf.fit, X, y)
Example #14
Source File: test_base.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_set_params_passes_all_parameters():
    # Make sure all parameters are passed together to set_params
    # of nested estimator. Regression test for #9944

    class TestDecisionTree(DecisionTreeClassifier):
        def set_params(self, **kwargs):
            super().set_params(**kwargs)
            # expected_kwargs is in test scope
            assert kwargs == expected_kwargs
            return self

    expected_kwargs = {'max_depth': 5, 'min_samples_leaf': 2}
    for est in [Pipeline([('estimator', TestDecisionTree())]),
                GridSearchCV(TestDecisionTree(), {})]:
        est.set_params(estimator__max_depth=5,
                       estimator__min_samples_leaf=2)
Example #15
Source File: test_base.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_score_sample_weight():

    rng = np.random.RandomState(0)

    # test both ClassifierMixin and RegressorMixin
    estimators = [DecisionTreeClassifier(max_depth=2),
                  DecisionTreeRegressor(max_depth=2)]
    sets = [datasets.load_iris(),
            datasets.load_boston()]

    for est, ds in zip(estimators, sets):
        est.fit(ds.data, ds.target)
        # generate random sample weights
        sample_weight = rng.randint(1, 10, size=len(ds.target))
        # check that the score with and without sample weights are different
        assert_not_equal(est.score(ds.data, ds.target),
                         est.score(ds.data, ds.target,
                                   sample_weight=sample_weight),
                         msg="Unweighted and weighted scores "
                             "are unexpectedly equal")
Example #16
Source File: test_pdpbox.py From docker-python with Apache License 2.0 | 6 votes |
def test_simple_pdp(self):
    # set up data
    data = pd.read_csv("/input/tests/data/fifa_2018_stats.csv")
    y = (data['Man of the Match'] == "Yes")
    feature_names = [i for i in data.columns if data[i].dtype in [np.int64]]
    X = data[feature_names]
    train_X, val_X, train_y, val_y = train_test_split(X, y, random_state=1)

    # Build simple model
    tree_model = DecisionTreeClassifier(random_state=0,
                                        max_depth=3).fit(train_X, train_y)

    # Set up pdp as table
    pdp_goals = pdp.pdp_isolate(model=tree_model,
                                dataset=val_X,
                                model_features=feature_names,
                                feature='Goal Scored')
    # make plot
    pdp.pdp_plot(pdp_goals, 'Goal Scored')
Example #17
Source File: testScoreWithAdapaSklearn.py From nyoka with Apache License 2.0 | 6 votes |
def test_17_decisiontreeclassifier(self):
    print("\ntest 17 (decision tree classifier with preprocessing) [multi-class]\n")
    X, X_test, y, features, target, test_file = self.data_utility.get_data_for_multi_class_classification()

    model = DecisionTreeClassifier()
    pipeline_obj = Pipeline([
        ("scaler", Binarizer()),
        ("model", model)
    ])

    pipeline_obj.fit(X, y)
    file_name = 'test17sklearn.pmml'
    skl_to_pmml(pipeline_obj, features, target, file_name)
    model_name = self.adapa_utility.upload_to_zserver(file_name)
    predictions, probabilities = self.adapa_utility.score_in_zserver(model_name, test_file)
    model_pred = pipeline_obj.predict(X_test)
    model_prob = pipeline_obj.predict_proba(X_test)
    self.assertEqual(self.adapa_utility.compare_predictions(predictions, model_pred), True)
    self.assertEqual(self.adapa_utility.compare_probability(probabilities, model_prob), True)
Example #18
Source File: testScoreWithAdapaSklearn.py From nyoka with Apache License 2.0 | 6 votes |
def test_18_decisiontreeclassifier(self):
    print("\ntest 18 (decision tree classifier with preprocessing) [binary-class]\n")
    X, X_test, y, features, target, test_file = self.data_utility.get_data_for_binary_classification()

    model = DecisionTreeClassifier()
    pipeline_obj = Pipeline([
        ("scaler", Binarizer()),
        ("model", model)
    ])

    pipeline_obj.fit(X, y)
    file_name = 'test18sklearn.pmml'
    skl_to_pmml(pipeline_obj, features, target, file_name)
    model_name = self.adapa_utility.upload_to_zserver(file_name)
    predictions, probabilities = self.adapa_utility.score_in_zserver(model_name, test_file)
    model_pred = pipeline_obj.predict(X_test)
    model_prob = pipeline_obj.predict_proba(X_test)
    self.assertEqual(self.adapa_utility.compare_predictions(predictions, model_pred), True)
    self.assertEqual(self.adapa_utility.compare_probability(probabilities, model_prob), True)
Example #19
Source File: testScoreWithAdapaSklearn.py From nyoka with Apache License 2.0 | 6 votes |
def test_19_decisiontreeclassifier(self):
    print("\ntest 19 (decision tree classifier without preprocessing) [multi-class]\n")
    X, X_test, y, features, target, test_file = self.data_utility.get_data_for_multi_class_classification()

    model = DecisionTreeClassifier()
    pipeline_obj = Pipeline([
        ("model", model)
    ])

    pipeline_obj.fit(X, y)
    file_name = 'test19sklearn.pmml'
    skl_to_pmml(pipeline_obj, features, target, file_name)
    model_name = self.adapa_utility.upload_to_zserver(file_name)
    predictions, probabilities = self.adapa_utility.score_in_zserver(model_name, test_file)
    model_pred = pipeline_obj.predict(X_test)
    model_prob = pipeline_obj.predict_proba(X_test)
    self.assertEqual(self.adapa_utility.compare_predictions(predictions, model_pred), True)
    self.assertEqual(self.adapa_utility.compare_probability(probabilities, model_prob), True)
Example #20
Source File: testScoreWithAdapaSklearn.py From nyoka with Apache License 2.0 | 6 votes |
def test_20_decisiontreeclassifier(self):
    print("\ntest 20 (decision tree classifier without preprocessing) [binary-class]\n")
    X, X_test, y, features, target, test_file = self.data_utility.get_data_for_binary_classification()

    model = DecisionTreeClassifier()
    pipeline_obj = Pipeline([
        ("model", model)
    ])

    pipeline_obj.fit(X, y)
    file_name = 'test20sklearn.pmml'
    skl_to_pmml(pipeline_obj, features, target, file_name)
    model_name = self.adapa_utility.upload_to_zserver(file_name)
    predictions, probabilities = self.adapa_utility.score_in_zserver(model_name, test_file)
    model_pred = pipeline_obj.predict(X_test)
    model_prob = pipeline_obj.predict_proba(X_test)
    self.assertEqual(self.adapa_utility.compare_predictions(predictions, model_pred), True)
    self.assertEqual(self.adapa_utility.compare_probability(probabilities, model_prob), True)
Example #21
Source File: TrAdaBoost.py From transferlearning with MIT License | 5 votes |
def train_classify(self, trans_data, trans_label, test_data, P):
    clf = tree.DecisionTreeClassifier(criterion="gini",
                                      max_features="log2",
                                      splitter="random")
    clf.fit(trans_data, trans_label, sample_weight=P[:, 0])
    return clf.predict(test_data)
Example #22
Source File: utils.py From contextualbandits with BSD 2-Clause "Simplified" License | 5 votes |
def __init__(self, beta_prior=(1, 1), ts=False, alpha=0.8,
             random_state=None, *args, **kwargs):
    self.beta_prior = beta_prior
    self.random_state = random_state
    self.conf_coef = alpha
    self.ts = bool(ts)
    self.model = DecisionTreeClassifier(*args, **kwargs)
    self.is_fitted = False
    self.aux_beta = (beta_prior[0], beta_prior[1])
Example #23
Source File: model_loop.py From fake-news-detection with MIT License | 5 votes |
def define_clfs_params(self):
    '''
    Defines all relevant parameters and classes for classifier objects.
    Edit these if you wish to change parameters.
    '''
    # These are the classifiers
    self.clfs = {
        'RF': RandomForestClassifier(n_estimators=50, n_jobs=-1),
        'ET': ExtraTreesClassifier(n_estimators=10, n_jobs=-1, criterion='entropy'),
        # max_depth must be a single integer; the candidate depths [1, 5, 10, 15]
        # belong in a parameter grid, not in the estimator constructor.
        'AB': AdaBoostClassifier(DecisionTreeClassifier(max_depth=1),
                                 algorithm="SAMME", n_estimators=200),
        'LR': LogisticRegression(penalty='l1', C=1e5),
        'SVM': svm.SVC(kernel='linear', probability=True, random_state=0),
        'GB': GradientBoostingClassifier(learning_rate=0.05, subsample=0.5,
                                         max_depth=6, n_estimators=10),
        'NB': GaussianNB(),
        'DT': DecisionTreeClassifier(),
        'SGD': SGDClassifier(loss='log', penalty='l2'),
        'KNN': KNeighborsClassifier(n_neighbors=3)
    }
    # These are the parameters which will be run through
    self.params = {
        'RF': {'n_estimators': [1, 10, 100, 1000],
               'max_depth': [10, 15, 20, 30, 40, 50, 60, 70, 100],
               'max_features': ['sqrt', 'log2'],
               'min_samples_split': [2, 5, 10],
               'random_state': [1]},
        'LR': {'penalty': ['l1', 'l2'],
               'C': [0.00001, 0.0001, 0.001, 0.01, 0.1, 1, 10],
               'random_state': [1]},
        'SGD': {'loss': ['log'],
                'penalty': ['l2', 'l1', 'elasticnet'],
                'random_state': [1]},
        'ET': {'n_estimators': [1, 10, 100, 1000],
               'criterion': ['gini', 'entropy'],
               'max_depth': [1, 3, 5, 10, 15],
               'max_features': ['sqrt', 'log2'],
               'min_samples_split': [2, 5, 10],
               'random_state': [1]},
        'AB': {'algorithm': ['SAMME', 'SAMME.R'],
               'n_estimators': [1, 10, 100, 1000],
               'random_state': [1]},
        'GB': {'n_estimators': [1, 10, 100, 1000],
               'learning_rate': [0.001, 0.01, 0.05, 0.1, 0.5],
               'subsample': [0.1, 0.5, 1.0],
               'max_depth': [1, 3, 5, 10, 20, 50, 100],
               'random_state': [1]},
        'NB': {},
        'DT': {'criterion': ['gini', 'entropy'],
               'max_depth': [1, 2, 15, 20, 30, 40, 50],
               'max_features': ['sqrt', 'log2'],
               'min_samples_split': [2, 5, 10],
               'random_state': [1]},
        'SVM': {'C': [0.00001, 0.0001, 0.001, 0.01, 0.1, 1, 10],
                'kernel': ['linear'],
                'random_state': [1]},
        'KNN': {'n_neighbors': [1, 5, 10, 25, 50, 100],
                'weights': ['uniform', 'distance'],
                'algorithm': ['auto', 'ball_tree', 'kd_tree']}
    }
Example #24
Source File: DT_sklearn_clf.py From LearningX with MIT License | 5 votes |
def __init__(self, max_depth=20, min_samples_leaf=10):
    from sklearn.tree import DecisionTreeClassifier
    self.model = DecisionTreeClassifier(max_depth=max_depth,
                                        min_samples_leaf=min_samples_leaf,
                                        criterion="gini",
                                        splitter="best")
Example #25
Source File: pipeline.py From EvalNE with MIT License | 5 votes |
def lp_model(self):
    model = self._config.get('GENERAL', 'lp_model')
    if model == 'LogisticRegression':
        return LogisticRegression(solver='liblinear')
    elif model == 'LogisticRegressionCV':
        return LogisticRegressionCV(Cs=10, cv=5, penalty='l2',
                                    scoring='roc_auc', solver='lbfgs',
                                    max_iter=100)
    elif model == 'DecisionTreeClassifier':
        return DecisionTreeClassifier()
    elif model == 'SVM':
        parameters = {'C': [0.1, 1, 10, 100, 1000]}
        return GridSearchCV(LinearSVC(), parameters, cv=5)
    else:
        return util.auto_import(model)
Example #26
Source File: common.py From typhon with MIT License | 5 votes |
def _ice_cloud_model():
    """Return the default model for the ice cloud classifier"""
    # As simple as it is. We do not need a grid search trainer for the DTC
    # since it already has good performance.
    return DecisionTreeClassifier(
        max_depth=12,
        random_state=5,
        # n_estimators=20,
        max_features=9,
    )
Example #27
Source File: 23_DecisionTree.py From Python with MIT License | 5 votes |
def train_using_gini(X_train, X_test, y_train, data):
    # Creating the classifier object
    clf_gini = DecisionTreeClassifier(criterion="gini", random_state=100,
                                      max_depth=3, min_samples_leaf=5)
    # Performing training
    clf_gini.fit(X_train, y_train)
    visualize_tree(data, clf_gini, 'gini')
    print('\nFeature Importance : ', clf_gini.feature_importances_)
    return clf_gini

# Function to perform training with entropy.
Example #28
Source File: 23_DecisionTree.py From Python with MIT License | 5 votes |
def tarin_using_entropy(X_train, X_test, y_train, data):
    # Decision tree with entropy
    clf_entropy = DecisionTreeClassifier(criterion="entropy", random_state=100,
                                         max_depth=3, min_samples_leaf=5)
    # Performing training
    clf_entropy.fit(X_train, y_train)
    visualize_tree(data, clf_entropy, 'entropy')
    print('\nFeature Importance : ', clf_entropy.feature_importances_)
    return clf_entropy

# Function to make predictions
Example #29
Source File: dtree.py From Python with MIT License | 5 votes |
def train_using_gini(X_train, X_test, y_train, data):
    # Creating the classifier object
    clf_gini = DecisionTreeClassifier(criterion="gini", random_state=100,
                                      max_depth=3, min_samples_leaf=5)
    # Performing training
    clf_gini.fit(X_train, y_train)
    visualize_tree(data, clf_gini, 'gini')
    print('\nFeature Importance : ', clf_gini.feature_importances_)
    return clf_gini

# Function to perform training with entropy.
Example #30
Source File: dtree.py From Python with MIT License | 5 votes |
def tarin_using_entropy(X_train, X_test, y_train, data):
    # Decision tree with entropy
    clf_entropy = DecisionTreeClassifier(criterion="entropy", random_state=100,
                                         max_depth=3, min_samples_leaf=5)
    # Performing training
    clf_entropy.fit(X_train, y_train)
    visualize_tree(data, clf_entropy, 'entropy')
    print('\nFeature Importance : ', clf_entropy.feature_importances_)
    return clf_entropy

# Function to make predictions