Python sklearn.tree.DecisionTreeClassifier() Examples

The following are 30 code examples showing how to use sklearn.tree.DecisionTreeClassifier(). They are extracted from open source projects; the originating project, author, source file, and license are listed above each example.

You may also want to check out all available functions and classes of the sklearn.tree module.
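
For orientation, here is a minimal, self-contained sketch of the basic workflow these examples build on: fit a tree, then ask it for labels and probabilities. The dataset and parameter values below are illustrative choices, not taken from any of the projects.

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

# Load a small bundled dataset and hold out a test split
X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# Fit a shallow tree; max_depth=3 is an illustrative choice
clf = DecisionTreeClassifier(max_depth=3, random_state=0)
clf.fit(X_train, y_train)

print(clf.predict(X_test[:5]))        # hard class labels
print(clf.predict_proba(X_test[:5]))  # per-class probabilities
print(clf.score(X_test, y_test))      # mean accuracy on the test split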

Example 1
Project: unmixing   Author: arthur-e   File: sasma.py    License: MIT License
def predict(self, fit=None, features=None, probabilities=False):
        '''
        Predict the class labels (e.g., endmember types) based on an existing
        tree fit and new predictive features. Arguments:
            fit         The result of tree.DecisionTreeClassifier.fit(); uses
                        the last fit model if None.
            features    The new predictive features (X array) to use;
                        should be (p x n): n samples, each with p features.
        '''
        if fit is None:
            fit = self.last_fit
        if features is None:
            features = self.x_features_array
        if probabilities:
            shp = self.y_raster.shape
            return fit.predict(features.T).T.reshape((self.n_labels, shp[1], shp[2]))

        return fit.predict(features.T).reshape(self.y_raster.shape) 
Example 2
Project: Machine-Learning-for-Beginner-by-Python3   Author: Anfany   File: AdaBoost_Classify.py    License: MIT License
def Train(data, modelcount, censhu, yanzhgdata):
    model = AdaBoostClassifier(DecisionTreeClassifier(max_depth=censhu),
                               algorithm="SAMME",
                               n_estimators=modelcount, learning_rate=0.8)

    model.fit(data[:, :-1], data[:, -1])
    # Predictions for the training data
    train_out = model.predict(data[:, :-1])
    # Compute the MSE
    train_mse = fmse(data[:, -1], train_out)[0]

    # Predictions for the validation data
    add_yan = model.predict(yanzhgdata[:, :-1])
    # Compute the F1 measure
    add_mse = fmse(yanzhgdata[:, -1], add_yan)[0]
    print(train_mse, add_mse)
    return train_mse, add_mse

# Function to finalize the chosen combination
Example 3
Project: Machine-Learning-for-Beginner-by-Python3   Author: Anfany   File: AdaBoost_Classify.py    License: MIT License
def recspre(estrs, predata, datadict, zhe):

    mo, ze = estrs.split('-')
    model = AdaBoostClassifier(DecisionTreeClassifier(max_depth=int(ze)),
                               algorithm="SAMME",
                               n_estimators=int(mo), learning_rate=0.8)

    model.fit(datadict[zhe]['train'][:, :-1], datadict[zhe]['train'][:, -1])

    # Predict
    yucede = model.predict(predata[:, :-1])
    # Compute the confusion matrix

    print(ConfuseMatrix(predata[:, -1], yucede))

    return fmse(predata[:, -1], yucede)

# Main function
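
Examples 2 and 3 depend on project-specific helpers (fmse, ConfuseMatrix, datadict), so they do not run on their own. A self-contained sketch of the same pattern, boosting shallow trees with SAMME on synthetic data, might look like this (the data and the depth of 2 are illustrative):

import numpy as np
from sklearn.datasets import make_classification
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier

# Synthetic stand-in for the project's training/validation arrays
X, y = make_classification(n_samples=500, n_features=8, random_state=0)
X_train, y_train, X_val, y_val = X[:400], y[:400], X[400:], y[400:]

# Shallow trees boosted with SAMME, as in the two examples above
model = AdaBoostClassifier(DecisionTreeClassifier(max_depth=2),
                           algorithm="SAMME",
                           n_estimators=100, learning_rate=0.8)
model.fit(X_train, y_train)
print(model.score(X_train, y_train), model.score(X_val, y_val))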
Example 4
Project: scikit-multiflow   Author: scikit-multiflow   File: learn_nse.py    License: BSD 3-Clause "New" or "Revised" License
def __init__(self,
                 base_estimator=DecisionTreeClassifier(),
                 window_size=250,
                 slope=0.5,
                 crossing_point=10,
                 n_estimators=15,
                 pruning=None):
        super().__init__()
        self.ensemble = []
        self.ensemble_weights = []
        self.bkts = []
        self.wkts = []
        self.buffer = []
        self.window_size = window_size
        self.slope = slope
        self.crossing_point = crossing_point
        self.n_estimators = n_estimators
        self.pruning = pruning
        self.X_batch = []
        self.y_batch = []
        self.instance_weights = []
        self.base_estimator = cp.deepcopy(base_estimator)
        self.classes = None 
Example 5
Project: scikit-multiflow   Author: scikit-multiflow   File: learn_pp.py    License: BSD 3-Clause "New" or "Revised" License
def __init__(self, base_estimator=DecisionTreeClassifier(),
                 error_threshold=0.5,
                 n_estimators=30,
                 n_ensembles=10,
                 window_size=100,
                 random_state=None):
        super().__init__()
        self.base_estimator = base_estimator
        self.n_estimators = n_estimators
        self.ensembles = []
        self.ensemble_weights = []
        self.classes = None
        self.n_ensembles = n_ensembles
        self.random = check_random_state(random_state)
        self.random_state = random_state
        self.error_threshold = error_threshold
        self.X_batch = []
        self.y_batch = []
        self.window_size = window_size 
Example 6
Project: m2cgen   Author: BayesWitnesses   File: utils.py    License: MIT License
def __call__(self, estimator):
        fitted_estimator = estimator.fit(self.X_train, self.y_train)

        if isinstance(estimator, (LinearClassifierMixin, SVC, NuSVC,
                                  LightBaseClassifier)):
            y_pred = estimator.decision_function(self.X_test)
        elif isinstance(estimator, DecisionTreeClassifier):
            y_pred = estimator.predict_proba(self.X_test.astype(np.float32))
        elif isinstance(
                estimator,
                (ForestClassifier, XGBClassifier, LGBMClassifier)):
            y_pred = estimator.predict_proba(self.X_test)
        else:
            y_pred = estimator.predict(self.X_test)

        return self.X_test, y_pred, fitted_estimator 
Example 7
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_bagging.py    License: MIT License
def test_classification():
    # Check classification for various parameter settings.
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(iris.data,
                                                        iris.target,
                                                        random_state=rng)
    grid = ParameterGrid({"max_samples": [0.5, 1.0],
                          "max_features": [1, 2, 4],
                          "bootstrap": [True, False],
                          "bootstrap_features": [True, False]})

    for base_estimator in [None,
                           DummyClassifier(),
                           Perceptron(tol=1e-3),
                           DecisionTreeClassifier(),
                           KNeighborsClassifier(),
                           SVC(gamma="scale")]:
        for params in grid:
            BaggingClassifier(base_estimator=base_estimator,
                              random_state=rng,
                              **params).fit(X_train, y_train).predict(X_test) 
Example 8
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_weight_boosting.py    License: MIT License
def test_gridsearch():
    # Check that base trees can be grid-searched.
    # AdaBoost classification
    boost = AdaBoostClassifier(base_estimator=DecisionTreeClassifier())
    parameters = {'n_estimators': (1, 2),
                  'base_estimator__max_depth': (1, 2),
                  'algorithm': ('SAMME', 'SAMME.R')}
    clf = GridSearchCV(boost, parameters)
    clf.fit(iris.data, iris.target)

    # AdaBoost regression
    boost = AdaBoostRegressor(base_estimator=DecisionTreeRegressor(),
                              random_state=0)
    parameters = {'n_estimators': (1, 2),
                  'base_estimator__max_depth': (1, 2)}
    clf = GridSearchCV(boost, parameters)
    clf.fit(boston.data, boston.target) 
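
The 'base_estimator__max_depth' key above uses scikit-learn's double-underscore convention to reach parameters of the nested tree. A standalone sketch of the same idea, reading the winning parameters back afterwards (it keeps the older base_estimator spelling used throughout these examples; newer scikit-learn releases rename it to estimator):

from sklearn.datasets import load_iris
from sklearn.ensemble import AdaBoostClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeClassifier

iris = load_iris()
boost = AdaBoostClassifier(base_estimator=DecisionTreeClassifier())
# <component>__<parameter> addresses the nested estimator's parameters
grid = GridSearchCV(boost, {'n_estimators': (1, 2),
                            'base_estimator__max_depth': (1, 2)})
grid.fit(iris.data, iris.target)
print(grid.best_params_)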
Example 9
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_export.py    License: MIT License
def test_plot_tree(pyplot):
    # mostly smoke tests
    # Check correctness of plot_tree for criterion="entropy"
    clf = DecisionTreeClassifier(max_depth=3,
                                 min_samples_split=2,
                                 criterion="entropy",
                                 random_state=2)
    clf.fit(X, y)

    # Test export code
    feature_names = ['first feat', 'sepal_width']
    nodes = plot_tree(clf, feature_names=feature_names)
    assert len(nodes) == 3
    assert nodes[0].get_text() == ("first feat <= 0.0\nentropy = 0.5\n"
                                   "samples = 6\nvalue = [3, 3]")
    assert nodes[1].get_text() == "entropy = 0.0\nsamples = 3\nvalue = [3, 0]"
    assert nodes[2].get_text() == "entropy = 0.0\nsamples = 3\nvalue = [0, 3]" 
Example 10
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_tree.py    License: MIT License
def test_probability():
    # Predict probabilities using DecisionTreeClassifier.

    for name, Tree in CLF_TREES.items():
        clf = Tree(max_depth=1, max_features=1, random_state=42)
        clf.fit(iris.data, iris.target)

        prob_predict = clf.predict_proba(iris.data)
        assert_array_almost_equal(np.sum(prob_predict, 1),
                                  np.ones(iris.data.shape[0]),
                                  err_msg="Failed with {0}".format(name))
        assert_array_equal(np.argmax(prob_predict, 1),
                           clf.predict(iris.data),
                           err_msg="Failed with {0}".format(name))
        assert_almost_equal(clf.predict_proba(iris.data),
                            np.exp(clf.predict_log_proba(iris.data)), 8,
                            err_msg="Failed with {0}".format(name)) 
Example 11
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_tree.py    License: MIT License
def test_importances_gini_equal_mse():
    # Check that gini is equivalent to mse for binary output variable

    X, y = datasets.make_classification(n_samples=2000,
                                        n_features=10,
                                        n_informative=3,
                                        n_redundant=0,
                                        n_repeated=0,
                                        shuffle=False,
                                        random_state=0)

    # The gini index and the mean square error (variance) might differ due
    # to numerical instability. Since those instabilities mainly occurs at
    # high tree depth, we restrict this maximal depth.
    clf = DecisionTreeClassifier(criterion="gini", max_depth=5,
                                 random_state=0).fit(X, y)
    reg = DecisionTreeRegressor(criterion="mse", max_depth=5,
                                random_state=0).fit(X, y)

    assert_almost_equal(clf.feature_importances_, reg.feature_importances_)
    assert_array_equal(clf.tree_.feature, reg.tree_.feature)
    assert_array_equal(clf.tree_.children_left, reg.tree_.children_left)
    assert_array_equal(clf.tree_.children_right, reg.tree_.children_right)
    assert_array_equal(clf.tree_.n_node_samples, reg.tree_.n_node_samples) 
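
The equivalence tested above has a one-line explanation: for a binary 0/1 target, a node whose positive fraction is p has Gini impurity 2p(1-p), exactly twice the node variance p(1-p) that the MSE criterion minimizes, so both criteria rank candidate splits identically. A quick numeric check (values illustrative):

import numpy as np

y = np.array([0, 0, 1, 1, 1])         # positive fraction p = 0.6
gini = 2 * y.mean() * (1 - y.mean())  # Gini impurity of the node
variance = np.var(y)                  # node variance used by the MSE criterion
print(gini, 2 * variance)             # both print 0.48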
Example 12
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_tree.py    License: MIT License
def test_sample_weight_invalid():
    # Check sample weighting raises errors.
    X = np.arange(100)[:, np.newaxis]
    y = np.ones(100)
    y[:50] = 0.0

    clf = DecisionTreeClassifier(random_state=0)

    sample_weight = np.random.rand(100, 1)
    assert_raises(ValueError, clf.fit, X, y, sample_weight=sample_weight)

    sample_weight = np.array(0)
    assert_raises(ValueError, clf.fit, X, y, sample_weight=sample_weight)

    sample_weight = np.ones(101)
    assert_raises(ValueError, clf.fit, X, y, sample_weight=sample_weight)

    sample_weight = np.ones(99)
    assert_raises(ValueError, clf.fit, X, y, sample_weight=sample_weight) 
Example 13
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_tree.py    License: MIT License
def test_huge_allocations():
    n_bits = 8 * struct.calcsize("P")

    X = np.random.randn(10, 2)
    y = np.random.randint(0, 2, 10)

    # Sanity check: we cannot request more memory than the size of the address
    # space. Currently raises OverflowError.
    huge = 2 ** (n_bits + 1)
    clf = DecisionTreeClassifier(splitter='best', max_leaf_nodes=huge)
    assert_raises(Exception, clf.fit, X, y)

    # Non-regression test: MemoryError used to be dropped by Cython
    # because of missing "except *".
    huge = 2 ** (n_bits - 1) - 1
    clf = DecisionTreeClassifier(splitter='best', max_leaf_nodes=huge)
    assert_raises(MemoryError, clf.fit, X, y) 
Example 14
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_base.py    License: MIT License
def test_set_params_passes_all_parameters():
    # Make sure all parameters are passed together to set_params
    # of nested estimator. Regression test for #9944

    class TestDecisionTree(DecisionTreeClassifier):
        def set_params(self, **kwargs):
            super().set_params(**kwargs)
            # expected_kwargs is in test scope
            assert kwargs == expected_kwargs
            return self

    expected_kwargs = {'max_depth': 5, 'min_samples_leaf': 2}
    for est in [Pipeline([('estimator', TestDecisionTree())]),
                GridSearchCV(TestDecisionTree(), {})]:
        est.set_params(estimator__max_depth=5,
                       estimator__min_samples_leaf=2) 
Example 15
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_base.py    License: MIT License
def test_score_sample_weight():

    rng = np.random.RandomState(0)

    # test both ClassifierMixin and RegressorMixin
    estimators = [DecisionTreeClassifier(max_depth=2),
                  DecisionTreeRegressor(max_depth=2)]
    sets = [datasets.load_iris(),
            datasets.load_boston()]

    for est, ds in zip(estimators, sets):
        est.fit(ds.data, ds.target)
        # generate random sample weights
        sample_weight = rng.randint(1, 10, size=len(ds.target))
        # check that the score with and without sample weights are different
        assert_not_equal(est.score(ds.data, ds.target),
                         est.score(ds.data, ds.target,
                                   sample_weight=sample_weight),
                         msg="Unweighted and weighted scores "
                             "are unexpectedly equal") 
Example 16
Project: docker-python   Author: Kaggle   File: test_pdpbox.py    License: Apache License 2.0
def test_simple_pdp(self):
        # set up data
        data = pd.read_csv("/input/tests/data/fifa_2018_stats.csv")
        y = (data['Man of the Match'] == "Yes")
        feature_names = [i for i in data.columns if data[i].dtype in [np.int64]]
        X = data[feature_names]
        train_X, val_X, train_y, val_y = train_test_split(X, y, random_state=1)
        # Build simple model
        tree_model = DecisionTreeClassifier(random_state=0,
                                            max_depth=3).fit(train_X, train_y)

        # Set up pdp as table
        pdp_goals = pdp.pdp_isolate(model=tree_model,
                                    dataset=val_X,
                                    model_features=feature_names,
                                    feature='Goal Scored')
        # make plot
        pdp.pdp_plot(pdp_goals, 'Goal Scored') 
Example 17
Project: nyoka   Author: nyoka-pmml   File: testScoreWithAdapaSklearn.py    License: Apache License 2.0
def test_17_decisiontreeclassifier(self):
        print("\ntest 17 (decision tree classifier with preprocessing) [multi-class]\n")
        X, X_test, y, features, target, test_file = self.data_utility.get_data_for_multi_class_classification()

        model = DecisionTreeClassifier()
        pipeline_obj = Pipeline([
            ("scaler", Binarizer()),
            ("model", model)
        ])
        pipeline_obj.fit(X,y)
        file_name = 'test17sklearn.pmml'
        
        skl_to_pmml(pipeline_obj, features, target, file_name)
        model_name  = self.adapa_utility.upload_to_zserver(file_name)
        predictions, probabilities = self.adapa_utility.score_in_zserver(model_name, test_file)
        model_pred = pipeline_obj.predict(X_test)
        model_prob = pipeline_obj.predict_proba(X_test)
        self.assertEqual(self.adapa_utility.compare_predictions(predictions, model_pred), True)
        self.assertEqual(self.adapa_utility.compare_probability(probabilities, model_prob), True) 
Example 18
Project: nyoka   Author: nyoka-pmml   File: testScoreWithAdapaSklearn.py    License: Apache License 2.0
def test_18_decisiontreeclassifier(self):
        print("\ntest 18 (decision tree classifier with preprocessing) [binary-class]\n")
        X, X_test, y, features, target, test_file = self.data_utility.get_data_for_binary_classification()

        model = DecisionTreeClassifier()
        pipeline_obj = Pipeline([
            ("scaler", Binarizer()),
            ("model", model)
        ])
        pipeline_obj.fit(X,y)
        file_name = 'test18sklearn.pmml'
        
        skl_to_pmml(pipeline_obj, features, target, file_name)
        model_name  = self.adapa_utility.upload_to_zserver(file_name)
        predictions, probabilities = self.adapa_utility.score_in_zserver(model_name, test_file)
        model_pred = pipeline_obj.predict(X_test)
        model_prob = pipeline_obj.predict_proba(X_test)
        self.assertEqual(self.adapa_utility.compare_predictions(predictions, model_pred), True)
        self.assertEqual(self.adapa_utility.compare_probability(probabilities, model_prob), True) 
Example 19
Project: nyoka   Author: nyoka-pmml   File: testScoreWithAdapaSklearn.py    License: Apache License 2.0
def test_19_decisiontreeclassifier(self):
        print("\ntest 19 (decision tree classifier without preprocessing) [multi-class]\n")
        X, X_test, y, features, target, test_file = self.data_utility.get_data_for_multi_class_classification()

        model = DecisionTreeClassifier()
        pipeline_obj = Pipeline([
            ("model", model)
        ])
        pipeline_obj.fit(X,y)
        file_name = 'test19sklearn.pmml'
        
        skl_to_pmml(pipeline_obj, features, target, file_name)
        model_name  = self.adapa_utility.upload_to_zserver(file_name)
        predictions, probabilities = self.adapa_utility.score_in_zserver(model_name, test_file)
        model_pred = pipeline_obj.predict(X_test)
        model_prob = pipeline_obj.predict_proba(X_test)
        self.assertEqual(self.adapa_utility.compare_predictions(predictions, model_pred), True)
        self.assertEqual(self.adapa_utility.compare_probability(probabilities, model_prob), True) 
Example 20
Project: nyoka   Author: nyoka-pmml   File: testScoreWithAdapaSklearn.py    License: Apache License 2.0
def test_20_decisiontreeclassifier(self):
        print("\ntest 20 (decision tree classifier without preprocessing) [binary-class]\n")
        X, X_test, y, features, target, test_file = self.data_utility.get_data_for_binary_classification()

        model = DecisionTreeClassifier()
        pipeline_obj = Pipeline([
            ("model", model)
        ])
        pipeline_obj.fit(X,y)
        file_name = 'test20sklearn.pmml'
        
        skl_to_pmml(pipeline_obj, features, target, file_name)
        model_name  = self.adapa_utility.upload_to_zserver(file_name)
        predictions, probabilities = self.adapa_utility.score_in_zserver(model_name, test_file)
        model_pred = pipeline_obj.predict(X_test)
        model_prob = pipeline_obj.predict_proba(X_test)
        self.assertEqual(self.adapa_utility.compare_predictions(predictions, model_pred), True)
        self.assertEqual(self.adapa_utility.compare_probability(probabilities, model_prob), True) 
Example 21
Project: transferlearning   Author: jindongwang   File: TrAdaBoost.py    License: MIT License
def train_classify(self, trans_data, trans_label, test_data, P):
        clf = tree.DecisionTreeClassifier(criterion="gini", max_features="log2", splitter="random")
        clf.fit(trans_data, trans_label, sample_weight=P[:, 0])
        return clf.predict(test_data) 
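
The sample_weight argument carries TrAdaBoost's per-instance weights P into the tree fit; this is standard scikit-learn API. A minimal standalone sketch with synthetic data and illustrative weights:

import numpy as np
from sklearn import tree

rng = np.random.RandomState(0)
X = rng.randn(100, 4)
y = (X[:, 0] > 0).astype(int)

# Upweight the second half of the samples (weights are illustrative)
weights = np.ones(100)
weights[50:] = 2.0

clf = tree.DecisionTreeClassifier(criterion="gini", max_features="log2",
                                  splitter="random")
clf.fit(X, y, sample_weight=weights)
print(clf.predict(X[:5]))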
Example 22
Project: contextualbandits   Author: david-cortes   File: utils.py    License: BSD 2-Clause "Simplified" License
def __init__(self, beta_prior=(1,1), ts=False, alpha=0.8, random_state=None,
                 *args, **kwargs):
        self.beta_prior = beta_prior
        self.random_state = random_state
        self.conf_coef = alpha
        self.ts = bool(ts)
        self.model = DecisionTreeClassifier(*args, **kwargs)
        self.is_fitted = False
        self.aux_beta = (beta_prior[0], beta_prior[1]) 
Example 23
Project: fake-news-detection   Author: aldengolab   File: model_loop.py    License: MIT License
def define_clfs_params(self):
        '''
        Defines all relevant parameters and classes for classfier objects.
        Edit these if you wish to change parameters.
        '''
        # These are the classifiers
        self.clfs = {
            'RF': RandomForestClassifier(n_estimators = 50, n_jobs = -1),
            'ET': ExtraTreesClassifier(n_estimators = 10, n_jobs = -1, criterion = 'entropy'),
            'AB': AdaBoostClassifier(DecisionTreeClassifier(max_depth = 1), algorithm = "SAMME", n_estimators = 200),  # max_depth must be a single int; depth 1 (a stump) is a common AdaBoost base
            'LR': LogisticRegression(penalty = 'l1', C = 1e5),
            'SVM': svm.SVC(kernel = 'linear', probability = True, random_state = 0),
            'GB': GradientBoostingClassifier(learning_rate = 0.05, subsample = 0.5, max_depth = 6, n_estimators = 10),
            'NB': GaussianNB(),
            'DT': DecisionTreeClassifier(),
            'SGD': SGDClassifier(loss = 'log', penalty = 'l2'),
            'KNN': KNeighborsClassifier(n_neighbors = 3)
            }
        # These are the parameters which will be run through
        self.params = {
            'RF': {'n_estimators': [1, 10, 100, 1000], 'max_depth': [10, 15, 20, 30, 40, 50, 60, 70, 100],
                   'max_features': ['sqrt', 'log2'], 'min_samples_split': [2, 5, 10], 'random_state': [1]},
            'LR': {'penalty': ['l1', 'l2'], 'C': [0.00001, 0.0001, 0.001, 0.01, 0.1, 1, 10], 'random_state': [1]},
            'SGD': {'loss': ['log'], 'penalty': ['l2', 'l1', 'elasticnet'], 'random_state': [1]},
            'ET': {'n_estimators': [1, 10, 100, 1000], 'criterion': ['gini', 'entropy'], 'max_depth': [1, 3, 5, 10, 15],
                   'max_features': ['sqrt', 'log2'], 'min_samples_split': [2, 5, 10], 'random_state': [1]},
            'AB': {'algorithm': ['SAMME', 'SAMME.R'], 'n_estimators': [1, 10, 100, 1000], 'random_state': [1]},
            'GB': {'n_estimators': [1, 10, 100, 1000], 'learning_rate': [0.001, 0.01, 0.05, 0.1, 0.5],
                   'subsample': [0.1, 0.5, 1.0], 'max_depth': [1, 3, 5, 10, 20, 50, 100], 'random_state': [1]},
            'NB': {},
            'DT': {'criterion': ['gini', 'entropy'], 'max_depth': [1, 2, 15, 20, 30, 40, 50],
                   'max_features': ['sqrt', 'log2'], 'min_samples_split': [2, 5, 10], 'random_state': [1]},
            'SVM': {'C': [0.00001, 0.0001, 0.001, 0.01, 0.1, 1, 10], 'kernel': ['linear'], 'random_state': [1]},
            'KNN': {'n_neighbors': [1, 5, 10, 25, 50, 100], 'weights': ['uniform', 'distance'],
                    'algorithm': ['auto', 'ball_tree', 'kd_tree']}
        }
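
The loop that consumes these dictionaries is not shown in this snippet. One common pattern is to expand each parameter dict with ParameterGrid and apply it to a clone of the matching classifier; the sketch below illustrates that pattern for the 'DT' entry with a trimmed grid (it is not the project's actual loop):

from sklearn.base import clone
from sklearn.datasets import make_classification
from sklearn.model_selection import ParameterGrid, train_test_split
from sklearn.tree import DecisionTreeClassifier

X, y = make_classification(n_samples=300, random_state=1)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

# Trimmed stand-ins for self.clfs['DT'] and self.params['DT'] above
base_clf = DecisionTreeClassifier()
param_grid = {'criterion': ['gini', 'entropy'], 'max_depth': [2, 15],
              'min_samples_split': [2, 10], 'random_state': [1]}

for params in ParameterGrid(param_grid):
    clf = clone(base_clf).set_params(**params)
    clf.fit(X_train, y_train)
    print(params, round(clf.score(X_test, y_test), 3))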
Example 24
Project: LearningX   Author: ankonzoid   File: DT_sklearn_clf.py    License: MIT License
def __init__(self, max_depth=20, min_samples_leaf=10):
        from sklearn.tree import DecisionTreeClassifier
        self.model = DecisionTreeClassifier(max_depth=max_depth,
                                            min_samples_leaf=min_samples_leaf,
                                            criterion="gini", splitter="best") 
Example 25
Project: EvalNE   Author: Dru-Mara   File: pipeline.py    License: MIT License
def lp_model(self):
        model = self._config.get('GENERAL', 'lp_model')
        if model == 'LogisticRegression':
            return LogisticRegression(solver='liblinear')
        elif model == 'LogisticRegressionCV':
            return LogisticRegressionCV(Cs=10, cv=5, penalty='l2', scoring='roc_auc', solver='lbfgs', max_iter=100)
        elif model == 'DecisionTreeClassifier':
            return DecisionTreeClassifier()
        elif model == 'SVM':
            parameters = {'C': [0.1, 1, 10, 100, 1000]}
            return GridSearchCV(LinearSVC(), parameters, cv=5)
        else:
            return util.auto_import(model) 
Example 26
Project: typhon   Author: atmtools   File: common.py    License: MIT License
def _ice_cloud_model():
        """Return the default model for the ice cloud classifier"""
        # As simple as it is. We do not need a grid search trainer for the DTC
        # since it has already a good performance.
        return DecisionTreeClassifier(
            max_depth=12, random_state=5, # n_estimators=20, max_features=9,
        ) 
Example 27
Project: Python   Author: Ajinkya-Sonawane   File: 23_DecisionTree.py    License: MIT License
def train_using_gini(X_train, X_test, y_train, data):

    # Creating the classifier object
    clf_gini = DecisionTreeClassifier(criterion="gini", random_state=100,
                                      max_depth=3, min_samples_leaf=5)
    # Performing training
    clf_gini.fit(X_train, y_train)
    visualize_tree(data, clf_gini, 'gini')
    print('\nFeature Importance : ', clf_gini.feature_importances_)
    return clf_gini

# Function to perform training with entropy. 
Example 28
Project: Python   Author: Ajinkya-Sonawane   File: 23_DecisionTree.py    License: MIT License
def train_using_entropy(X_train, X_test, y_train, data):

    # Decision tree with entropy
    clf_entropy = DecisionTreeClassifier(criterion="entropy", random_state=100,
                                         max_depth=3, min_samples_leaf=5)

    # Performing training
    clf_entropy.fit(X_train, y_train)
    visualize_tree(data, clf_entropy, 'entropy')
    print('\nFeature Importance : ', clf_entropy.feature_importances_)
    return clf_entropy


# Function to make predictions 
Example 29
Project: Python   Author: Ajinkya-Sonawane   File: dtree.py    License: MIT License
def train_using_gini(X_train, X_test, y_train, data):

    # Creating the classifier object
    clf_gini = DecisionTreeClassifier(criterion="gini", random_state=100,
                                      max_depth=3, min_samples_leaf=5)
    # Performing training
    clf_gini.fit(X_train, y_train)
    visualize_tree(data, clf_gini, 'gini')
    print('\nFeature Importance : ', clf_gini.feature_importances_)
    return clf_gini

# Function to perform training with entropy. 
Example 30
Project: Python   Author: Ajinkya-Sonawane   File: dtree.py    License: MIT License
def train_using_entropy(X_train, X_test, y_train, data):

    # Decision tree with entropy
    clf_entropy = DecisionTreeClassifier(criterion="entropy", random_state=100,
                                         max_depth=3, min_samples_leaf=5)

    # Performing training
    clf_entropy.fit(X_train, y_train)
    visualize_tree(data, clf_entropy, 'entropy')
    print('\nFeature Importance : ', clf_entropy.feature_importances_)
    return clf_entropy


# Function to make predictions