Python sklearn.tree.DecisionTreeClassifier() Examples

The following are 30 code examples of sklearn.tree.DecisionTreeClassifier(). You can go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module sklearn.tree, or try the search function.
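Before the project examples, here is a minimal, self-contained sketch of the basic workflow: fitting a DecisionTreeClassifier on the bundled iris dataset and inspecting its predictions (the variable names here are illustrative and not taken from any example below).

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

# Load a small multi-class dataset and hold out a test split.
X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# Fit a depth-limited tree, then look at hard labels, class probabilities and accuracy.
clf = DecisionTreeClassifier(max_depth=3, random_state=0)
clf.fit(X_train, y_train)
print(clf.predict(X_test[:5]))
print(clf.predict_proba(X_test[:5]))
print(clf.score(X_test, y_test))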
Example #1
Source File: sasma.py    From unmixing with MIT License
def predict(self, fit=None, features=None, probabilities=False):
        '''
        Predict the class labels (e.g., endmember types) based on an existing
        tree fit and new predictive features. Arguments:
            fit             The result of tree.DecisionTreeClassifier.fit(); uses
                            the last fit model if None.
            features        The new X array/ new predictive features to use;
                            should be (p x n), n samples with p features.
            probabilities   If True, return per-class probabilities reshaped to
                            (n_labels, rows, cols) instead of hard labels.
        '''
        if fit is None: fit = self.last_fit
        if features is None: features = self.x_features_array
        if probabilities:
            # predict_proba() returns (n, n_labels); transpose and reshape into
            # a per-class probability raster
            shp = self.y_raster.shape
            return fit.predict_proba(features.T).T.reshape((self.n_labels, shp[1], shp[2]))

        return fit.predict(features.T).reshape(self.y_raster.shape) 
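The (p x n) layout described in the docstring is the transpose of scikit-learn's expected (n_samples, n_features) layout, which is why the method transposes before predicting and reshapes the result back onto the raster grid. A standalone sketch of that reshaping with made-up raster dimensions (none of these names or sizes come from the unmixing project):

import numpy as np
from sklearn.tree import DecisionTreeClassifier

p, rows, cols = 6, 4, 5                      # hypothetical band count and raster size
features = np.random.rand(p, rows * cols)    # (p x n): one column per pixel
labels = np.random.randint(0, 3, rows * cols)

fit = DecisionTreeClassifier(random_state=0).fit(features.T, labels)

# Hard labels back on the raster grid ...
label_map = fit.predict(features.T).reshape((rows, cols))
# ... or per-class probabilities as an (n_classes, rows, cols) stack.
prob_map = fit.predict_proba(features.T).T.reshape((-1, rows, cols))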
Example #2
Source File: AdaBoost_Classify.py    From Machine-Learning-for-Beginner-by-Python3 with MIT License
def Train(data, modelcount, censhu, yanzhgdata):
    model = AdaBoostClassifier(DecisionTreeClassifier(max_depth=censhu),
                               algorithm="SAMME",
                               n_estimators=modelcount, learning_rate=0.8)

    model.fit(data[:, :-1], data[:, -1])
    # Predictions on the training data
    train_out = model.predict(data[:, :-1])
    # Compute the training error (MSE)
    train_mse = fmse(data[:, -1], train_out)[0]

    # Predictions on the validation data
    add_yan = model.predict(yanzhgdata[:, :-1])
    # Compute the F1 measure
    add_mse = fmse(yanzhgdata[:, -1], add_yan)[0]
    print(train_mse, add_mse)
    return train_mse, add_mse

# Function that determines the final combination 
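The helper fmse used above is defined elsewhere in that project and is not shown here. A self-contained sketch of the same AdaBoost-over-decision-trees training/validation pattern, using sklearn's own f1_score as a stand-in metric on synthetic data (all names and numbers below are illustrative):

import numpy as np
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

rng = np.random.RandomState(0)
X = rng.rand(300, 4)
y = (X[:, 0] + X[:, 1] > 1).astype(int)
X_tr, X_va, y_tr, y_va = train_test_split(X, y, random_state=0)

model = AdaBoostClassifier(DecisionTreeClassifier(max_depth=2),
                           algorithm="SAMME", n_estimators=100,
                           learning_rate=0.8)
model.fit(X_tr, y_tr)

# Compare training and validation scores, as Train() does with its fmse helper.
print(f1_score(y_tr, model.predict(X_tr)),
      f1_score(y_va, model.predict(X_va)))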
Example #3
Source File: AdaBoost_Classify.py    From Machine-Learning-for-Beginner-by-Python3 with MIT License
def recspre(estrs, predata, datadict, zhe):

    mo, ze = estrs.split('-')
    model = AdaBoostClassifier(DecisionTreeClassifier(max_depth=int(ze)),
                               algorithm="SAMME",
                               n_estimators=int(mo), learning_rate=0.8)

    model.fit(datadict[zhe]['train'][:, :-1], datadict[zhe]['train'][:, -1])

    # Predict
    yucede = model.predict(predata[:, :-1])
    # Compute the confusion matrix

    print(ConfuseMatrix(predata[:, -1], yucede))

    return fmse(predata[:, -1], yucede)

# Main function 
Example #4
Source File: learn_nse.py    From scikit-multiflow with BSD 3-Clause "New" or "Revised" License
def __init__(self,
                 base_estimator=DecisionTreeClassifier(),
                 window_size=250,
                 slope=0.5,
                 crossing_point=10,
                 n_estimators=15,
                 pruning=None):
        super().__init__()
        self.ensemble = []
        self.ensemble_weights = []
        self.bkts = []
        self.wkts = []
        self.buffer = []
        self.window_size = window_size
        self.slope = slope
        self.crossing_point = crossing_point
        self.n_estimators = n_estimators
        self.pruning = pruning
        self.X_batch = []
        self.y_batch = []
        self.instance_weights = []
        self.base_estimator = cp.deepcopy(base_estimator)
        self.classes = None 
Example #5
Source File: learn_pp.py    From scikit-multiflow with BSD 3-Clause "New" or "Revised" License
def __init__(self, base_estimator=DecisionTreeClassifier(),
                 error_threshold=0.5,
                 n_estimators=30,
                 n_ensembles=10,
                 window_size=100,
                 random_state=None):
        super().__init__()
        self.base_estimator = base_estimator
        self.n_estimators = n_estimators
        self.ensembles = []
        self.ensemble_weights = []
        self.classes = None
        self.n_ensembles = n_ensembles
        self.random = check_random_state(random_state)
        self.random_state = random_state
        self.error_threshold = error_threshold
        self.X_batch = []
        self.y_batch = []
        self.window_size = window_size 
Example #6
Source File: utils.py    From m2cgen with MIT License
def __call__(self, estimator):
        fitted_estimator = estimator.fit(self.X_train, self.y_train)

        if isinstance(estimator, (LinearClassifierMixin, SVC, NuSVC,
                                  LightBaseClassifier)):
            y_pred = estimator.decision_function(self.X_test)
        elif isinstance(estimator, DecisionTreeClassifier):
            y_pred = estimator.predict_proba(self.X_test.astype(np.float32))
        elif isinstance(
                estimator,
                (ForestClassifier, XGBClassifier, LGBMClassifier)):
            y_pred = estimator.predict_proba(self.X_test)
        else:
            y_pred = estimator.predict(self.X_test)

        return self.X_test, y_pred, fitted_estimator 
Example #7
Source File: test_bagging.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_classification():
    # Check classification for various parameter settings.
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(iris.data,
                                                        iris.target,
                                                        random_state=rng)
    grid = ParameterGrid({"max_samples": [0.5, 1.0],
                          "max_features": [1, 2, 4],
                          "bootstrap": [True, False],
                          "bootstrap_features": [True, False]})

    for base_estimator in [None,
                           DummyClassifier(),
                           Perceptron(tol=1e-3),
                           DecisionTreeClassifier(),
                           KNeighborsClassifier(),
                           SVC(gamma="scale")]:
        for params in grid:
            BaggingClassifier(base_estimator=base_estimator,
                              random_state=rng,
                              **params).fit(X_train, y_train).predict(X_test) 
Example #8
Source File: test_weight_boosting.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_gridsearch():
    # Check that base trees can be grid-searched.
    # AdaBoost classification
    boost = AdaBoostClassifier(base_estimator=DecisionTreeClassifier())
    parameters = {'n_estimators': (1, 2),
                  'base_estimator__max_depth': (1, 2),
                  'algorithm': ('SAMME', 'SAMME.R')}
    clf = GridSearchCV(boost, parameters)
    clf.fit(iris.data, iris.target)

    # AdaBoost regression
    boost = AdaBoostRegressor(base_estimator=DecisionTreeRegressor(),
                              random_state=0)
    parameters = {'n_estimators': (1, 2),
                  'base_estimator__max_depth': (1, 2)}
    clf = GridSearchCV(boost, parameters)
    clf.fit(boston.data, boston.target) 
Example #9
Source File: test_export.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_plot_tree(pyplot):
    # mostly smoke tests
    # Check the node text rendered by plot_tree
    clf = DecisionTreeClassifier(max_depth=3,
                                 min_samples_split=2,
                                 criterion="gini",
                                 random_state=2)
    clf.fit(X, y)

    # Test export code
    feature_names = ['first feat', 'sepal_width']
    nodes = plot_tree(clf, feature_names=feature_names)
    assert len(nodes) == 3
    assert nodes[0].get_text() == ("first feat <= 0.0\nentropy = 0.5\n"
                                   "samples = 6\nvalue = [3, 3]")
    assert nodes[1].get_text() == "entropy = 0.0\nsamples = 3\nvalue = [3, 0]"
    assert nodes[2].get_text() == "entropy = 0.0\nsamples = 3\nvalue = [0, 3]" 
Example #10
Source File: test_tree.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_probability():
    # Predict probabilities using DecisionTreeClassifier.

    for name, Tree in CLF_TREES.items():
        clf = Tree(max_depth=1, max_features=1, random_state=42)
        clf.fit(iris.data, iris.target)

        prob_predict = clf.predict_proba(iris.data)
        assert_array_almost_equal(np.sum(prob_predict, 1),
                                  np.ones(iris.data.shape[0]),
                                  err_msg="Failed with {0}".format(name))
        assert_array_equal(np.argmax(prob_predict, 1),
                           clf.predict(iris.data),
                           err_msg="Failed with {0}".format(name))
        assert_almost_equal(clf.predict_proba(iris.data),
                            np.exp(clf.predict_log_proba(iris.data)), 8,
                            err_msg="Failed with {0}".format(name)) 
Example #11
Source File: test_tree.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_importances_gini_equal_mse():
    # Check that gini is equivalent to mse for binary output variable

    X, y = datasets.make_classification(n_samples=2000,
                                        n_features=10,
                                        n_informative=3,
                                        n_redundant=0,
                                        n_repeated=0,
                                        shuffle=False,
                                        random_state=0)

    # The gini index and the mean square error (variance) might differ due
    # to numerical instability. Since those instabilities mainly occur at
    # high tree depth, we restrict this maximal depth.
    clf = DecisionTreeClassifier(criterion="gini", max_depth=5,
                                 random_state=0).fit(X, y)
    reg = DecisionTreeRegressor(criterion="mse", max_depth=5,
                                random_state=0).fit(X, y)

    assert_almost_equal(clf.feature_importances_, reg.feature_importances_)
    assert_array_equal(clf.tree_.feature, reg.tree_.feature)
    assert_array_equal(clf.tree_.children_left, reg.tree_.children_left)
    assert_array_equal(clf.tree_.children_right, reg.tree_.children_right)
    assert_array_equal(clf.tree_.n_node_samples, reg.tree_.n_node_samples) 
Example #12
Source File: test_tree.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_sample_weight_invalid():
    # Check sample weighting raises errors.
    X = np.arange(100)[:, np.newaxis]
    y = np.ones(100)
    y[:50] = 0.0

    clf = DecisionTreeClassifier(random_state=0)

    sample_weight = np.random.rand(100, 1)
    assert_raises(ValueError, clf.fit, X, y, sample_weight=sample_weight)

    sample_weight = np.array(0)
    assert_raises(ValueError, clf.fit, X, y, sample_weight=sample_weight)

    sample_weight = np.ones(101)
    assert_raises(ValueError, clf.fit, X, y, sample_weight=sample_weight)

    sample_weight = np.ones(99)
    assert_raises(ValueError, clf.fit, X, y, sample_weight=sample_weight) 
Example #13
Source File: test_tree.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_huge_allocations():
    n_bits = 8 * struct.calcsize("P")

    X = np.random.randn(10, 2)
    y = np.random.randint(0, 2, 10)

    # Sanity check: we cannot request more memory than the size of the address
    # space. Currently raises OverflowError.
    huge = 2 ** (n_bits + 1)
    clf = DecisionTreeClassifier(splitter='best', max_leaf_nodes=huge)
    assert_raises(Exception, clf.fit, X, y)

    # Non-regression test: MemoryError used to be dropped by Cython
    # because of missing "except *".
    huge = 2 ** (n_bits - 1) - 1
    clf = DecisionTreeClassifier(splitter='best', max_leaf_nodes=huge)
    assert_raises(MemoryError, clf.fit, X, y) 
Example #14
Source File: test_base.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_set_params_passes_all_parameters():
    # Make sure all parameters are passed together to set_params
    # of nested estimator. Regression test for #9944

    class TestDecisionTree(DecisionTreeClassifier):
        def set_params(self, **kwargs):
            super().set_params(**kwargs)
            # expected_kwargs is in test scope
            assert kwargs == expected_kwargs
            return self

    expected_kwargs = {'max_depth': 5, 'min_samples_leaf': 2}
    for est in [Pipeline([('estimator', TestDecisionTree())]),
                GridSearchCV(TestDecisionTree(), {})]:
        est.set_params(estimator__max_depth=5,
                       estimator__min_samples_leaf=2) 
Example #15
Source File: test_base.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_score_sample_weight():

    rng = np.random.RandomState(0)

    # test both ClassifierMixin and RegressorMixin
    estimators = [DecisionTreeClassifier(max_depth=2),
                  DecisionTreeRegressor(max_depth=2)]
    sets = [datasets.load_iris(),
            datasets.load_boston()]

    for est, ds in zip(estimators, sets):
        est.fit(ds.data, ds.target)
        # generate random sample weights
        sample_weight = rng.randint(1, 10, size=len(ds.target))
        # check that the score with and without sample weights are different
        assert_not_equal(est.score(ds.data, ds.target),
                         est.score(ds.data, ds.target,
                                   sample_weight=sample_weight),
                         msg="Unweighted and weighted scores "
                             "are unexpectedly equal") 
Example #16
Source File: test_pdpbox.py    From docker-python with Apache License 2.0
def test_simple_pdp(self):
        # set up data
        data = pd.read_csv("/input/tests/data/fifa_2018_stats.csv")
        y = (data['Man of the Match'] == "Yes")
        feature_names = [i for i in data.columns if data[i].dtype in [np.int64]]
        X = data[feature_names]
        train_X, val_X, train_y, val_y = train_test_split(X, y, random_state=1)
        # Build simple model
        tree_model = DecisionTreeClassifier(random_state=0,
                                            max_depth=3).fit(train_X, train_y)

        # Set up pdp as table
        pdp_goals = pdp.pdp_isolate(model=tree_model,
                                    dataset=val_X,
                                    model_features=feature_names,
                                    feature='Goal Scored')
        # make plot
        pdp.pdp_plot(pdp_goals, 'Goal Scored') 
Example #17
Source File: testScoreWithAdapaSklearn.py    From nyoka with Apache License 2.0
def test_17_decisiontreeclassifier(self):
        print("\ntest 17 (decision tree classifier with preprocessing) [multi-class]\n")
        X, X_test, y, features, target, test_file = self.data_utility.get_data_for_multi_class_classification()

        model = DecisionTreeClassifier()
        pipeline_obj = Pipeline([
            ("scaler", Binarizer()),
            ("model", model)
        ])
        pipeline_obj.fit(X,y)
        file_name = 'test17sklearn.pmml'
        
        skl_to_pmml(pipeline_obj, features, target, file_name)
        model_name  = self.adapa_utility.upload_to_zserver(file_name)
        predictions, probabilities = self.adapa_utility.score_in_zserver(model_name, test_file)
        model_pred = pipeline_obj.predict(X_test)
        model_prob = pipeline_obj.predict_proba(X_test)
        self.assertEqual(self.adapa_utility.compare_predictions(predictions, model_pred), True)
        self.assertEqual(self.adapa_utility.compare_probability(probabilities, model_prob), True) 
Example #18
Source File: testScoreWithAdapaSklearn.py    From nyoka with Apache License 2.0
def test_18_decisiontreeclassifier(self):
        print("\ntest 18 (decision tree classifier with preprocessing) [binary-class]\n")
        X, X_test, y, features, target, test_file = self.data_utility.get_data_for_binary_classification()

        model = DecisionTreeClassifier()
        pipeline_obj = Pipeline([
            ("scaler", Binarizer()),
            ("model", model)
        ])
        pipeline_obj.fit(X,y)
        file_name = 'test18sklearn.pmml'
        
        skl_to_pmml(pipeline_obj, features, target, file_name)
        model_name  = self.adapa_utility.upload_to_zserver(file_name)
        predictions, probabilities = self.adapa_utility.score_in_zserver(model_name, test_file)
        model_pred = pipeline_obj.predict(X_test)
        model_prob = pipeline_obj.predict_proba(X_test)
        self.assertEqual(self.adapa_utility.compare_predictions(predictions, model_pred), True)
        self.assertEqual(self.adapa_utility.compare_probability(probabilities, model_prob), True) 
Example #19
Source File: testScoreWithAdapaSklearn.py    From nyoka with Apache License 2.0
def test_19_decisiontreeclassifier(self):
        print("\ntest 19 (decision tree classifier without preprocessing) [multi-class]\n")
        X, X_test, y, features, target, test_file = self.data_utility.get_data_for_multi_class_classification()

        model = DecisionTreeClassifier()
        pipeline_obj = Pipeline([
            ("model", model)
        ])
        pipeline_obj.fit(X,y)
        file_name = 'test19sklearn.pmml'
        
        skl_to_pmml(pipeline_obj, features, target, file_name)
        model_name  = self.adapa_utility.upload_to_zserver(file_name)
        predictions, probabilities = self.adapa_utility.score_in_zserver(model_name, test_file)
        model_pred = pipeline_obj.predict(X_test)
        model_prob = pipeline_obj.predict_proba(X_test)
        self.assertEqual(self.adapa_utility.compare_predictions(predictions, model_pred), True)
        self.assertEqual(self.adapa_utility.compare_probability(probabilities, model_prob), True) 
Example #20
Source File: testScoreWithAdapaSklearn.py    From nyoka with Apache License 2.0
def test_20_decisiontreeclassifier(self):
        print("\ntest 20 (decision tree classifier without preprocessing) [binary-class]\n")
        X, X_test, y, features, target, test_file = self.data_utility.get_data_for_binary_classification()

        model = DecisionTreeClassifier()
        pipeline_obj = Pipeline([
            ("model", model)
        ])
        pipeline_obj.fit(X,y)
        file_name = 'test20sklearn.pmml'
        
        skl_to_pmml(pipeline_obj, features, target, file_name)
        model_name  = self.adapa_utility.upload_to_zserver(file_name)
        predictions, probabilities = self.adapa_utility.score_in_zserver(model_name, test_file)
        model_pred = pipeline_obj.predict(X_test)
        model_prob = pipeline_obj.predict_proba(X_test)
        self.assertEqual(self.adapa_utility.compare_predictions(predictions, model_pred), True)
        self.assertEqual(self.adapa_utility.compare_probability(probabilities, model_prob), True) 
Example #21
Source File: TrAdaBoost.py    From transferlearning with MIT License
def train_classify(self, trans_data, trans_label, test_data, P):
        clf = tree.DecisionTreeClassifier(criterion="gini", max_features="log2", splitter="random")
        clf.fit(trans_data, trans_label, sample_weight=P[:, 0])
        return clf.predict(test_data) 
Example #22
Source File: utils.py    From contextualbandits with BSD 2-Clause "Simplified" License
def __init__(self, beta_prior=(1,1), ts=False, alpha=0.8, random_state=None,
                 *args, **kwargs):
        self.beta_prior = beta_prior
        self.random_state = random_state
        self.conf_coef = alpha
        self.ts = bool(ts)
        self.model = DecisionTreeClassifier(*args, **kwargs)
        self.is_fitted = False
        self.aux_beta = (beta_prior[0], beta_prior[1]) 
Example #23
Source File: model_loop.py    From fake-news-detection with MIT License
def define_clfs_params(self):
        '''
        Defines all relevant parameters and classes for classfier objects.
        Edit these if you wish to change parameters.
        '''
        # These are the classifiers
        self.clfs = {
            'RF': RandomForestClassifier(n_estimators = 50, n_jobs = -1),
            'ET': ExtraTreesClassifier(n_estimators = 10, n_jobs = -1, criterion = 'entropy'),
            'AB': AdaBoostClassifier(DecisionTreeClassifier(max_depth = 1), algorithm = "SAMME", n_estimators = 200),  # max_depth takes a single int, not a list
            'LR': LogisticRegression(penalty = 'l1', C = 1e5),
            'SVM': svm.SVC(kernel = 'linear', probability = True, random_state = 0),
            'GB': GradientBoostingClassifier(learning_rate = 0.05, subsample = 0.5, max_depth = 6, n_estimators = 10),
            'NB': GaussianNB(),
            'DT': DecisionTreeClassifier(),
            'SGD': SGDClassifier(loss = 'log', penalty = 'l2'),
            'KNN': KNeighborsClassifier(n_neighbors = 3)
            }
        # These are the parameters which will be run through
        self.params = {
             'RF':{'n_estimators': [1,10,100,1000], 'max_depth': [10, 15,20,30,40,50,60,70,100], 'max_features': ['sqrt','log2'],'min_samples_split': [2,5,10], 'random_state': [1]},
             'LR': {'penalty': ['l1','l2'], 'C': [0.00001,0.0001,0.001,0.01,0.1,1,10], 'random_state': [1]},
             'SGD': {'loss': ['log'], 'penalty': ['l2','l1','elasticnet'], 'random_state': [1]},
             'ET': {'n_estimators': [1,10,100,1000], 'criterion' : ['gini', 'entropy'], 'max_depth': [1,3,5,10,15], 'max_features': ['sqrt','log2'],'min_samples_split': [2,5,10], 'random_state': [1]},
             'AB': {'algorithm': ['SAMME', 'SAMME.R'], 'n_estimators': [1,10,100,1000], 'random_state': [1]},
             'GB': {'n_estimators': [1,10,100,1000], 'learning_rate' : [0.001,0.01,0.05,0.1,0.5],'subsample' : [0.1,0.5,1.0], 'max_depth': [1,3,5,10,20,50,100], 'random_state': [1]},
             'NB': {},
             'DT': {'criterion': ['gini', 'entropy'], 'max_depth': [1,2,15,20,30,40,50], 'max_features': ['sqrt','log2'],'min_samples_split': [2,5,10], 'random_state': [1]},
             'SVM' :{'C' :[0.00001,0.0001,0.001,0.01,0.1,1,10],'kernel':['linear'], 'random_state': [1]},
             'KNN' :{'n_neighbors': [1,5,10,25,50,100],'weights': ['uniform','distance'],'algorithm': ['auto','ball_tree','kd_tree']}
             } 
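The clfs and params dicts above are meant to be combined, sweeping each classifier over its parameter grid. A rough, self-contained sketch of that loop for the 'DT' entry, using ParameterGrid on synthetic stand-in data and a trimmed grid (the project's model_loop drives this differently and evaluates richer metrics):

import numpy as np
from sklearn.model_selection import ParameterGrid, train_test_split
from sklearn.tree import DecisionTreeClassifier

# Synthetic stand-in data; in model_loop these come from the fake-news dataset.
rng = np.random.RandomState(1)
X = rng.rand(200, 5)
y = (X[:, 0] > 0.5).astype(int)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

clf = DecisionTreeClassifier()
dt_grid = {'criterion': ['gini', 'entropy'], 'max_depth': [1, 2, 15],
           'min_samples_split': [2, 5, 10], 'random_state': [1]}

# Run through every combination in the grid, as the params dict intends.
for p in ParameterGrid(dt_grid):
    clf.set_params(**p)
    clf.fit(X_train, y_train)
    print(p, clf.score(X_test, y_test))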
Example #24
Source File: DT_sklearn_clf.py    From LearningX with MIT License
def __init__(self, max_depth=20, min_samples_leaf=10):
        from sklearn.tree import DecisionTreeClassifier
        self.model = DecisionTreeClassifier(max_depth=max_depth,
                                            min_samples_leaf=min_samples_leaf,
                                            criterion="gini", splitter="best") 
Example #25
Source File: pipeline.py    From EvalNE with MIT License
def lp_model(self):
        model = self._config.get('GENERAL', 'lp_model')
        if model == 'LogisticRegression':
            return LogisticRegression(solver='liblinear')
        elif model == 'LogisticRegressionCV':
            return LogisticRegressionCV(Cs=10, cv=5, penalty='l2', scoring='roc_auc', solver='lbfgs', max_iter=100)
        elif model == 'DecisionTreeClassifier':
            return DecisionTreeClassifier()
        elif model == 'SVM':
            parameters = {'C': [0.1, 1, 10, 100, 1000]}
            return GridSearchCV(LinearSVC(), parameters, cv=5)
        else:
            return util.auto_import(model) 
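The model name comes from the [GENERAL] section of the evaluation config. A small sketch of how such an option might be parsed with configparser (the config text below is hypothetical and much shorter than EvalNE's real .ini files):

import configparser

conf_text = """
[GENERAL]
lp_model = DecisionTreeClassifier
"""

config = configparser.ConfigParser()
config.read_string(conf_text)
print(config.get('GENERAL', 'lp_model'))   # -> 'DecisionTreeClassifier'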
Example #26
Source File: common.py    From typhon with MIT License
def _ice_cloud_model():
        """Return the default model for the ice cloud classifier"""
        # As simple as it is. We do not need a grid search trainer for the DTC
        # since it has already a good performance.
        return DecisionTreeClassifier(
            max_depth=12, random_state=5, # n_estimators=20, max_features=9,
        ) 
Example #27
Source File: 23_DecisionTree.py    From Python with MIT License
def train_using_gini(X_train, X_test, y_train, data):

    # Creating the classifier object
    clf_gini = DecisionTreeClassifier(criterion="gini", random_state=100,
                                      max_depth=3, min_samples_leaf=5)
    # Performing training
    clf_gini.fit(X_train, y_train)
    visualize_tree(data, clf_gini, 'gini')
    print('\nFeature Importance : ', clf_gini.feature_importances_)
    return clf_gini

# Function to perform training with entropy. 
Example #28
Source File: 23_DecisionTree.py    From Python with MIT License
def train_using_entropy(X_train, X_test, y_train, data):

    # Decision tree with entropy
    clf_entropy = DecisionTreeClassifier(criterion="entropy", random_state=100,
                                         max_depth=3, min_samples_leaf=5)

    # Performing training
    clf_entropy.fit(X_train, y_train)
    visualize_tree(data, clf_entropy, 'entropy')
    print('\nFeature Importance : ', clf_entropy.feature_importances_)
    return clf_entropy


# Function to make predictions 
Example #29
Source File: dtree.py    From Python with MIT License
def train_using_gini(X_train, X_test, y_train, data):

    # Creating the classifier object
    clf_gini = DecisionTreeClassifier(criterion="gini", random_state=100,
                                      max_depth=3, min_samples_leaf=5)
    # Performing training
    clf_gini.fit(X_train, y_train)
    visualize_tree(data, clf_gini, 'gini')
    print('\nFeature Importance : ', clf_gini.feature_importances_)
    return clf_gini

# Function to perform training with entropy. 
Example #30
Source File: dtree.py    From Python with MIT License
def train_using_entropy(X_train, X_test, y_train, data):

    # Decision tree with entropy
    clf_entropy = DecisionTreeClassifier(criterion="entropy", random_state=100,
                                         max_depth=3, min_samples_leaf=5)

    # Performing training
    clf_entropy.fit(X_train, y_train)
    visualize_tree(data, clf_entropy, 'entropy')
    print('\nFeature Importance : ', clf_entropy.feature_importances_)
    return clf_entropy


# Function to make predictions