Python sklearn.tree Examples

The following are 30 code examples of the sklearn.tree module. You can go to the original project or source file by following the links above each example. You may also want to check out all available functions and classes of the module sklearn, or try the search function.
Example #1
Source File: treeinterpreter.py    From treeinterpreter with BSD 3-Clause "New" or "Revised" License
from sklearn.tree import _tree  # needed for the TREE_LEAF sentinel

def _get_tree_paths(tree, node_id, depth=0):
    """
    Returns all paths through the tree as a list of node_id lists.
    """
    if node_id == _tree.TREE_LEAF:
        raise ValueError("Invalid node_id %s" % _tree.TREE_LEAF)

    left_child = tree.children_left[node_id]
    right_child = tree.children_right[node_id]

    if left_child != _tree.TREE_LEAF:  # internal node: recurse into both subtrees
        left_paths = _get_tree_paths(tree, left_child, depth=depth + 1)
        right_paths = _get_tree_paths(tree, right_child, depth=depth + 1)

        for path in left_paths:
            path.append(node_id)
        for path in right_paths:
            path.append(node_id)
        paths = left_paths + right_paths
    else:
        paths = [[node_id]]
    return paths 
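A minimal usage sketch (an illustrative addition, not from the original file): the helper walks the low-level clf.tree_ arrays and collects each path leaf-first, so callers typically reverse the paths afterwards, as treeinterpreter itself does.

from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier

iris = load_iris()
clf = DecisionTreeClassifier(max_depth=3).fit(iris.data, iris.target)

paths = _get_tree_paths(clf.tree_, 0)  # start the recursion at the root (node_id 0)
for path in paths:
    path.reverse()  # built leaf-to-root; flip to root-to-leaf order
print(len(paths), "root-to-leaf paths")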
Example #2
Source File: stage_1_diagnosis.py    From Automated-Cardiac-Segmentation-and-Disease-Diagnosis with MIT License
import subprocess
import sys

from sklearn.tree import export_graphviz

def visualize_tree(tree, feature_names, save_dir='./'):
    """Create tree png using graphviz.

    Args
    ----
    tree -- scikit-learn DecisionTree.
    feature_names -- list of feature names.
    """
    with open(save_dir + '/' + "dt.dot", 'w') as f:
        export_graphviz(tree, out_file=f,
                        feature_names=feature_names)

    command = ["dot", "-Tpng", save_dir + "/dt.dot", "-o", save_dir + "/dt.png"]
    try:
        subprocess.check_call(command)
    except (OSError, subprocess.CalledProcessError):
        sys.exit("Could not run dot, i.e. graphviz, to "
                 "produce visualization")
Example #3
Source File: stage_2_diagnosis.py    From Automated-Cardiac-Segmentation-and-Disease-Diagnosis with MIT License
import subprocess
import sys

from sklearn.tree import export_graphviz

def visualize_tree(tree, feature_names, save_dir='./'):
    """Create tree png using graphviz.

    Args
    ----
    tree -- scikit-learn DecisionTree.
    feature_names -- list of feature names.
    """
    with open(save_dir + '/' + "dt.dot", 'w') as f:
        export_graphviz(tree, out_file=f,
                        feature_names=feature_names)

    command = ["dot", "-Tpng", save_dir + "/dt.dot", "-o", save_dir + "/dt.png"]
    try:
        subprocess.check_call(command)
    except (OSError, subprocess.CalledProcessError):
        sys.exit("Could not run dot, i.e. graphviz, to "
                 "produce visualization")
Example #4
Source File: GetMLPara.py    From dr_droid with Apache License 2.0
def selection_parameters_for_classfier(X, y):

    from sklearn.model_selection import GridSearchCV  # sklearn.grid_search was removed in 0.20
    from sklearn import svm

    #paras = {'n_neighbors': [1, 10], 'weights': ['uniform', 'distance'], 'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute'], 'leaf_size': [20, 50]}
    #knn = KNeighborsClassifier()

    #naive bayes
    #nbg = GaussianNB()
    #nbm = MultinomialNB()
    #nbb = BernoulliNB()

    #decision tree
    #paras = {'criterion': ['gini', 'entropy'], 'splitter': ['random', 'best'], 'max_features': [None, 'auto', 'sqrt', 'log2'], 'min_samples_split': [2, 10]}
    #dtree = DecisionTreeClassifier()

    #random forest
    #rforest = RandomForestClassifier()
    #paras = {'n_estimators': [2, 15], 'criterion': ['gini', 'entropy'], 'max_features': ['auto', 'sqrt', 'log2'], 'min_samples_split': [2, 10]}

    #svm
    svmm = svm.SVC()
    paras = {'kernel': ['rbf', 'linear', 'poly']}


    clt = GridSearchCV(svmm, paras, cv=5)
    clt.fit(X, y)
    print(clt)
    #print(clt.get_params())
    print(clt.best_params_)  # best hyper-parameters found by the grid search
    print(clt.score(X, y))

    #scores = cross_val_score(clt, X, y, cv=10)
    #print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))


# this is to get a score using cross-validation (a plausible sketch follows)
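The helper get_scroe_using_cv (spelling as in the source project) is not included in this listing; judging from the commented-out cross_val_score lines above, a plausible reconstruction is:

from sklearn.model_selection import cross_val_score

def get_scroe_using_cv(clf, X, y):  # hypothetical body; the project's real implementation is not shown
    scores = cross_val_score(clf, X, y, cv=10)
    print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))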
Example #5
Source File: test_base.py    From twitter-stock-recommendation with MIT License
def test_pickle_version_no_warning_is_issued_with_non_sklearn_estimator():
    iris = datasets.load_iris()
    tree = TreeNoVersion().fit(iris.data, iris.target)
    tree_pickle_noversion = pickle.dumps(tree)
    try:
        module_backup = TreeNoVersion.__module__
        TreeNoVersion.__module__ = "notsklearn"
        assert_no_warnings(pickle.loads, tree_pickle_noversion)
    finally:
        TreeNoVersion.__module__ = module_backup 
Example #6
Source File: test_base.py    From twitter-stock-recommendation with MIT License
def test_pickle_version_warning_is_issued_when_no_version_info_in_pickle():
    iris = datasets.load_iris()
    # TreeNoVersion has no __getstate__, like pre-0.18
    tree = TreeNoVersion().fit(iris.data, iris.target)

    tree_pickle_noversion = pickle.dumps(tree)
    assert_false(b"version" in tree_pickle_noversion)
    message = pickle_error_message.format(estimator="TreeNoVersion",
                                          old_version="pre-0.18",
                                          current_version=sklearn.__version__)
    # check we got the warning about using pre-0.18 pickle
    assert_warns_message(UserWarning, message, pickle.loads,
                         tree_pickle_noversion) 
Example #7
Source File: test_base.py    From twitter-stock-recommendation with MIT License
def test_pickle_version_warning_is_issued_upon_different_version():
    iris = datasets.load_iris()
    tree = TreeBadVersion().fit(iris.data, iris.target)
    tree_pickle_other = pickle.dumps(tree)
    message = pickle_error_message.format(estimator="TreeBadVersion",
                                          old_version="something",
                                          current_version=sklearn.__version__)
    assert_warns_message(UserWarning, message, pickle.loads, tree_pickle_other) 
Example #8
Source File: test_base.py    From twitter-stock-recommendation with MIT License
def test_pickle_version_warning_is_not_raised_with_matching_version():
    iris = datasets.load_iris()
    tree = DecisionTreeClassifier().fit(iris.data, iris.target)
    tree_pickle = pickle.dumps(tree)
    assert_true(b"version" in tree_pickle)
    tree_restored = assert_no_warnings(pickle.loads, tree_pickle)

    # test that we can predict with the restored decision tree classifier
    score_of_original = tree.score(iris.data, iris.target)
    score_of_restored = tree_restored.score(iris.data, iris.target)
    assert_equal(score_of_original, score_of_restored) 
Example #9
Source File: baselines.py    From rmnist with MIT License
def baselines(n):
    td, vd, ts = data_loader.load_data(n)
    classifiers = [
        sklearn.svm.SVC(C=1000),
        sklearn.svm.SVC(kernel="linear", C=0.1),
        sklearn.neighbors.KNeighborsClassifier(1),
        sklearn.tree.DecisionTreeClassifier(),
        sklearn.ensemble.RandomForestClassifier(max_depth=10, n_estimators=500, max_features=1),
        sklearn.neural_network.MLPClassifier(alpha=1, hidden_layer_sizes=(500, 100))
    ]
    for clf in classifiers:
        clf.fit(td[0], td[1])
        print "\n{}: {}".format(type(clf).__name__, round(clf.score(vd[0], vd[1])*100, 2)) 
Example #10
Source File: transfer.py    From rmnist with MIT License
def transfer(n):
    td, vd, ts = data_loader.load_data(n, abstract=True, expanded=expanded)
    classifiers = [
        #sklearn.svm.SVC(),
        #sklearn.svm.SVC(kernel="linear", C=0.1),
        #sklearn.neighbors.KNeighborsClassifier(1),
        #sklearn.tree.DecisionTreeClassifier(),
        #sklearn.ensemble.RandomForestClassifier(max_depth=10, n_estimators=500, max_features=1),
        sklearn.neural_network.MLPClassifier(alpha=1.0, hidden_layer_sizes=(300,), max_iter=500)
    ]
    for clf in classifiers:
        clf.fit(td[0], td[1])
        print "\n{}: {}".format(type(clf).__name__, round(clf.score(vd[0], vd[1])*100, 2)) 
Example #11
Source File: Estimators.py    From slates_semisynth_expts with BSD 3-Clause "New" or "Revised" License
def reset(self):
    Estimator.reset(self)
    self.savedValues = None
    if self.estimatorType == 'tree':
        self.tree = None
    else:
        self.policyParams = None
Example #12
Source File: Estimators.py    From slates_semisynth_expts with BSD 3-Clause "New" or "Revised" License
def estimate(self, query, logged_ranking, new_ranking, logged_value):
    currentValue = None
    if self.savedValues is not None:
        currentValue = self.savedValues[query]
    else:
        allFeatures = self.loggingPolicy.dataset.features[query][new_ranking, :]

        if new_ranking.size < self.rankingSize:
            emptyPad = scipy.sparse.csr_matrix((self.rankingSize - new_ranking.size, self.numFeatures), dtype=numpy.float64)
            allFeatures = scipy.sparse.vstack((allFeatures, emptyPad), format="csr", dtype=numpy.float64)

        allFeatures = allFeatures.toarray()
        nRows, nCols = allFeatures.shape
        size = nRows * nCols
        currentFeatures = numpy.reshape(allFeatures, (1, size))

        if self.estimatorType == 'tree':
            currentValue = self.tree.predict(currentFeatures)[0]
        else:
            currentValue = numpy.dot(currentFeatures, self.policyParams)[0]

        del allFeatures
        del currentFeatures

    self.updateRunningAverage(currentValue)
    return self.runningMean
Example #13
Source File: Estimators.py    From slates_semisynth_expts with BSD 3-Clause "New" or "Revised" License
def __init__(self, ranking_size, logging_policy, target_policy, estimator_type):
    Estimator.__init__(self, ranking_size, logging_policy, target_policy)
    self.name = 'Direct_' + estimator_type
    self.estimatorType = estimator_type
    self.numFeatures = self.loggingPolicy.dataset.features[0].shape[1]
    self.hyperParams = {'alpha': (numpy.logspace(-2, 1, num=4, base=10)).tolist()}
    self.treeDepths = {'max_depth': list(range(3, 15, 3))}

    if self.estimatorType == 'tree':
        self.tree = None
    else:
        self.policyParams = None

    # This member is set on-demand by estimateAll(...)
    self.savedValues = None
Example #14
Source File: GetMLPara.py    From dr_droid with Apache License 2.0
def get_fpr_tpr(clt, X, y):

    random_state = np.random.RandomState(0)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=0)

    #from sklearn import tree
    #clt = tree.DecisionTreeClassifier( criterion='entropy', min_samples_leaf=2, min_samples_split=2, random_state=None, splitter='best')
    clt = clt.fit(X_train,y_train)
    #from sklearn.externals.six import StringIO
    #with open("iris_plus.dot", 'w') as f:
    #     f = tree.export_graphviz(clt, out_file=f)

    y_pred = clt.predict(X_test)

    #accuracy score
    _accuracy_score = accuracy_score(y_test, y_pred)

    print ("Accuracy score {}".format(_accuracy_score))

    #roc curve
    probas_ = clt.predict_proba(X_test)
    #print (probas_)
    #draw_confusion_matrix(y_test,y_pred)

    #print probas_
    fpr, tpr, thresholds = roc_curve(y_test, probas_[:, 1])
    #print (fpr, tpr,thresholds)
    roc_auc = auc(fpr, tpr)
    print ("Area under the ROC curve : %f" % roc_auc)

    return fpr, tpr, roc_auc


# this is used to draw 
Example #15
Source File: GetMLPara.py    From dr_droid with Apache License 2.0
def train_and_test(X,y):

    #KNN
    knn = KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski', n_neighbors=5, p=2, weights='uniform')

    #naive bayes
    nbbern = BernoulliNB()

    #decision tree
    dtree = DecisionTreeClassifier( criterion='gini', min_samples_leaf=4, min_samples_split=2, random_state=None, splitter='best')

    #random forest
    rforest = RandomForestClassifier(bootstrap=True, criterion='gini', max_depth=None, max_features='auto',  min_samples_leaf=1, min_samples_split=2, n_estimators=10, n_jobs=1, oob_score=False, random_state=3)

    #svm
    svmrbf = svm.SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0, degree=3, kernel='rbf', max_iter=-1, probability=False, random_state=None,
                     shrinking=True, tol=0.001, verbose=False)


    get_scroe_using_cv(knn, X, y)
    get_scroe_using_cv(nbbern, X, y)
    get_scroe_using_cv(dtree, X, y)
    get_scroe_using_cv(rforest, X, y)
    get_scroe_using_cv(svmrbf, X, y)
    print ("\n")

######################################################################

#this is to draw an example ROC curve by splitting the dataset
#just to make the figure look nicer
Example #16
Source File: GetMLPara.py    From dr_droid with Apache License 2.0
def my_get_fp_fn_CV(X_original,y):

    #generate classifiers
    knn = KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski', n_neighbors=5, p=2, weights='uniform')

    #decision tree
    dtree = DecisionTreeClassifier( criterion='gini', min_samples_leaf=4, min_samples_split=2, random_state=None, splitter='best')

    #naive
    #nbbern = BernoulliNB()

    #random forest
    rforest = RandomForestClassifier(bootstrap=True, criterion='gini', max_depth=None, max_features='auto',  min_samples_leaf=1, min_samples_split=2, n_estimators=10, n_jobs=1, oob_score=False, random_state=3)

    #svm
    svmrbf = svm.SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0, degree=3, kernel='rbf', max_iter=-1, probability=True, random_state=None,
                     shrinking=True, tol=0.001, verbose=False)

    #reduce the feature-set size
    #X = SelectKBest(f_classif, k=80).fit_transform(X_original,y)
    skb = SelectKBest(f_classif, k=80).fit(X_original,y)
    X = skb.transform(X_original)  # skb is already fitted; calling fit_transform again would redundantly refit

    print ("KNN")
    my_get_fp_fn_inter(knn,X,y)
    print ("DTree")
    my_get_fp_fn_inter(dtree,X,y)
    print ("rforest")
    my_get_fp_fn_inter(rforest,X,y)
    #print ("naive bayes")
    #my_get_fp_fn_inter(nbbern,X,y)
    print ("SVMrbf")
    my_get_fp_fn_inter(svmrbf,X,y) 
Example #17
Source File: test_base.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_pickle_version_warning_is_not_raised_with_matching_version():
    iris = datasets.load_iris()
    tree = DecisionTreeClassifier().fit(iris.data, iris.target)
    tree_pickle = pickle.dumps(tree)
    assert b"version" in tree_pickle
    tree_restored = assert_no_warnings(pickle.loads, tree_pickle)

    # test that we can predict with the restored decision tree classifier
    score_of_original = tree.score(iris.data, iris.target)
    score_of_restored = tree_restored.score(iris.data, iris.target)
    assert_equal(score_of_original, score_of_restored) 
Example #18
Source File: tree.py    From toad with MIT License
def dtree(frame, target, criterion = 'gini', depth = None, sample = 0.01, ratio = 0.15):
    tree = DecisionTreeClassifier(
        criterion = criterion,
        min_samples_leaf = sample,
        max_depth = depth,
    )

    tree.fit(frame.fillna(-1), target)

    dot_string = tree_to_dot(tree, frame.columns.values, high_light = ratio)

    dot_to_img(dot_string, file = target.name + '.png') 
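An illustrative call (not part of the original file; toy DataFrame assumed, and dot_to_img needs Graphviz installed to render the image):

import numpy as np
import pandas as pd

df = pd.DataFrame(np.random.rand(500, 3), columns=['a', 'b', 'c'])
target = pd.Series(np.random.randint(0, 2, 500), name='label')  # target.name becomes the image file name
dtree(df, target, depth=3)  # writes label.png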
Example #19
Source File: codecs.py    From Splunking-Crime with GNU Affero General Public License v3.0
def decode(cls, obj):
    import sklearn.tree

    init_args = obj['init_args']
    state = obj['state']

    # Add max_depth for backwards compatibility with PSC 1.2.
    # The previous version did not set max_depth in the state when calling __getstate__:
    # https://github.com/scikit-learn/scikit-learn/blob/51a765acfa4c5d1ec05fc4b406968ad233c75162/sklearn/tree/_tree.pyx#L615
    # It was added in sklearn 0.18 to be used in both __getstate__ and __setstate__:
    # https://github.com/scikit-learn/scikit-learn/blob/ef5cb84a805efbe4bb06516670a9b8c690992bd7/sklearn/tree/_tree.pyx#L649
    # Older models will not have max_depth in their stored state, so a KeyError would be raised.
    # max_depth is only used in the decision_path method, which we don't currently use,
    # where it sizes an np array of zeros in version 0.18:
    # https://github.com/scikit-learn/scikit-learn/blob/ef5cb84a805efbe4bb06516670a9b8c690992bd7/sklearn/tree/_tree.pyx#L926
    # https://github.com/scikit-learn/scikit-learn/blob/ef5cb84a805efbe4bb06516670a9b8c690992bd7/sklearn/tree/_tree.pyx#L991
    state['max_depth'] = state.get('max_depth', 0)

    t = sklearn.tree._tree.Tree(*init_args)
    t.__setstate__(state)

    return t
Example #20
Source File: codecs.py    From Splunking-Crime with GNU Affero General Public License v3.0
def encode(cls, obj):
    import sklearn.tree
    assert type(obj) == sklearn.tree._tree.Tree

    init_args = obj.__reduce__()[1]   # constructor arguments for the Cython Tree
    state = obj.__getstate__()        # node arrays and metadata

    return {
        '__mlspl_type': [type(obj).__module__, type(obj).__name__],
        'init_args': init_args,
        'state': state
    }
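A round-trip sketch (illustrative only: the enclosing codec class is not shown in this listing, so encode and decode are called as plain functions with None standing in for cls):

import numpy as np
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier

iris = load_iris()
clf = DecisionTreeClassifier(max_depth=2).fit(iris.data, iris.target)

payload = encode(None, clf.tree_)   # serialize the low-level Tree
restored = decode(None, payload)    # rebuild a sklearn.tree._tree.Tree
print(restored.predict(iris.data.astype(np.float32))[:1])  # the low-level predict expects float32 input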
Example #21
Source File: ensembles.py    From Azimuth with BSD 3-Clause "New" or "Revised" License
from sklearn import tree

def decisiontree_on_fold(feature_sets, train, test, y, y_all, X, dim, dimsum, learn_options):
    '''
    DecisionTreeRegressor from scikit-learn.
    '''
    clf = tree.DecisionTreeRegressor()
    clf.fit(X[train], y[train][:, 0])
    y_pred = clf.predict(X[test])[:, None]
    return y_pred, clf 
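A minimal sketch (illustrative; most parameters are unused by the body above, so None placeholders suffice):

import numpy as np

rng = np.random.RandomState(0)
X = rng.rand(100, 5)
y = rng.rand(100, 1)   # 2-D, because the body indexes y[train][:, 0]
train, test = np.arange(80), np.arange(80, 100)

y_pred, clf = decisiontree_on_fold(None, train, test, y, None, X, None, None, None)
print(y_pred.shape)    # (20, 1)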
Example #22
Source File: test_base.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_pickle_version_no_warning_is_issued_with_non_sklearn_estimator():
    iris = datasets.load_iris()
    tree = TreeNoVersion().fit(iris.data, iris.target)
    tree_pickle_noversion = pickle.dumps(tree)
    try:
        module_backup = TreeNoVersion.__module__
        TreeNoVersion.__module__ = "notsklearn"
        assert_no_warnings(pickle.loads, tree_pickle_noversion)
    finally:
        TreeNoVersion.__module__ = module_backup 
Example #23
Source File: test_base.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_pickle_version_warning_is_issued_when_no_version_info_in_pickle():
    iris = datasets.load_iris()
    # TreeNoVersion has no __getstate__, like pre-0.18
    tree = TreeNoVersion().fit(iris.data, iris.target)

    tree_pickle_noversion = pickle.dumps(tree)
    assert b"version" not in tree_pickle_noversion
    message = pickle_error_message.format(estimator="TreeNoVersion",
                                          old_version="pre-0.18",
                                          current_version=sklearn.__version__)
    # check we got the warning about using pre-0.18 pickle
    assert_warns_message(UserWarning, message, pickle.loads,
                         tree_pickle_noversion) 
Example #24
Source File: test_base.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_pickle_version_warning_is_issued_upon_different_version():
    iris = datasets.load_iris()
    tree = TreeBadVersion().fit(iris.data, iris.target)
    tree_pickle_other = pickle.dumps(tree)
    message = pickle_error_message.format(estimator="TreeBadVersion",
                                          old_version="something",
                                          current_version=sklearn.__version__)
    assert_warns_message(UserWarning, message, pickle.loads, tree_pickle_other) 
Example #25
Source File: tree.py    From toad with MIT License
def tree_to_dot(tree, features, high_light = 0.15):
    from io import StringIO
    from sklearn.tree import _tree

    out = StringIO()
    tree_ = tree.tree_

    features = np.array([
        features[i] if i != _tree.TREE_UNDEFINED else "undefined!"
        for i in tree_.feature
    ])

    out.write('digraph Tree {\n')
    out.write('edge [fontname="FangSong"];\n')
    out.write('node [shape=box];\n')

    def recurse(node, parent = None, label = None):
        sample = tree_.n_node_samples[node]
        bad_rate = tree_.value[node][0,1] / sample

        out.write('{} [label="'.format(node))

        out.write('bad rate: {:.2%}\n'.format(bad_rate))
        out.write('sample: {:.2%}\n'.format(sample / tree_.n_node_samples[0]))

        # end of label
        out.write('"')

        if bad_rate > high_light:
            out.write(', color="red"')

        # end of node
        out.write('];\n')

        if tree_.feature[node] != _tree.TREE_UNDEFINED:
            name = features[node]
            threshold = tree_.threshold[node]
            recurse(tree_.children_left[node], node, '{} <= {:.2f}'.format(name, threshold))
            recurse(tree_.children_right[node], node, '{} > {:.2f}'.format(name, threshold))

        if parent is not None:
            out.write('{} -> {} [label="{}"];\n'.format(parent, node, label))

    recurse(0, None)

    out.write('}')
    s = out.getvalue()
    out.close()
    return s 
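An illustrative driver (not part of the original file): tree_to_dot takes the fitted wrapper estimator (it reads tree.tree_) and returns a DOT string that can be rendered separately:

import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeClassifier

df = pd.DataFrame(np.random.rand(200, 3), columns=['f1', 'f2', 'f3'])
y = np.random.randint(0, 2, 200)   # binary target: the node label reads value[node][0, 1]
clf = DecisionTreeClassifier(max_depth=2).fit(df, y)

dot_string = tree_to_dot(clf, df.columns.values, high_light=0.2)
with open('tree.dot', 'w') as f:
    f.write(dot_string)
# render with: dot -Tpng tree.dot -o tree.png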
Example #26
Source File: GetMLPara.py    From dr_droid with Apache License 2.0
def final_train_and_test_after_preparation(X_original,y):

    #KNN
    knn = KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski', n_neighbors=5, p=2, weights='uniform')

    #decision tree
    dtree = DecisionTreeClassifier( criterion='entropy', min_samples_leaf=4, min_samples_split=2, random_state=None, splitter='best')

    #random forest
    rforest = RandomForestClassifier(bootstrap=True, criterion='gini', max_depth=None, max_features='auto',   min_samples_leaf=1, min_samples_split=2, n_estimators=10, n_jobs=1, oob_score=False, random_state=3)

    #svm
    svmrbf = svm.SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0, degree=3, kernel='rbf', max_iter=-1, probability=True, random_state=None,
                     shrinking=True, tol=0.001, verbose=False)

    #naive bayes
    #nbb = BernoulliNB()

    X = SelectKBest(f_classif, k=80).fit_transform(X_original,y)
    #X = X_original
    print (X.shape)
    #get_scroe_using_cv(knn, X, y)
    #get_scroe_using_cv(dtree, X, y)
    #get_scroe_using_cv(rforest, X, y)
    #get_scroe_using_cv(svmrbf, X, y)

    fpr_knn, tpr_knn, auc_knn = get_fpr_tpr(knn, X, y)
    fpr_dtree, tpr_dtree, auc_dtree = get_fpr_tpr(dtree, X, y)
    fpr_rforest, tpr_rforest, auc_rforest = get_fpr_tpr(rforest, X, y)
    fpr_svmrbf, tpr_svmrbf ,auc_svmrbf= get_fpr_tpr(svmrbf, X, y)
    #fpr_nbb, tpr_nbb ,auc_nbb= get_fpr_tpr(nbb, X, y)

    plt.clf()
    plt.plot(fpr_svmrbf, tpr_svmrbf, 'y.--', label ='SVM AUC=%0.4f'% auc_svmrbf)
    plt.plot(fpr_knn, tpr_knn, 'r^--', label='KNN AUC=%0.4f' %auc_knn)
    plt.plot(fpr_dtree, tpr_dtree, 'b>--', label ='D.Tree AUC=%0.4f'% auc_dtree)
    plt.plot(fpr_rforest, tpr_rforest, 'go--', label ='R.Forest AUC=%0.4f'% auc_rforest)
    #plt.plot(fpr_nbb, tpr_nbb, 'c*--', label ='Random Forest auc=%0.4f'% auc_nbb)


    plt.plot([0, 1], [0, 1], 'k--')
    plt.xlim([-0.02, 1.02])
    plt.ylim([-0.02, 1.02])
    plt.xlabel('FPR(False Positive Rate)',fontsize=20)
    plt.ylabel('TPR(True Positive Rate)',fontsize=20)
    #plt.title('Receiver operating characteristic ')
    plt.legend(loc="lower right")
    plt.tight_layout()
    plt.grid()
    plt.show()

    del X
    del y

################################## DRAW P-R CURVE #######################################################
# this is the precision and recall curve
Example #27
Source File: Estimators.py    From slates_semisynth_expts with BSD 3-Clause "New" or "Revised" License
def estimateAll(self, metric=None):
    if self.savedValues is not None:
        return

    self.savedValues = []
    numQueries = len(self.loggingPolicy.dataset.docsPerQuery)
    for query in range(numQueries):
        newRanking = self.targetPolicy.predict(query, self.rankingSize)
        allFeatures = self.loggingPolicy.dataset.features[query][newRanking, :]

        if newRanking.size < self.rankingSize:
            emptyPad = scipy.sparse.csr_matrix((self.rankingSize - newRanking.size, self.numFeatures), dtype=numpy.float64)
            allFeatures = scipy.sparse.vstack((allFeatures, emptyPad), format="csr", dtype=numpy.float64)

        allFeatures = allFeatures.toarray()
        nRows, nCols = allFeatures.shape
        size = nRows * nCols
        currentFeatures = numpy.reshape(allFeatures, (1, size))

        currentValue = None
        if self.estimatorType == 'tree':
            currentValue = self.tree.predict(currentFeatures)[0]
        else:
            currentValue = numpy.dot(currentFeatures, self.policyParams)[0]

        low = None
        high = None
        if metric is not None:
            low = metric.getMin(newRanking.size)
            high = metric.getMax(newRanking.size)

        if low is not None:
            currentValue = max(currentValue, low)
        if high is not None:
            currentValue = min(currentValue, high)

        if currentValue > 1.0 or currentValue < 0.0:
            print("Direct:estimateAll [LOG] estimate %0.3f " % (currentValue), flush=True)

        del allFeatures
        del currentFeatures

        self.savedValues.append(currentValue)

        if query % 100 == 0:
            print(".", end="", flush=True)

    print("")
    print("Direct:estimateAll [LOG] Precomputed estimates.", flush=True)
Example #28
Source File: decision_tree.py    From mljar-supervised with MIT License
def interpret(
    self,
    X_train,
    y_train,
    X_validation,
    y_validation,
    model_file_path,
    learner_name,
    target_name=None,
    class_names=None,
    metric_name=None,
    ml_task=None,
    explain_level=2,
):
    super(DecisionTreeRegressorAlgorithm, self).interpret(
        X_train,
        y_train,
        X_validation,
        y_validation,
        model_file_path,
        learner_name,
        target_name,
        class_names,
        metric_name,
        ml_task,
        explain_level,
    )
    if explain_level == 0:
        return
    try:
        viz = dtreeviz(
            self.model,
            X_train,
            y_train,
            target_name="target",
            feature_names=X_train.columns,
        )
        tree_file_plot = os.path.join(model_file_path, learner_name + "_tree.svg")
        viz.save(tree_file_plot)
    except Exception as e:
        logger.info(f"Problem when visualizing decision tree regressor. {str(e)}")
Example #29
Source File: decision_tree.py    From mljar-supervised with MIT License
def interpret(
    self,
    X_train,
    y_train,
    X_validation,
    y_validation,
    model_file_path,
    learner_name,
    target_name=None,
    class_names=None,
    metric_name=None,
    ml_task=None,
    explain_level=2,
):
    super(DecisionTreeAlgorithm, self).interpret(
        X_train,
        y_train,
        X_validation,
        y_validation,
        model_file_path,
        learner_name,
        target_name,
        class_names,
        metric_name,
        ml_task,
        explain_level,
    )
    if explain_level == 0:
        return
    try:
        if len(class_names) > 10:
            # dtreeviz does not support more than 10 classes
            return
        viz = dtreeviz(
            self.model,
            X_train,
            y_train,
            target_name="target",
            feature_names=X_train.columns,
            class_names=class_names,
        )
        tree_file_plot = os.path.join(model_file_path, learner_name + "_tree.svg")
        viz.save(tree_file_plot)
    except Exception as e:
        logger.info(f"Problem when visualizing decision tree. {str(e)}")
Example #30
Source File: treeinterpreter.py    From treeinterpreter with BSD 3-Clause "New" or "Revised" License
def _predict_forest(model, X, joint_contribution=False):
    """
    For a given RandomForestRegressor, RandomForestClassifier,
    ExtraTreesRegressor, or ExtraTreesClassifier returns a triple of
    [prediction, bias and feature_contributions], such that prediction ≈ bias +
    feature_contributions.
    """

    if joint_contribution:
        biases = []
        contributions = []
        predictions = []
        
        for tree in model.estimators_:
            pred, bias, contribution = _predict_tree(tree, X, joint_contribution=joint_contribution)

            biases.append(bias)
            contributions.append(contribution)
            predictions.append(pred)
        
        
        total_contributions = []

        for i in range(len(X)):
            contr = {}
            for j, dct in enumerate(contributions):
                for k in set(dct[i]).union(set(contr.keys())):
                    # running mean of each feature subset's contribution across trees
                    contr[k] = (contr.get(k, 0) * j + dct[i].get(k, 0)) / (j + 1)

            total_contributions.append(contr)

        return (np.mean(predictions, axis=0), np.mean(biases, axis=0),
                total_contributions)
    else:
        mean_pred = None
        mean_bias = None
        mean_contribution = None

        for i, tree in enumerate(model.estimators_):
            pred, bias, contribution = _predict_tree(tree, X)

            if i < 1: # first iteration
                mean_bias = bias
                mean_contribution = contribution
                mean_pred = pred
            else:
                mean_bias = _iterative_mean(i, mean_bias, bias)
                mean_contribution = _iterative_mean(i, mean_contribution, contribution)
                mean_pred = _iterative_mean(i, mean_pred, pred)

        return mean_pred, mean_bias, mean_contribution
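A hedged usage sketch (assumes treeinterpreter's other helpers, _predict_tree and _iterative_mean, are in scope): for a regression forest the returned pieces satisfy prediction ≈ bias + sum of per-feature contributions, which is easy to verify:

import numpy as np
from sklearn.datasets import load_diabetes
from sklearn.ensemble import RandomForestRegressor

data = load_diabetes()
rf = RandomForestRegressor(n_estimators=20, random_state=0).fit(data.data, data.target)

pred, bias, contribs = _predict_forest(rf, data.data[:5])
# sanity check: each prediction decomposes into bias + feature contributions
assert np.allclose(pred, bias + contribs.sum(axis=1))
print(pred)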