Python sklearn.tree Examples
The following are 30 code examples of the sklearn.tree module.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module sklearn, or try the search function.
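Before the examples, here is a minimal, self-contained sketch of the module's most common entry point, DecisionTreeClassifier; the dataset and the max_depth setting are illustrative only.

from sklearn import tree
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

iris = load_iris()
X_train, X_test, y_train, y_test = train_test_split(
    iris.data, iris.target, random_state=0)

clf = tree.DecisionTreeClassifier(max_depth=3)  # illustrative depth cap
clf.fit(X_train, y_train)
print(clf.score(X_test, y_test))  # mean accuracy on the held-out split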

Example #1
Source File: treeinterpreter.py From treeinterpreter with BSD 3-Clause "New" or "Revised" License | 6 votes |
def _get_tree_paths(tree, node_id, depth=0):
    """
    Returns all paths through the tree as list of node_ids
    """
    if node_id == _tree.TREE_LEAF:
        raise ValueError("Invalid node_id %s" % _tree.TREE_LEAF)

    left_child = tree.children_left[node_id]
    right_child = tree.children_right[node_id]

    if left_child != _tree.TREE_LEAF:
        left_paths = _get_tree_paths(tree, left_child, depth=depth + 1)
        right_paths = _get_tree_paths(tree, right_child, depth=depth + 1)

        for path in left_paths:
            path.append(node_id)
        for path in right_paths:
            path.append(node_id)
        paths = left_paths + right_paths
    else:
        paths = [[node_id]]

    return paths
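A minimal usage sketch for this helper, assuming the function above is in scope together with its module-level import from sklearn.tree import _tree; the fitted classifier is illustrative:

from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier

iris = load_iris()
clf = DecisionTreeClassifier(max_depth=2).fit(iris.data, iris.target)

# Node 0 is the root; each returned path is a list of node ids ordered
# leaf-to-root (ids are appended on the way back up the recursion).
paths = _get_tree_paths(clf.tree_, 0)
print(len(paths), "leaf-to-root paths")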
Example #2
Source File: stage_1_diagnosis.py From Automated-Cardiac-Segmentation-and-Disease-Diagnosis with MIT License | 6 votes |
def visualize_tree(tree, feature_names, save_dir='./'):
    """Create tree png using graphviz.

    Args
    ----
    tree -- scikit-learn DecisionTree.
    feature_names -- list of feature names.
    """
    with open(save_dir + '/' + "dt.dot", 'w') as f:
        export_graphviz(tree, out_file=f, feature_names=feature_names)

    command = ["dot", "-Tpng", save_dir + "/dt.dot", "-o", save_dir + "/dt.png"]
    try:
        subprocess.check_call(command)
    except (OSError, subprocess.CalledProcessError):
        exit("Could not run dot, ie graphviz, to produce visualization")
Example #3
Source File: stage_2_diagnosis.py From Automated-Cardiac-Segmentation-and-Disease-Diagnosis with MIT License | 6 votes |
def visualize_tree(tree, feature_names, save_dir='./'):
    """Create tree png using graphviz.

    Args
    ----
    tree -- scikit-learn DecisionTree.
    feature_names -- list of feature names.
    """
    with open(save_dir + '/' + "dt.dot", 'w') as f:
        export_graphviz(tree, out_file=f, feature_names=feature_names)

    command = ["dot", "-Tpng", save_dir + "/dt.dot", "-o", save_dir + "/dt.png"]
    try:
        subprocess.check_call(command)
    except (OSError, subprocess.CalledProcessError):
        exit("Could not run dot, ie graphviz, to produce visualization")
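A usage sketch for visualize_tree, assuming the function above is in scope along with its module-level imports (export_graphviz, subprocess) and that the graphviz dot binary is installed and on PATH:

from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier

iris = load_iris()
clf = DecisionTreeClassifier(max_depth=3).fit(iris.data, iris.target)

# Writes dt.dot and renders dt.png into the current directory.
visualize_tree(clf, list(iris.feature_names), save_dir='.')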
Example #4
Source File: test_base.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_pickle_version_warning_is_not_raised_with_matching_version():
    iris = datasets.load_iris()
    tree = DecisionTreeClassifier().fit(iris.data, iris.target)
    tree_pickle = pickle.dumps(tree)
    assert b"version" in tree_pickle
    tree_restored = assert_no_warnings(pickle.loads, tree_pickle)

    # test that we can predict with the restored decision tree classifier
    score_of_original = tree.score(iris.data, iris.target)
    score_of_restored = tree_restored.score(iris.data, iris.target)
    assert_equal(score_of_original, score_of_restored)
Example #5
Source File: test_base.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_pickle_version_warning_is_issued_upon_different_version():
    iris = datasets.load_iris()
    tree = TreeBadVersion().fit(iris.data, iris.target)
    tree_pickle_other = pickle.dumps(tree)
    message = pickle_error_message.format(estimator="TreeBadVersion",
                                          old_version="something",
                                          current_version=sklearn.__version__)
    assert_warns_message(UserWarning, message, pickle.loads, tree_pickle_other)
Example #6
Source File: test_base.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_pickle_version_warning_is_issued_when_no_version_info_in_pickle():
    iris = datasets.load_iris()
    # TreeNoVersion has no getstate, like pre-0.18
    tree = TreeNoVersion().fit(iris.data, iris.target)

    tree_pickle_noversion = pickle.dumps(tree)
    assert b"version" not in tree_pickle_noversion
    message = pickle_error_message.format(estimator="TreeNoVersion",
                                          old_version="pre-0.18",
                                          current_version=sklearn.__version__)
    # check we got the warning about using pre-0.18 pickle
    assert_warns_message(UserWarning, message, pickle.loads,
                         tree_pickle_noversion)
Example #7
Source File: test_base.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_pickle_version_no_warning_is_issued_with_non_sklearn_estimator():
    iris = datasets.load_iris()
    tree = TreeNoVersion().fit(iris.data, iris.target)
    tree_pickle_noversion = pickle.dumps(tree)
    try:
        module_backup = TreeNoVersion.__module__
        TreeNoVersion.__module__ = "notsklearn"
        assert_no_warnings(pickle.loads, tree_pickle_noversion)
    finally:
        TreeNoVersion.__module__ = module_backup
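These four tests hinge on scikit-learn embedding its version string into every estimator pickle via __getstate__. A quick way to observe this on a 0.18+ install (the exact key name is a scikit-learn internal, so treat it as an assumption):

import pickle
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier

iris = load_iris()
clf = DecisionTreeClassifier().fit(iris.data, iris.target)

state = clf.__getstate__()
print(state.get('_sklearn_version'))   # internal key per BaseEstimator
assert b"version" in pickle.dumps(clf)  # same check the tests above make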
Example #8
Source File: ensembles.py From Azimuth with BSD 3-Clause "New" or "Revised" License | 5 votes |
def decisiontree_on_fold(feature_sets, train, test, y, y_all, X, dim, dimsum, learn_options):
    '''
    DecisionTreeRegressor from scikit-learn.
    '''
    clf = tree.DecisionTreeRegressor()
    clf.fit(X[train], y[train][:, 0])
    y_pred = clf.predict(X[test])[:, None]
    return y_pred, clf
Example #9
Source File: codecs.py From Splunking-Crime with GNU Affero General Public License v3.0 | 5 votes |
def encode(cls, obj):
    import sklearn.tree

    assert type(obj) == sklearn.tree._tree.Tree

    init_args = obj.__reduce__()[1]
    state = obj.__getstate__()

    return {
        '__mlspl_type': [type(obj).__module__, type(obj).__name__],
        'init_args': init_args,
        'state': state
    }
Example #10
Source File: codecs.py From Splunking-Crime with GNU Affero General Public License v3.0 | 5 votes |
def decode(cls, obj):
    import sklearn.tree

    init_args = obj['init_args']
    state = obj['state']

    # Add max_depth for backwards compatibility with PSC 1.2.
    # Previous versions did not set max_depth in the state when calling __getstate__:
    # https://github.com/scikit-learn/scikit-learn/blob/51a765acfa4c5d1ec05fc4b406968ad233c75162/sklearn/tree/_tree.pyx#L615
    # It was added in sklearn 0.18 for use in both __getstate__ and __setstate__:
    # https://github.com/scikit-learn/scikit-learn/blob/ef5cb84a805efbe4bb06516670a9b8c690992bd7/sklearn/tree/_tree.pyx#L649
    # Older models will not have max_depth in their stored state, so a KeyError
    # would be raised. max_depth is only used in the decision_path method, which
    # we don't currently use, and in 0.18 it is used to init an np array of zeros:
    # https://github.com/scikit-learn/scikit-learn/blob/ef5cb84a805efbe4bb06516670a9b8c690992bd7/sklearn/tree/_tree.pyx#L926
    # https://github.com/scikit-learn/scikit-learn/blob/ef5cb84a805efbe4bb06516670a9b8c690992bd7/sklearn/tree/_tree.pyx#L991
    state['max_depth'] = state.get('max_depth', 0)

    t = sklearn.tree._tree.Tree(*init_args)
    t.__setstate__(state)

    return t
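These encode/decode classmethods belong to a codec class not shown in the snippet; calling them through a hypothetical class name TreeCodec, a round-trip sketch might look like this:

from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier

iris = load_iris()
clf = DecisionTreeClassifier(max_depth=3).fit(iris.data, iris.target)

# TreeCodec is a hypothetical stand-in for the codec class these methods live on.
blob = TreeCodec.encode(clf.tree_)  # plain dict of init args + state
rebuilt = TreeCodec.decode(blob)    # a fresh sklearn.tree._tree.Tree
assert (rebuilt.children_left == clf.tree_.children_left).all()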
Example #11
Source File: tree.py From toad with MIT License | 5 votes |
def dtree(frame, target, criterion='gini', depth=None, sample=0.01, ratio=0.15):
    tree = DecisionTreeClassifier(
        criterion=criterion,
        min_samples_leaf=sample,
        max_depth=depth,
    )
    tree.fit(frame.fillna(-1), target)

    dot_string = tree_to_dot(tree, frame.columns.values, high_light=ratio)
    dot_to_img(dot_string, file=target.name + '.png')
Example #12
Source File: GetMLPara.py From dr_droid with Apache License 2.0 | 5 votes |
def selection_parameters_for_classfier(X, y):
    from sklearn import grid_search

    #paras = {'n_neighbors': [1, 10], 'weights': ['uniform', 'distance'],
    #         'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute'], 'leaf_size': [20, 50]}
    #knn = KNeighborsClassifier()

    #naive_bayes
    #nbg = GaussianNB()
    #nbm = MultinomialNB()
    #nbb = BernoulliNB()

    #decision tree
    #paras = {'criterion': ['gini', 'entropy'], 'splitter': ['random', 'best'],
    #         'max_features': [None, 'auto', 'sqrt', 'log2'], 'min_samples_split': [1, 10]}
    #dtree = DecisionTreeClassifier()

    #random forest
    #rforest = RandomForestClassifier()
    #paras = {'n_estimators': [2, 15], 'criterion': ['gini', 'entropy'],
    #         'max_features': ['auto', 'sqrt', 'log2'], 'min_samples_split': [1, 10]}

    #svm
    svmm = svm.SVC()
    paras = {'kernel': ['rbf', 'linear', 'poly']}

    clt = grid_search.GridSearchCV(svmm, paras, cv=5)
    clt.fit(X, y)

    print(clt)
    #print(clt.get_params())
    print(clt.set_params())
    print(clt.score(X, y))

    #scores = cross_val_score(clt, X, y, cv=10)
    #print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))

#this is to get score using cross_validation
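Note that the sklearn.grid_search module used above was deprecated in scikit-learn 0.18 and later removed. A minimal modern equivalent, with an illustrative parameter grid and dataset:

from sklearn import svm
from sklearn.datasets import load_iris
from sklearn.model_selection import GridSearchCV

X, y = load_iris(return_X_y=True)

paras = {'kernel': ['rbf', 'linear', 'poly']}  # illustrative grid
clt = GridSearchCV(svm.SVC(), paras, cv=5)
clt.fit(X, y)
print(clt.best_params_, clt.best_score_)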
Example #13
Source File: GetMLPara.py From dr_droid with Apache License 2.0 | 5 votes |
def my_get_fp_fn_CV(X_original, y):
    #generate classifiers
    knn = KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                               n_neighbors=5, p=2, weights='uniform')

    #decision tree
    dtree = DecisionTreeClassifier(criterion='gini', min_samples_leaf=4,
                                   min_samples_split=2, random_state=None,
                                   splitter='best')

    #naive bayes
    #nbbern = BernoulliNB()

    #random forest
    rforest = RandomForestClassifier(bootstrap=True, criterion='gini', max_depth=None,
                                     max_features='auto', min_samples_leaf=1,
                                     min_samples_split=2, n_estimators=10, n_jobs=1,
                                     oob_score=False, random_state=3)

    #svm
    svmrbf = svm.SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0, degree=3,
                     kernel='rbf', max_iter=-1, probability=True, random_state=None,
                     shrinking=True, tol=0.001, verbose=False)

    #reduce the size
    #X = SelectKBest(f_classif, k=80).fit_transform(X_original, y)
    skb = SelectKBest(f_classif, k=80).fit(X_original, y)
    X = skb.fit_transform(X_original, y)

    print("KNN")
    my_get_fp_fn_inter(knn, X, y)
    print("DTree")
    my_get_fp_fn_inter(dtree, X, y)
    print("rforest")
    my_get_fp_fn_inter(rforest, X, y)
    #print("naive bayes")
    #my_get_fp_fn_inter(nbbern, X, y)
    print("SVMrbf")
    my_get_fp_fn_inter(svmrbf, X, y)
Example #14
Source File: GetMLPara.py From dr_droid with Apache License 2.0 | 5 votes |
def train_and_test(X, y):
    #KNN
    knn = KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                               n_neighbors=5, p=2, weights='uniform')

    #naive bayes
    nbbern = BernoulliNB()

    #decision tree
    dtree = DecisionTreeClassifier(criterion='gini', min_samples_leaf=4,
                                   min_samples_split=2, random_state=None,
                                   splitter='best')

    #random forest
    rforest = RandomForestClassifier(bootstrap=True, criterion='gini', max_depth=None,
                                     max_features='auto', min_samples_leaf=1,
                                     min_samples_split=2, n_estimators=10, n_jobs=1,
                                     oob_score=False, random_state=3)

    #svm
    svmrbf = svm.SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0, degree=3,
                     kernel='rbf', max_iter=-1, probability=False, random_state=None,
                     shrinking=True, tol=0.001, verbose=False)

    get_scroe_using_cv(knn, X, y)
    get_scroe_using_cv(nbbern, X, y)
    get_scroe_using_cv(dtree, X, y)
    get_scroe_using_cv(rforest, X, y)
    get_scroe_using_cv(svmrbf, X, y)
    print("\n")

######################################################################
#this is to draw the ROC curve example by splitting the dataset
#just want a figure to make it more beautiful
Example #15
Source File: GetMLPara.py From dr_droid with Apache License 2.0 | 5 votes |
def get_fpr_tpr(clt, X, y):
    random_state = np.random.RandomState(0)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25,
                                                        random_state=0)

    #from sklearn import tree
    #clt = tree.DecisionTreeClassifier(criterion='entropy', min_samples_leaf=2,
    #                                  min_samples_split=2, random_state=None,
    #                                  splitter='best')
    clt = clt.fit(X_train, y_train)

    #from sklearn.externals.six import StringIO
    #with open("iris_plus.dot", 'w') as f:
    #    f = tree.export_graphviz(clt, out_file=f)

    y_pred = clt.predict(X_test)

    #accuracy score
    _accuracy_score = accuracy_score(y_test, y_pred)
    print("Accuracy score {}".format(_accuracy_score))

    #roc curve
    probas_ = clt.predict_proba(X_test)
    #print(probas_)
    #draw_confusion_matrix(y_test, y_pred)

    fpr, tpr, thresholds = roc_curve(y_test, probas_[:, 1])
    #print(fpr, tpr, thresholds)
    roc_auc = auc(fpr, tpr)
    print("Area under the ROC curve : %f" % roc_auc)

    return fpr, tpr, roc_auc

# this is used to draw
Example #16
Source File: Estimators.py From slates_semisynth_expts with BSD 3-Clause "New" or "Revised" License | 5 votes |
def __init__(self, ranking_size, logging_policy, target_policy, estimator_type):
    Estimator.__init__(self, ranking_size, logging_policy, target_policy)
    self.name = 'Direct_' + estimator_type
    self.estimatorType = estimator_type
    self.numFeatures = self.loggingPolicy.dataset.features[0].shape[1]
    self.hyperParams = {'alpha': (numpy.logspace(-2, 1, num=4, base=10)).tolist()}
    self.treeDepths = {'max_depth': list(range(3, 15, 3))}

    if self.estimatorType == 'tree':
        self.tree = None
    else:
        self.policyParams = None

    #This member is set on-demand by estimateAll(...)
    self.savedValues = None
Example #17
Source File: Estimators.py From slates_semisynth_expts with BSD 3-Clause "New" or "Revised" License | 5 votes |
def estimate(self, query, logged_ranking, new_ranking, logged_value):
    currentValue = None
    if self.savedValues is not None:
        currentValue = self.savedValues[query]
    else:
        allFeatures = self.loggingPolicy.dataset.features[query][new_ranking, :]
        if new_ranking.size < self.rankingSize:
            emptyPad = scipy.sparse.csr_matrix(
                (self.rankingSize - new_ranking.size, self.numFeatures),
                dtype=numpy.float64)
            allFeatures = scipy.sparse.vstack((allFeatures, emptyPad),
                                              format="csr", dtype=numpy.float64)
        allFeatures = allFeatures.toarray()

        nRows, nCols = allFeatures.shape
        size = nRows * nCols
        currentFeatures = numpy.reshape(allFeatures, (1, size))

        if self.estimatorType == 'tree':
            currentValue = self.tree.predict(currentFeatures)[0]
        else:
            currentValue = numpy.dot(currentFeatures, self.policyParams)[0]

        del allFeatures
        del currentFeatures

    self.updateRunningAverage(currentValue)
    return self.runningMean
Example #18
Source File: Estimators.py From slates_semisynth_expts with BSD 3-Clause "New" or "Revised" License | 5 votes |
def reset(self):
    Estimator.reset(self)
    self.savedValues = None
    if self.estimatorType == 'tree':
        self.tree = None
    else:
        self.policyParams = None
Example #19
Source File: transfer.py From rmnist with MIT License | 5 votes |
def transfer(n):
    td, vd, ts = data_loader.load_data(n, abstract=True, expanded=expanded)
    classifiers = [
        #sklearn.svm.SVC(),
        #sklearn.svm.SVC(kernel="linear", C=0.1),
        #sklearn.neighbors.KNeighborsClassifier(1),
        #sklearn.tree.DecisionTreeClassifier(),
        #sklearn.ensemble.RandomForestClassifier(max_depth=10, n_estimators=500, max_features=1),
        sklearn.neural_network.MLPClassifier(alpha=1.0, hidden_layer_sizes=(300,), max_iter=500)
    ]
    for clf in classifiers:
        clf.fit(td[0], td[1])
        # Python 2 print statement
        print "\n{}: {}".format(type(clf).__name__, round(clf.score(vd[0], vd[1])*100, 2))
Example #20
Source File: baselines.py From rmnist with MIT License | 5 votes |
def baselines(n):
    td, vd, ts = data_loader.load_data(n)
    classifiers = [
        sklearn.svm.SVC(C=1000),
        sklearn.svm.SVC(kernel="linear", C=0.1),
        sklearn.neighbors.KNeighborsClassifier(1),
        sklearn.tree.DecisionTreeClassifier(),
        sklearn.ensemble.RandomForestClassifier(max_depth=10, n_estimators=500, max_features=1),
        sklearn.neural_network.MLPClassifier(alpha=1, hidden_layer_sizes=(500, 100))
    ]
    for clf in classifiers:
        clf.fit(td[0], td[1])
        # Python 2 print statement (a Python 3 variant follows below)
        print "\n{}: {}".format(type(clf).__name__, round(clf.score(vd[0], vd[1])*100, 2))
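Examples #19 and #20 above use Python 2 print statements; a mechanical Python 3 translation of the reporting loop, with nothing else changed:

for clf in classifiers:
    clf.fit(td[0], td[1])
    print("\n{}: {}".format(type(clf).__name__,
                            round(clf.score(vd[0], vd[1]) * 100, 2)))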
Example #21
Source File: test_base.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_pickle_version_warning_is_not_raised_with_matching_version():
    iris = datasets.load_iris()
    tree = DecisionTreeClassifier().fit(iris.data, iris.target)
    tree_pickle = pickle.dumps(tree)
    assert_true(b"version" in tree_pickle)
    tree_restored = assert_no_warnings(pickle.loads, tree_pickle)

    # test that we can predict with the restored decision tree classifier
    score_of_original = tree.score(iris.data, iris.target)
    score_of_restored = tree_restored.score(iris.data, iris.target)
    assert_equal(score_of_original, score_of_restored)
Example #22
Source File: test_base.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_pickle_version_warning_is_issued_upon_different_version():
    iris = datasets.load_iris()
    tree = TreeBadVersion().fit(iris.data, iris.target)
    tree_pickle_other = pickle.dumps(tree)
    message = pickle_error_message.format(estimator="TreeBadVersion",
                                          old_version="something",
                                          current_version=sklearn.__version__)
    assert_warns_message(UserWarning, message, pickle.loads, tree_pickle_other)
Example #23
Source File: test_base.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_pickle_version_warning_is_issued_when_no_version_info_in_pickle():
    iris = datasets.load_iris()
    # TreeNoVersion has no getstate, like pre-0.18
    tree = TreeNoVersion().fit(iris.data, iris.target)

    tree_pickle_noversion = pickle.dumps(tree)
    assert_false(b"version" in tree_pickle_noversion)
    message = pickle_error_message.format(estimator="TreeNoVersion",
                                          old_version="pre-0.18",
                                          current_version=sklearn.__version__)
    # check we got the warning about using pre-0.18 pickle
    assert_warns_message(UserWarning, message, pickle.loads,
                         tree_pickle_noversion)
Example #24
Source File: test_base.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_pickle_version_no_warning_is_issued_with_non_sklearn_estimator():
    iris = datasets.load_iris()
    tree = TreeNoVersion().fit(iris.data, iris.target)
    tree_pickle_noversion = pickle.dumps(tree)
    try:
        module_backup = TreeNoVersion.__module__
        TreeNoVersion.__module__ = "notsklearn"
        assert_no_warnings(pickle.loads, tree_pickle_noversion)
    finally:
        TreeNoVersion.__module__ = module_backup
Example #25
Source File: treeinterpreter.py From treeinterpreter with BSD 3-Clause "New" or "Revised" License | 4 votes |
def _predict_forest(model, X, joint_contribution=False):
    """
    For a given RandomForestRegressor, RandomForestClassifier,
    ExtraTreesRegressor, or ExtraTreesClassifier returns a triple of
    [prediction, bias and feature_contributions], such that prediction ≈ bias +
    feature_contributions.
    """
    if joint_contribution:
        biases = []
        contributions = []
        predictions = []

        for tree in model.estimators_:
            pred, bias, contribution = _predict_tree(
                tree, X, joint_contribution=joint_contribution)
            biases.append(bias)
            contributions.append(contribution)
            predictions.append(pred)

        total_contributions = []

        for i in range(len(X)):
            contr = {}
            for j, dct in enumerate(contributions):
                for k in set(dct[i]).union(set(contr.keys())):
                    contr[k] = (contr.get(k, 0) * j + dct[i].get(k, 0)) / (j + 1)
            total_contributions.append(contr)

        # NOTE: this loop has no effect; the bare subscript and the unused
        # `sm` are dead code carried over from the original source.
        for i, item in enumerate(contribution):
            total_contributions[i]
            sm = sum([v for v in contribution[i].values()])

        return (np.mean(predictions, axis=0), np.mean(biases, axis=0),
                total_contributions)
    else:
        mean_pred = None
        mean_bias = None
        mean_contribution = None

        for i, tree in enumerate(model.estimators_):
            pred, bias, contribution = _predict_tree(tree, X)

            if i < 1:  # first iteration
                mean_bias = bias
                mean_contribution = contribution
                mean_pred = pred
            else:
                mean_bias = _iterative_mean(i, mean_bias, bias)
                mean_contribution = _iterative_mean(i, mean_contribution, contribution)
                mean_pred = _iterative_mean(i, mean_pred, pred)

        return mean_pred, mean_bias, mean_contribution
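_predict_forest is an internal helper; in practice one calls treeinterpreter's public predict function, roughly as follows (assuming the treeinterpreter package is installed; the random-forest setup is illustrative):

import numpy as np
from sklearn.datasets import load_diabetes
from sklearn.ensemble import RandomForestRegressor
from treeinterpreter import treeinterpreter as ti

X, y = load_diabetes(return_X_y=True)
rf = RandomForestRegressor(n_estimators=50, random_state=0).fit(X, y)

prediction, bias, contributions = ti.predict(rf, X[:5])
# The prediction decomposes into bias + per-feature contributions.
assert np.allclose(prediction, bias + contributions.sum(axis=1))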
Example #26
Source File: decision_tree.py From mljar-supervised with MIT License | 4 votes |
def interpret(self, X_train, y_train, X_validation, y_validation,
              model_file_path, learner_name, target_name=None,
              class_names=None, metric_name=None, ml_task=None,
              explain_level=2):
    super(DecisionTreeAlgorithm, self).interpret(
        X_train, y_train, X_validation, y_validation,
        model_file_path, learner_name, target_name, class_names,
        metric_name, ml_task, explain_level)
    if explain_level == 0:
        return
    try:
        if len(class_names) > 10:
            # dtreeviz does not support more than 10 classes
            return
        viz = dtreeviz(
            self.model,
            X_train,
            y_train,
            target_name="target",
            feature_names=X_train.columns,
            class_names=class_names,
        )
        tree_file_plot = os.path.join(model_file_path, learner_name + "_tree.svg")
        viz.save(tree_file_plot)
    except Exception as e:
        logger.info(f"Problem when visualizing decision tree. {str(e)}")
Example #27
Source File: decision_tree.py From mljar-supervised with MIT License | 4 votes |
def interpret(self, X_train, y_train, X_validation, y_validation,
              model_file_path, learner_name, target_name=None,
              class_names=None, metric_name=None, ml_task=None,
              explain_level=2):
    super(DecisionTreeRegressorAlgorithm, self).interpret(
        X_train, y_train, X_validation, y_validation,
        model_file_path, learner_name, target_name, class_names,
        metric_name, ml_task, explain_level)
    if explain_level == 0:
        return
    try:
        viz = dtreeviz(
            self.model,
            X_train,
            y_train,
            target_name="target",
            feature_names=X_train.columns,
        )
        tree_file_plot = os.path.join(model_file_path, learner_name + "_tree.svg")
        viz.save(tree_file_plot)
    except Exception as e:
        logger.info(f"Problem when visualizing decision tree regressor. {str(e)}")
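A minimal standalone sketch of the same dtreeviz call, assuming the older function-style dtreeviz API used in the two examples above (newer dtreeviz releases moved to a model-adaptor API):

import pandas as pd
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier
from dtreeviz.trees import dtreeviz

iris = load_iris()
X = pd.DataFrame(iris.data, columns=iris.feature_names)
clf = DecisionTreeClassifier(max_depth=3).fit(X, iris.target)

viz = dtreeviz(clf, X, iris.target,
               target_name="target",
               feature_names=X.columns,
               class_names=list(iris.target_names))
viz.save("iris_tree.svg")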
Example #28
Source File: tree.py From toad with MIT License | 4 votes |
def tree_to_dot(tree, features, high_light=0.15):
    from io import StringIO
    from sklearn.tree import _tree

    out = StringIO()
    tree_ = tree.tree_
    features = np.array([
        features[i] if i != _tree.TREE_UNDEFINED else "undefined!"
        for i in tree_.feature
    ])

    out.write('digraph Tree {\n')
    out.write('edge [fontname="FangSong"];\n')
    out.write('node [shape=box];\n')

    def recurse(node, parent=None, label=None):
        sample = tree_.n_node_samples[node]
        bad_rate = tree_.value[node][0, 1] / sample

        out.write('{} [label="'.format(node))
        out.write('bad rate: {:.2%}\n'.format(bad_rate))
        out.write('sample: {:.2%}\n'.format(sample / tree_.n_node_samples[0]))

        # end of label
        out.write('"')

        if bad_rate > high_light:
            out.write(', color="red"')

        # end of node
        out.write('];\n')

        if tree_.feature[node] != _tree.TREE_UNDEFINED:
            name = features[node]
            threshold = tree_.threshold[node]
            recurse(tree_.children_left[node], node,
                    '{} <= {:.2f}'.format(name, threshold))
            recurse(tree_.children_right[node], node,
                    '{} > {:.2f}'.format(name, threshold))

        if parent is not None:
            out.write('{} -> {} [label="{}"];\n'.format(parent, node, label))

    recurse(0, None)
    out.write('}')

    s = out.getvalue()
    out.close()
    return s
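A small usage sketch for tree_to_dot, assuming the function above is in scope along with a module-level import numpy as np, and a binary-target DataFrame as in the dtree helper from Example #11 (the rendering step is handled by dot_to_img in the original project; here the DOT string is just printed). Note the bad-rate arithmetic reads tree_.value as class counts, which matches scikit-learn releases before 1.3:

import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeClassifier

rng = np.random.RandomState(0)
frame = pd.DataFrame(rng.rand(200, 3), columns=['f1', 'f2', 'f3'])
target = (frame['f1'] + rng.rand(200) > 1).astype(int)  # synthetic binary label

clf = DecisionTreeClassifier(max_depth=2, min_samples_leaf=0.05)
clf.fit(frame, target)

dot_string = tree_to_dot(clf, frame.columns.values, high_light=0.15)
print(dot_string)  # feed to graphviz `dot -Tpng` to render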
Example #29
Source File: GetMLPara.py From dr_droid with Apache License 2.0 | 4 votes |
def final_train_and_test_after_preparation(X_original, y):
    #KNN
    knn = KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                               n_neighbors=5, p=2, weights='uniform')

    #decision tree
    dtree = DecisionTreeClassifier(criterion='entropy', min_samples_leaf=4,
                                   min_samples_split=2, random_state=None,
                                   splitter='best')

    #random forest
    rforest = RandomForestClassifier(bootstrap=True, criterion='gini', max_depth=None,
                                     max_features='auto', min_samples_leaf=1,
                                     min_samples_split=2, n_estimators=10, n_jobs=1,
                                     oob_score=False, random_state=3)

    #svm
    svmrbf = svm.SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0, degree=3,
                     kernel='rbf', max_iter=-1, probability=True, random_state=None,
                     shrinking=True, tol=0.001, verbose=False)

    #naive bayes
    #nbb = BernoulliNB()

    X = SelectKBest(f_classif, k=80).fit_transform(X_original, y)
    #X = X_original
    print(X.shape)

    #get_scroe_using_cv(knn, X, y)
    #get_scroe_using_cv(dtree, X, y)
    #get_scroe_using_cv(rforest, X, y)
    #get_scroe_using_cv(svmrbf, X, y)

    fpr_knn, tpr_knn, auc_knn = get_fpr_tpr(knn, X, y)
    fpr_dtree, tpr_dtree, auc_dtree = get_fpr_tpr(dtree, X, y)
    fpr_rforest, tpr_rforest, auc_rforest = get_fpr_tpr(rforest, X, y)
    fpr_svmrbf, tpr_svmrbf, auc_svmrbf = get_fpr_tpr(svmrbf, X, y)
    #fpr_nbb, tpr_nbb, auc_nbb = get_fpr_tpr(nbb, X, y)

    plt.clf()
    plt.plot(fpr_svmrbf, tpr_svmrbf, 'y.--', label='SVM AUC=%0.4f' % auc_svmrbf)
    plt.plot(fpr_knn, tpr_knn, 'r^--', label='KNN AUC=%0.4f' % auc_knn)
    plt.plot(fpr_dtree, tpr_dtree, 'b>--', label='D.Tree AUC=%0.4f' % auc_dtree)
    plt.plot(fpr_rforest, tpr_rforest, 'go--', label='R.Forest AUC=%0.4f' % auc_rforest)
    #plt.plot(fpr_nbb, tpr_nbb, 'c*--', label='Naive Bayes AUC=%0.4f' % auc_nbb)
    plt.plot([0, 1], [0, 1], 'k--')
    plt.xlim([-0.02, 1.02])
    plt.ylim([-0.02, 1.02])
    plt.xlabel('FPR(False Positive Rate)', fontsize=20)
    plt.ylabel('TPR(True Positive Rate)', fontsize=20)
    #plt.title('Receiver operating characteristic')
    plt.legend(loc="lower right")
    plt.tight_layout()
    plt.grid()
    plt.show()

    del X
    del y

##################################DRAW P-R CURVE#######################################################
############ this is the precision and recall curve
Example #30
Source File: Estimators.py From slates_semisynth_expts with BSD 3-Clause "New" or "Revised" License | 4 votes |
def estimateAll(self, metric=None):
    if self.savedValues is not None:
        return

    self.savedValues = []
    numQueries = len(self.loggingPolicy.dataset.docsPerQuery)
    for query in range(numQueries):
        newRanking = self.targetPolicy.predict(query, self.rankingSize)
        allFeatures = self.loggingPolicy.dataset.features[query][newRanking, :]
        if newRanking.size < self.rankingSize:
            emptyPad = scipy.sparse.csr_matrix(
                (self.rankingSize - newRanking.size, self.numFeatures),
                dtype=numpy.float64)
            allFeatures = scipy.sparse.vstack((allFeatures, emptyPad),
                                              format="csr", dtype=numpy.float64)
        allFeatures = allFeatures.toarray()

        nRows, nCols = allFeatures.shape
        size = nRows * nCols
        currentFeatures = numpy.reshape(allFeatures, (1, size))

        currentValue = None
        if self.estimatorType == 'tree':
            currentValue = self.tree.predict(currentFeatures)[0]
        else:
            currentValue = numpy.dot(currentFeatures, self.policyParams)[0]

        low = None
        high = None
        if metric is not None:
            low = metric.getMin(newRanking.size)
            high = metric.getMax(newRanking.size)

        if low is not None:
            currentValue = max(currentValue, low)
        if high is not None:
            currentValue = min(currentValue, high)

        if currentValue > 1.0 or currentValue < 0.0:
            print("Direct:estimateAll [LOG] estimate %0.3f " % (currentValue),
                  flush=True)

        del allFeatures
        del currentFeatures

        self.savedValues.append(currentValue)
        if query % 100 == 0:
            print(".", end="", flush=True)

    print("")
    print("Direct:estimateAll [LOG] Precomputed estimates.", flush=True)