# -*- coding: utf-8 -*- from __future__ import division from __future__ import print_function import os import sys from os import path import unittest # noinspection PyProtectedMember from sklearn.utils.testing import assert_allclose from sklearn.utils.testing import assert_array_less from sklearn.utils.testing import assert_equal from sklearn.utils.testing import assert_greater from sklearn.utils.testing import assert_greater_equal from sklearn.utils.testing import assert_less_equal from sklearn.utils.testing import assert_raises from sklearn.utils.estimator_checks import check_estimator from sklearn.metrics import roc_auc_score from sklearn.model_selection import train_test_split from sklearn.utils.validation import check_X_y from scipy.io import loadmat from scipy.stats import rankdata # temporary solution for relative imports in case pyod is not installed # if pyod is installed, no need to use the following line sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) from pyod.models.xgbod import XGBOD from pyod.utils.data import generate_data class TestXGBOD(unittest.TestCase): def setUp(self): # Define data file and read X and y # Generate some data if the source data is missing this_directory = path.abspath(path.dirname(__file__)) mat_file = 'pima.mat' try: mat = loadmat(path.join(*[this_directory, 'data', mat_file])) except TypeError: print('{data_file} does not exist. Use generated data'.format( data_file=mat_file)) X, y = generate_data(train_only=True) # load data except IOError: print('{data_file} does not exist. Use generated data'.format( data_file=mat_file)) X, y = generate_data(train_only=True) # load data else: X = mat['X'] y = mat['y'].ravel() X, y = check_X_y(X, y) self.X_train, self.X_test, self.y_train, self.y_test = \ train_test_split(X, y, test_size=0.4, random_state=42) self.clf = XGBOD(random_state=42) self.clf.fit(self.X_train, self.y_train) self.roc_floor = 0.75 def test_parameters(self): assert (hasattr(self.clf, 'clf_') and self.clf.decision_scores_ is not None) assert (hasattr(self.clf, '_scalar') and self.clf.labels_ is not None) assert (hasattr(self.clf, 'n_detector_') and self.clf.labels_ is not None) assert (hasattr(self.clf, 'X_train_add_') and self.clf.labels_ is not None) assert (hasattr(self.clf, 'decision_scores_') and self.clf.decision_scores_ is not None) assert (hasattr(self.clf, 'labels_') and self.clf.labels_ is not None) def test_train_scores(self): assert_equal(len(self.clf.decision_scores_), self.X_train.shape[0]) def test_prediction_scores(self): pred_scores = self.clf.decision_function(self.X_test) # check score shapes assert_equal(pred_scores.shape[0], self.X_test.shape[0]) # check performance assert_greater(roc_auc_score(self.y_test, pred_scores), self.roc_floor) def test_prediction_labels(self): pred_labels = self.clf.predict(self.X_test) assert_equal(pred_labels.shape, self.y_test.shape) def test_prediction_proba(self): pred_proba = self.clf.predict_proba(self.X_test) assert_greater_equal(pred_proba.min(), 0) assert_less_equal(pred_proba.max(), 1) # check performance assert_greater(roc_auc_score(self.y_test, pred_proba), self.roc_floor) def test_fit_predict(self): pred_labels = self.clf.fit_predict(self.X_train, self.y_train) assert_equal(pred_labels.shape, self.y_train.shape) def test_fit_predict_score(self): self.clf.fit_predict_score(self.X_test, self.y_test) self.clf.fit_predict_score(self.X_test, self.y_test, scoring='roc_auc_score') self.clf.fit_predict_score(self.X_test, self.y_test, scoring='prc_n_score') with assert_raises(NotImplementedError): self.clf.fit_predict_score(self.X_test, self.y_test, scoring='something') def test_predict_rank(self): pred_socres = self.clf.decision_function(self.X_test) pred_ranks = self.clf._predict_rank(self.X_test) print(pred_ranks) # assert the order is reserved assert_allclose(rankdata(pred_ranks), rankdata(pred_socres), rtol=4) assert_array_less(pred_ranks, self.X_train.shape[0] + 1) assert_array_less(-0.1, pred_ranks) def test_predict_rank_normalized(self): pred_socres = self.clf.decision_function(self.X_test) pred_ranks = self.clf._predict_rank(self.X_test, normalized=True) # assert the order is reserved assert_allclose(rankdata(pred_ranks), rankdata(pred_socres), rtol=4) assert_array_less(pred_ranks, 1.01) assert_array_less(-0.1, pred_ranks) def tearDown(self): pass if __name__ == '__main__': unittest.main()