# Python source code of OCSVM

#!/usr/bin/python

import json
import lsanomaly
import matplotlib.pyplot as plt
import numpy as np
import pandas
from keras.preprocessing import sequence
from sklearn import metrics
from sklearn import svm

import helpers

class OCSVM:
    """Novelty-detection experiment built around a one-class SVM.

    Configuration is read from the ``OCSVM`` and ``Data`` sections of
    ``settings.ini`` via ``helpers.ConfigSectionMap``. Depending on the
    ``use_lsanomaly`` setting the model is either scikit-learn's
    ``svm.OneClassSVM`` or ``lsanomaly.LSAnomaly``.
    """

    def __init__(self):
        # NOTE(review): the value types returned by ConfigSectionMap are not
        # visible here; the `== True` checks further down assume real
        # booleans rather than strings -- confirm against helpers.
        self.settings = helpers.ConfigSectionMap('settings.ini', 'OCSVM')
        self.data = helpers.ConfigSectionMap('settings.ini', 'Data')

    def print_accuracy(self, title, datasetY, predictions):
        """Print accuracy, precision, recall, F1 and ROC AUC for predictions.

        :param title: heading printed before the metrics.
        :param datasetY: ground-truth labels.
        :param predictions: predicted labels, same length as ``datasetY``.
        """
        print(title)

        # Single-argument print calls render identically on Python 2 and 3
        # (a multi-argument print(...) shows up as a tuple on Python 2).
        print("accuracy: %s" % metrics.accuracy_score(datasetY, predictions))
        print("precision: %s" % metrics.precision_score(datasetY, predictions))
        print("recall: %s" % metrics.recall_score(datasetY, predictions))
        print("f1: %s" % metrics.f1_score(datasetY, predictions))
        print("area under curve (auc): %s"
              % metrics.roc_auc_score(datasetY, predictions))

    def replace_in_list(self, list, oldChar, newChar):
        """Replace, in place, every element equal to ``oldChar`` by ``newChar``.

        :param list: mutable sequence to modify (the parameter name shadows
            the builtin but is kept for interface compatibility).
        :param oldChar: value to look for.
        :param newChar: value written in place of each match.
        """
        for index, value in enumerate(list):
            if value == oldChar:
                list[index] = newChar

    def save_parameters(self, model, model_filename='model_ocsvm.json'):
        """Write ``model.get_params()`` as JSON to ``model_filename``.

        :param model: object exposing ``get_params()``.
        :param model_filename: destination path.
        """
        # Serialize before opening the file so that a serialization failure
        # does not leave a truncated file behind.
        parameters = json.dumps(model.get_params())
        with open(model_filename, "w") as json_file:
            json_file.write(parameters)

        print("Saved parameters to filesystem")

    def load_parameters(self, model, model_filename='model_ocsvm.json'):
        """Load JSON parameters from ``model_filename`` and apply them to ``model``.

        :param model: object exposing ``set_params(**params)``.
        :param model_filename: path of the JSON file to read.
        :returns: whatever ``model.set_params`` returns.
        """
        with open(model_filename, 'r') as json_file:
            loaded_parameters = json.load(json_file)
        print("Loaded parameters from filesystem.")

        # set_params takes keyword arguments, so the dict must be unpacked.
        return model.set_params(**loaded_parameters)

    def train_with_scikit(self, trainX, testX):
        """Fit a ``svm.OneClassSVM`` on ``trainX`` and predict both sets.

        When the ``load_parameters`` setting is true, the estimator's
        parameters are read from the default parameter file; otherwise
        ``nu``, ``kernel``, ``gamma`` and ``verbose`` come from the settings.

        :returns: ``(y_pred_train, y_pred_test, n_error_train, n_error_test)``
            where the error counts are the number of ``-1`` predictions.
        """
        settings = self.settings

        if settings['load_parameters'] == True:
            # Build a default estimator and overwrite its parameters with the
            # stored ones.
            # NOTE(review): relies on set_params returning the estimator, as
            # scikit-learn documents -- confirm for the installed version.
            clf = self.load_parameters(svm.OneClassSVM())
        else:
            clf = svm.OneClassSVM(
                nu=settings['nu'],
                kernel=settings['kernel'],
                gamma=settings['gamma'],
                verbose=settings['verbose'])

        clf.fit(trainX)
        y_pred_train = clf.predict(trainX)
        y_pred_test = clf.predict(testX)

        n_error_train = y_pred_train[y_pred_train == -1].size
        n_error_test = y_pred_test[y_pred_test == -1].size

        return y_pred_train, y_pred_test, n_error_train, n_error_test

    def train_with_lsanomaly(self, trainX, testX):
        """Fit an ``lsanomaly.LSAnomaly`` model on ``trainX`` and predict both sets.

        Predictions equal to ``'anomaly'`` are rewritten to ``-1``.

        :returns: ``(y_pred_train, y_pred_test, n_error_train, n_error_test)``
            where the error counts are the number of ``-1`` entries.
        """
        anomalymodel = lsanomaly.LSAnomaly()
        anomalymodel.fit(trainX)
        y_pred_train = anomalymodel.predict(trainX)
        y_pred_test = anomalymodel.predict(testX)

        # Process results
        # NOTE(review): `.count` below assumes predict() returns plain lists
        # -- confirm against the lsanomaly version in use.
        self.replace_in_list(y_pred_train, 'anomaly', -1)
        self.replace_in_list(y_pred_test, 'anomaly', -1)
        n_error_train = y_pred_train.count(-1)
        n_error_test = y_pred_test.count(-1)

        return y_pred_train, y_pred_test, n_error_train, n_error_test

    def run(self):
        """Load the datasets, fit the configured model and plot the results.

        Reads the ';'-delimited CSV files named by the ``train_dataset_file``
        and ``test_dataset_file`` settings, keeps the first sixth of the
        training rows, pads every sequence to 30 entries, trains, and shows
        a two-panel figure (inputs on top, predictions below).
        """
        max_vector_length = 30

        # Create datasets
        train_dataset = pandas.read_csv(
            self.data['train_dataset_file'], delimiter=';', engine='python')
        test_dataset = pandas.read_csv(
            self.data['test_dataset_file'], delimiter=';', engine='python')

        # Floor division keeps the slice bound an integer on Python 3 too.
        train_dataset = train_dataset[:len(train_dataset) // 6]

        # Convert strings
        train_dataset_array = helpers.collection_values_to_array(train_dataset)
        test_dataset_array = helpers.collection_values_to_array(test_dataset)

        # Padding (from left)
        trainX = sequence.pad_sequences(
            train_dataset_array, maxlen=max_vector_length)
        testX = sequence.pad_sequences(
            test_dataset_array, maxlen=max_vector_length)

        assert (trainX.shape[1] == testX.shape[1])

        # fit the model
        if self.settings['use_lsanomaly'] == True:
            train_fn = self.train_with_lsanomaly
        else:
            train_fn = self.train_with_scikit
        y_pred_train, y_pred_test, n_error_train, n_error_test = train_fn(
            trainX, testX)

        # Visualize
        plt.figure(1)
        plt.subplot(211)
        # The title is set after the subplot exists so that it is attached to
        # the top panel instead of an implicit axes created beforehand.
        plt.title("Novelty Detection")
        plt.plot(trainX, 'ro', testX, 'g^')

        plt.subplot(212)
        plt.plot(y_pred_train, 'ro', y_pred_test, 'g^')
        plt.xlabel(
            "Anomalies in training set: %d/%d; Anomalies in test set: %d/%d;"
            % (n_error_train, trainX.shape[0], n_error_test, testX.shape[0]))
        plt.show()

        # TODO: print_accuracy is not invoked here; calling it needs
        # ground-truth labels, which this method does not load.