python source code of classifier

# coding:utf-8
from __future__ import print_function

import pickle
from numpy import shape
from sklearn import preprocessing
from sklearn.datasets import load_svmlight_file
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor

from stst import utils

__all__ = [
    "Strategy",
    "Classifier",
    "RandomForestRegression",
    "GradientBoostingRegression",
    "AverageEnsemble"
]


class Strategy(object):
    def train_model(self, train_file_path, model_path):
        return None

    def test_model(self, test_file_path, model_path, result_file_path):
        return None

    def load_file(self, file_path):
        data = load_svmlight_file(file_path)
        return data[0], data[1]


class Classifier(object):
    def __init__(self, strategy):
        self.strategy = strategy

    def train_model(self, train_file_path, model_path):
        return self.strategy.train_model(train_file_path, model_path)

    def test_model(self, test_file_path, model_path, result_file_path):
        return self.strategy.test_model(test_file_path, model_path, result_file_path)


class RandomForestRegression(Strategy):
    """
    RandomForest Regression

    """
    def __init__(self, n_estimators=300):
        self.trainer = "RandomForest Regression"
        print("==> Using %s Classifier" % (self.trainer))
        self.n_estimators = n_estimators

    def train_model(self, train_file_path, model_path):
        print("==> Load the data ...")
        X_train, Y_train = self.load_file(train_file_path)
        print(train_file_path, shape(X_train))

        print("==> Train the model ...")
        min_max_scaler = preprocessing.MaxAbsScaler()
        X_train_minmax = min_max_scaler.fit_transform(X_train)
        clf = RandomForestRegressor(n_estimators=self.n_estimators)
        clf.fit(X_train_minmax.toarray(), Y_train)

        print("==> Save the model ...")
        pickle.dump(clf, open(model_path, 'wb'))

        scaler_path = model_path.replace('.pkl', '.scaler.pkl')
        pickle.dump(min_max_scaler, open(scaler_path, 'wb'))
        return clf

    def test_model(self, test_file_path, model_path, result_file_path):
        print("==> Load the data ...")
        X_test, Y_test = self.load_file(test_file_path)
        print(test_file_path, shape(X_test))

        print("==> Load the model ...")
        clf = pickle.load(open(model_path, 'rb'))
        scaler_path = model_path.replace('.pkl', '.scaler.pkl')
        min_max_scaler = pickle.load(open(scaler_path, 'rb'))

        print("==> Test the model ...")
        X_test_minmax = min_max_scaler.transform(X_test)
        y_pred = clf.predict(X_test_minmax.toarray())

        print("==> Save the result ...")
        with utils.create_write_file(result_file_path) as f:
            for y in y_pred:
                print(y, file=f)
        return y_pred


class GradientBoostingRegression(Strategy):
    """
    Gradient Boosting Regression
    """
    def __init__(self, n_estimators=140):
        self.trainer = "GradientBoostingRegression"
        print("Using %s Classifier" % (self.trainer))
        self.n_estimators = n_estimators

    def train_model(self, train_file_path, model_path):
        print("==> Load the data ...")
        X_train, Y_train = self.load_file(train_file_path)
        print(train_file_path, shape(X_train))

        print("==> Train the model ...")
        min_max_scaler = preprocessing.MaxAbsScaler()
        X_train_minmax = min_max_scaler.fit_transform(X_train)

        clf = GradientBoostingRegressor(n_estimators=self.n_estimators)
        clf.fit(X_train_minmax.toarray(), Y_train)

        print("==> Save the model ...")
        pickle.dump(clf, open(model_path, 'wb'))

        scaler_path = model_path.replace('.pkl', '.scaler.pkl')
        pickle.dump(min_max_scaler, open(scaler_path, 'wb'))
        return clf

    def test_model(self, test_file_path, model_path, result_file_path):
        print("==> Load the data ...")
        X_test, Y_test = self.load_file(test_file_path)
        print(test_file_path, shape(X_test))

        print("==> Load the model ...")
        clf = pickle.load(open(model_path, 'rb'))
        scaler_path = model_path.replace('.pkl', '.scaler.pkl')
        min_max_scaler = pickle.load(open(scaler_path, 'rb'))

        print("==> Test the model ...")
        X_test_minmax = min_max_scaler.transform(X_test)
        y_pred = clf.predict(X_test_minmax.toarray())

        print("==> Save the result ...")
        with utils.create_write_file(result_file_path) as f:
            for y in y_pred:
                print(y, file=f)
        return y_pred


class AverageEnsemble(Strategy):
    def __init__(self):
        self.trainer = "Average Ensemble"
        print("Using %s Classifier" % (self.trainer))


    def train_model(self, train_file_path, model_path):
        pass

    def test_model(self, test_file_path, model_path, result_file_path):
        print("==> Load the data ...")
        X_test, Y_test = self.load_file(test_file_path)
        print(test_file_path, shape(X_test))
        X_test = X_test.toarray()
        for x in X_test[:10]:
            print(x)

        print("==> Test the model ...")
        y_pred = []
        for x in X_test:
            x = sum(x) / len(x)
            y_pred.append(x)

        print("==> Save the result ...")
        with utils.create_write_file(result_file_path) as f:
            for y in y_pred:
                print(y, file=f)
        return y_pred