# Python source code of model

import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.neural_network import MLPRegressor as MLPC
from sklearn.ensemble import ExtraTreesRegressor as ETC
from sklearn.ensemble import RandomForestRegressor as RFC
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.pipeline import Pipeline
from sklearn.decomposition import PCA

import numerox as nx
"""

Here are the directions for making your own model:

https://github.com/kwgoodman/numerox/blob/master/numerox/examples/model.rst

"""

# ---------------------------------------------------------------------------
# base class for all models


class Model(object):
    """Base class for models: supplies a display name and a text repr."""

    def __repr__(self):
        """Return ``name(key=value, ...)`` using the entries of ``self.p``.

        When the instance has no ``p`` attribute, or ``p`` is empty, the
        result is just ``name()``.
        """
        label = self.name
        if not hasattr(self, "p") or len(self.p) == 0:
            return label + "()"
        pieces = [key + "=" + str(val) for key, val in self.p.items()]
        return label + "(" + ", ".join(pieces) + ")"

    @property
    def name(self):
        "Name of model; falls back to the class name until `rename` is used"
        try:
            return self._name
        except AttributeError:
            # First access: cache the class name as the model's name.
            self._name = self.__class__.__name__
            return self._name

    def rename(self, name):
        "Rename model in place (no-op when `name` is None); model is returned"
        if name is not None:
            if not nx.isstring(name):
                raise ValueError('`name` must be a string')
            self._name = name
        return self


# ---------------------------------------------------------------------------
# numerox example models


class linear(Model):
    """Example model wrapping scikit-learn's ``LinearRegression``."""

    def fit_predict(self, dfit, dpre, tournament):
        """Fit on ``dfit.x`` / ``dfit.y[tournament]`` and predict ``dpre.x``.

        Returns the tuple ``(dpre.ids, yhat)``.
        """
        regressor = LinearRegression().fit(dfit.x, dfit.y[tournament])
        predictions = regressor.predict(dpre.x)
        return dpre.ids, predictions


class ridge_mean(Model):
    """Ridge regression fitted to the row-wise mean of all targets.

    Parameters
    ----------
    alpha : ridge regularization strength passed to ``Ridge``.
    """

    def __init__(self, alpha=6):
        self.p = {'alpha': alpha}

    def fit_predict(self, dfit, dpre, tournament):
        """Fit on ``dfit`` and predict ``dpre``; returns ``(dpre.ids, yhat)``.

        ``tournament`` is not used: the fitting target is the mean of
        ``dfit.y[:]`` taken along axis 1.
        """
        # `Ridge(normalize=True)` was deprecated in scikit-learn 1.0 and
        # removed in 1.2.  Do the same preprocessing explicitly so the model
        # runs on any version: center each column, then divide it by the l2
        # norm of the centered column (zero norms are replaced by 1).
        xfit = np.asarray(dfit.x, dtype=np.float64)
        center = xfit.mean(axis=0)
        xfit = xfit - center
        scale = np.sqrt((xfit ** 2).sum(axis=0))
        scale[scale == 0.0] = 1.0

        model = Ridge(alpha=self.p['alpha'])
        yfit = dfit.y[:].mean(axis=1)
        model.fit(xfit / scale, yfit)

        # Apply the statistics learned on the fitting data to the new rows.
        xpre = (np.asarray(dpre.x, dtype=np.float64) - center) / scale
        yhat = model.predict(xpre)
        return dpre.ids, yhat


class extratrees(Model):
    """Example model wrapping scikit-learn's ``ExtraTreesRegressor``.

    Parameters
    ----------
    ntrees : passed as ``n_estimators``.
    depth : passed as ``max_depth``.
    nfeatures : passed as ``max_features``.
    seed : passed as ``random_state``.
    """

    def __init__(self, ntrees=100, depth=3, nfeatures=7, seed=0):
        self.p = {
            'ntrees': ntrees,
            'depth': depth,
            'nfeatures': nfeatures,
            'seed': seed
        }

    def fit_predict(self, dfit, dpre, tournament):
        """Fit on ``dfit.x`` / ``dfit.y[tournament]`` and predict ``dpre.x``.

        Returns the tuple ``(dpre.ids, yhat)``.
        """
        # The criterion is deliberately left at the library default (squared
        # error).  Spelling it as 'mse' fails on scikit-learn >= 1.2, where
        # the value was renamed to 'squared_error'; the default means the
        # same thing under both names.
        model = ETC(max_features=self.p['nfeatures'],
                    max_depth=self.p['depth'],
                    n_estimators=self.p['ntrees'],
                    random_state=self.p['seed'],
                    n_jobs=-1)
        model.fit(dfit.x, dfit.y[tournament])
        yhat = model.predict(dpre.x)
        return dpre.ids, yhat


class randomforest(Model):
    """Example model wrapping scikit-learn's ``RandomForestRegressor``.

    Parameters
    ----------
    ntrees : passed as ``n_estimators``.
    depth : passed as ``max_depth``.
    max_features : passed as ``max_features``.
    seed : passed as ``random_state``.
    """

    def __init__(self, ntrees=100, depth=3, max_features=2, seed=0):
        self.p = {
            'ntrees': ntrees,
            'depth': depth,
            'max_features': max_features,
            'seed': seed
        }

    def fit_predict(self, dfit, dpre, tournament):
        """Fit on ``dfit.x`` / ``dfit.y[tournament]`` and predict ``dpre.x``.

        Returns the tuple ``(dpre.ids, yhat)``.
        """
        # The criterion is deliberately left at the library default (squared
        # error).  Spelling it as 'mse' fails on scikit-learn >= 1.2, where
        # the value was renamed to 'squared_error'; the default means the
        # same thing under both names.
        model = RFC(max_features=self.p['max_features'],
                    max_depth=self.p['depth'],
                    n_estimators=self.p['ntrees'],
                    random_state=self.p['seed'],
                    n_jobs=-1)
        model.fit(dfit.x, dfit.y[tournament])
        yhat = model.predict(dpre.x)
        return dpre.ids, yhat


class mlpc(Model):
    """Example model wrapping scikit-learn's ``MLPRegressor``.

    Parameters
    ----------
    alpha : passed as ``alpha``.
    layers : passed as ``hidden_layer_sizes``; ``None`` (the default) means
        ``[5, 3]``.
    activation : passed as ``activation``.
    learn : passed as ``learning_rate_init``.
    seed : passed as ``random_state``.
    """

    def __init__(self,
                 alpha=0.11,
                 layers=None,
                 activation='tanh',
                 learn=0.002,
                 seed=0):
        # A list literal as the default would be created once and shared by
        # every instance through `self.p`; build a fresh one per instance.
        if layers is None:
            layers = [5, 3]
        self.p = {
            'alpha': alpha,
            'layers': layers,
            'activation': activation,
            'learn': learn,
            'seed': seed
        }

    def fit_predict(self, dfit, dpre, tournament):
        """Fit on ``dfit.x`` / ``dfit.y[tournament]`` and predict ``dpre.x``.

        Returns the tuple ``(dpre.ids, yhat)``.
        """
        model = MLPC(hidden_layer_sizes=self.p['layers'],
                     alpha=self.p['alpha'],
                     activation=self.p['activation'],
                     learning_rate_init=self.p['learn'],
                     random_state=self.p['seed'],
                     max_iter=200)
        model.fit(dfit.x, dfit.y[tournament])
        yhat = model.predict(dpre.x)
        return dpre.ids, yhat


# model used by numerai to generate example_predictions.csv
class example_predictions(Model):
    """Gradient-boosting model with fixed settings and rounded output."""

    def __init__(self):
        # No tunable parameters; an empty dict keeps the repr as `name()`.
        self.p = {}

    def fit_predict(self, dfit, dpre, tournament):
        """Fit on ``dfit.x`` / ``dfit.y[tournament]`` and predict ``dpre.x``.

        Predictions are rounded to 5 decimals.  Returns ``(dpre.ids, yhat)``.
        """
        settings = dict(n_estimators=25, max_depth=1, random_state=1776)
        booster = GradientBoostingRegressor(**settings)
        booster.fit(dfit.x, dfit.y[tournament])
        rounded = np.round(booster.predict(dpre.x), 5)
        return dpre.ids, rounded


# sklearn pipeline example
class linearPCA(Model):
    """Pipeline example: PCA followed by linear regression.

    ``nfeatures`` is passed to ``PCA`` as ``n_components``.
    """

    def __init__(self, nfeatures=10):
        self.p = {'nfeatures': nfeatures}

    def fit_predict(self, dfit, dpre, tournament):
        """Fit on ``dfit.x`` / ``dfit.y[tournament]`` and predict ``dpre.x``.

        Returns the tuple ``(dpre.ids, yhat)``.
        """
        reducer = PCA(n_components=self.p['nfeatures'])
        regressor = LinearRegression()
        steps = [('pca', reducer), ("lr", regressor)]
        pipeline = Pipeline(steps)
        pipeline.fit(dfit.x, dfit.y[tournament])
        predictions = pipeline.predict(dpre.x)
        return dpre.ids, predictions


# fast model for testing; always predicts 0.5
class fifty(Model):
    """Constant model: every prediction is 0.5; ``dfit`` is ignored."""

    def __init__(self):
        # No tunable parameters; an empty dict keeps the repr as `name()`.
        self.p = {}

    def fit_predict(self, dfit, dpre, tournament):
        """Return ``(dpre.ids, yhat)`` with ``len(dpre)`` values of 0.5."""
        nrows = len(dpre)
        constant = np.full(nrows, 0.5)
        return dpre.ids, constant