python source code of test

"""Test revrand models."""
import numpy as np

from sklearn.pipeline import Pipeline
from sklearn.decomposition import PCA
from sklearn.base import clone
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV

from revrand import StandardLinearModel, GeneralizedLinearModel
from revrand.likelihoods import Gaussian, Binomial
from revrand.basis_functions import LinearBasis, RandomRBF, RandomMatern52
from revrand.metrics import smse
from revrand.btypes import Parameter, Positive


def test_slm(make_gaus_data):

    X, y, Xs, ys = make_gaus_data

    basis = LinearBasis(onescol=False)

    slm = StandardLinearModel(basis)
    slm.fit(X, y)
    Ey = slm.predict(Xs)

    assert smse(ys, Ey) < 0.1

    basis = LinearBasis(onescol=False) \
        + RandomRBF(nbases=10, Xdim=X.shape[1]) \
        + RandomMatern52(nbases=10, Xdim=X.shape[1])

    slm = StandardLinearModel(basis)
    slm.fit(X, y)
    Ey = slm.predict(Xs)

    assert smse(ys, Ey) < 0.1


def test_pipeline_slm(make_gaus_data):

    X, y, Xs, ys = make_gaus_data

    slm = StandardLinearModel(LinearBasis(onescol=True))
    estimators = [('PCA', PCA()),
                  ('SLM', slm)]
    pipe = Pipeline(estimators)

    pipe.fit(X, y)
    Ey = pipe.predict(Xs)
    assert smse(ys, Ey) < 0.1


def test_gridsearch_slm(make_gaus_data):

    X, y, Xs, ys = make_gaus_data

    slm = StandardLinearModel(LinearBasis(onescol=True))

    param_dict = {'var': [Parameter(v, Positive()) for v in [1.0, 2.0]]}
    estimator = GridSearchCV(slm, param_dict, n_jobs=-1)

    estimator.fit(X, y)
    Ey = estimator.predict(Xs)
    assert len(ys) == len(Ey)  # we just want to make sure this all runs


def test_randomgridsearch_slm(make_gaus_data):

    X, y, Xs, ys = make_gaus_data

    slm = StandardLinearModel(LinearBasis(onescol=True))

    param_dict = {
        'var': [Parameter(1.0 / v, Positive()) for v in range(1, 6)]
    }
    estimator = RandomizedSearchCV(slm, param_dict, n_jobs=-1, n_iter=2)

    estimator.fit(X, y)
    Ey = estimator.predict(Xs)
    assert len(ys) == len(Ey)  # we just want to make sure this all runs


def test_glm_gaussian(make_gaus_data, make_random):

    X, y, Xs, ys = make_gaus_data

    basis = LinearBasis(onescol=True)
    lhood = Gaussian()

    # simple SGD
    glm = GeneralizedLinearModel(lhood, basis, random_state=make_random)
    glm.fit(X, y)
    Ey = glm.predict(Xs)
    assert smse(ys, Ey) < 0.1

    # Test BasisCat
    basis = LinearBasis(onescol=True) \
        + RandomRBF(nbases=20, Xdim=X.shape[1]) \
        + RandomMatern52(nbases=20, Xdim=X.shape[1])

    glm = GeneralizedLinearModel(lhood, basis, random_state=make_random)
    glm.fit(X, y)
    Ey = glm.predict(Xs)
    assert smse(ys, Ey) < 0.1

    # Test upper quantile estimates
    py, _, _ = glm.predict_cdf(Xs, 1e5)
    assert np.allclose(py, 1.)

    # Test log probability
    lpy, _, _ = glm.predict_logpdf(Xs, Ey)
    assert np.all(lpy > -100)

    EyQn, EyQx = glm.predict_interval(Xs, 0.9)
    assert all(Ey <= EyQx)
    assert all(Ey >= EyQn)


def test_glm_binomial(make_binom_data, make_random):
    # This is more to test the logic than to test if the model can overfit,
    # hence more relaxed SMSE. This is because this is a harder problem than
    # the previous case. We also haven't split training ans test sets, since we
    # want to check the latent function and bounds

    X, y, p, n = make_binom_data
    f = p * n

    basis = LinearBasis(onescol=True) \
        + RandomRBF(nbases=20, Xdim=X.shape[1]) \
        + RandomMatern52(nbases=20, Xdim=X.shape[1])
    lhood = Binomial()
    largs = (n,)

    # SGD
    glm = GeneralizedLinearModel(lhood, basis, random_state=make_random)
    glm.fit(X, y, likelihood_args=largs)
    Ey = glm.predict(X, likelihood_args=largs)

    assert smse(f, Ey) < 1

    # Test upper quantile estimates
    py, _, _ = glm.predict_cdf(X, 1e5, likelihood_args=largs)
    assert np.allclose(py, 1.)

    EyQn, EyQx = glm.predict_interval(X, 0.9, likelihood_args=largs)
    assert all(Ey <= EyQx)
    assert all(Ey >= EyQn)


def test_pipeline_glm(make_gaus_data, make_random):

    X, y, Xs, ys = make_gaus_data

    glm = GeneralizedLinearModel(Gaussian(), LinearBasis(onescol=True),
                                 random_state=make_random)
    estimators = [('PCA', PCA()),
                  ('SLM', glm)
                  ]
    pipe = Pipeline(estimators)

    pipe.fit(X, y)
    Ey = pipe.predict(Xs)
    assert smse(ys, Ey) < 0.1


def test_gridsearch_glm(make_gaus_data):

    X, y, Xs, ys = make_gaus_data

    glm = GeneralizedLinearModel(Gaussian(), LinearBasis(onescol=True),
                                 random_state=1, maxiter=100)

    param_dict = {'batch_size': [10, 20]}
    estimator = GridSearchCV(glm, param_dict, verbose=1, n_jobs=-1)

    estimator.fit(X, y)
    Ey = estimator.predict(Xs)
    assert len(ys) == len(Ey)  # we just want to make sure this all runs


def test_randomgridsearch_glm(make_gaus_data):

    X, y, Xs, ys = make_gaus_data

    glm = GeneralizedLinearModel(Gaussian(), LinearBasis(onescol=True),
                                 random_state=1, maxiter=100)

    param_dict = {'batch_size': range(1, 11)}
    estimator = RandomizedSearchCV(glm, param_dict, verbose=1, n_jobs=-1,
                                   n_iter=2)

    estimator.fit(X, y)
    Ey = estimator.predict(Xs)
    assert len(ys) == len(Ey)  # we just want to make sure this all runs


def test_sklearn_clone(make_gaus_data):

    X, y, Xs, ys = make_gaus_data

    basis = LinearBasis(onescol=True)
    slm = StandardLinearModel(basis=basis)
    glm = GeneralizedLinearModel(likelihood=Gaussian(), basis=basis,
                                 maxiter=100)

    slm_clone = clone(slm)
    glm_clone = clone(glm)

    slm_clone.fit(X, y)
    glm_clone.fit(X, y)

    # scalar values
    glm_keys = [
        'K',
        'batch_size',
        'maxiter',
        'nsamples',
        'random_state',
        'updater'
    ]

    for k in glm_keys:
        assert glm.get_params()[k] == glm_clone.get_params()[k]

    # Manually test likelihood objects
    assert glm_clone.likelihood.params.value == glm.likelihood.params.value

    # scalar values
    slm_keys = [
        'maxiter',
        'tol'
    ]

    for k in slm_keys:
        assert slm.get_params()[k] == slm_clone.get_params()[k]

    # Manually test variance objects
    assert slm_clone.var.value == slm.var.value