python source code of test

# -*- coding: utf-8 -*-

import unittest

import pytest
import yaml
import pandas as pd
import numpy as np

from gordo.machine.model.transformers.imputer import InfImputer
from gordo import serializer

from sklearn.preprocessing import FunctionTransformer
from sklearn.pipeline import Pipeline
from sklearn.decomposition import PCA


class GordoFunctionTransformerFuncsTestCase(unittest.TestCase):
    """
    Checks that gordo functions meant to be wrapped in a Scikit-Learn
    FunctionTransformer can be used as a pipeline step.
    """

    def _validate_transformer(self, transformer):
        """
        Place ``transformer`` between two PCA steps and run ``fit_transform``
        on a random 10x10 matrix.

        Nothing is asserted on the output; any exception raised by the
        pipeline propagates to the caller.
        """
        data = np.random.random((10, 10))
        steps = [
            ("pca1", PCA()),
            ("custom", transformer),
            ("pca2", PCA()),
        ]
        Pipeline(steps).fit_transform(data)

    def test_multiply_by_function_transformer(self):
        """
        ``multiply_by`` runs inside a pipeline when ``factor`` is supplied,
        and a TypeError is expected when it is left out.
        """
        from gordo.machine.model.transformer_funcs.general import multiply_by

        # With the required 'factor' keyword the pipeline should run cleanly
        with_factor = FunctionTransformer(func=multiply_by, kw_args={"factor": 2})
        self._validate_transformer(with_factor)

        # Without the required keyword the call is expected to fail
        without_factor = FunctionTransformer(func=multiply_by)
        self.assertRaises(TypeError, self._validate_transformer, without_factor)


@pytest.mark.parametrize("strategy", ["extremes", "minmax"])
def test_infimputer_basic(strategy):
    """
    InfImputer removes positive and negative infinities from numpy arrays and
    pandas dataframes, using the fill rule selected by ``strategy``.
    """
    base_x = np.random.random((100, 10)).astype(np.float32)

    # Writes through the flattened array are expected to land in base_x;
    # the two asserts below confirm that they did.
    flat_view = base_x.ravel()

    pos_inf_idxs = np.random.randint(0, len(flat_view), size=100)
    neg_inf_idxs = np.random.randint(0, len(flat_view), size=100)

    flat_view[pos_inf_idxs] = np.inf
    flat_view[neg_inf_idxs] = -np.inf

    # Remember where the infinities were placed in the untouched input
    pos_mask = np.isposinf(base_x)
    neg_mask = np.isneginf(base_x)
    assert pos_mask.sum() > 0, "Expected some positive infinity values here"
    assert neg_mask.sum() > 0, "Expected some negative infinity values here"

    imputer = InfImputer(strategy=strategy, delta=2.0)

    # --- numpy input ---
    # A copy is handed over so base_x keeps its infinities for comparison
    X = imputer.fit_transform(base_x.copy())
    assert np.isposinf(X).sum() == 0, "Expected no positive infinity values here"
    assert np.isneginf(X).sum() == 0, "Expected no negative infinity values here"

    if strategy == "minmax":
        # Column index of every positive infinity in the original data
        pos_columns = np.nonzero(pos_mask)[1]

        # Largest value of those columns after imputation ...
        imputed_maxes = X[:, pos_columns].max(axis=0)

        # ... and their largest value before, with the invalid entries masked
        original_maxes = np.ma.masked_invalid(base_x[:, pos_columns]).max(axis=0)

        # Each former +inf is expected to sit 2.0 (the delta given to the
        # imputer) above the previous maximum of its column
        for imputed, original in zip(imputed_maxes, original_maxes):
            assert np.isclose(imputed, original + 2.0)

        # Same procedure for the negative infinities, using column minimums
        neg_columns = np.nonzero(neg_mask)[1]
        imputed_mins = X[:, neg_columns].min(axis=0)
        original_mins = np.ma.masked_invalid(base_x[:, neg_columns]).min(axis=0)

        # Each former -inf is expected to sit 2.0 below the previous minimum
        for imputed, original in zip(imputed_mins, original_mins):
            assert np.isclose(imputed, original - 2.0)

    elif strategy == "extremes":
        dtype_limits = np.finfo(X.dtype)

        # Former +inf positions must hold the largest value of the dtype
        assert np.all(X[pos_mask] == dtype_limits.max)

        # Former -inf positions must hold the smallest value of the dtype
        assert np.all(X[neg_mask] == dtype_limits.min)

    # --- pandas input ---
    frame = pd.DataFrame(base_x.copy())
    imputed_frame = imputer.fit_transform(frame)
    assert (
        np.isposinf(imputed_frame).sum() == 0
    ), "Expected no positive infinity values here"
    assert (
        np.isneginf(imputed_frame).sum() == 0
    ), "Expected no negative infinity values here"


def test_infimputer_fill_values():
    """
    InfImputer when fill values are provided

    Positive infinities are planted at flat positions 1-5 and negative
    infinities at flat positions 6-10. After imputation those positions must
    hold exactly the configured ``inf_fill_value`` / ``neg_inf_fill_value``.
    """
    base_x = np.random.random((100, 10)).astype(np.float32)

    # Writes through the flattened array are expected to land in base_x;
    # the two asserts below confirm that they did.
    flat_view = base_x.ravel()

    pos_inf_idxs = [1, 2, 3, 4, 5]
    neg_inf_idxs = [6, 7, 8, 9, 10]

    flat_view[pos_inf_idxs] = np.inf
    flat_view[neg_inf_idxs] = -np.inf

    # Our base x should now contain pos/neg inf values
    assert np.isposinf(base_x).sum() > 0, "Expected some positive infinity values here"
    assert np.isneginf(base_x).sum() > 0, "Expected some negative infinity values here"

    imputer = InfImputer(inf_fill_value=9999.0, neg_inf_fill_value=-9999.0)
    X = imputer.fit_transform(base_x)

    # The comparison results must be asserted; a bare np.equal(...) call only
    # builds a boolean array and can never fail the test.
    # The index lists are used flat (not nested) so the selection stays 1-D.
    flat_result = X.ravel()
    assert np.array_equal(
        flat_result[pos_inf_idxs], np.full(len(pos_inf_idxs), 9999.0)
    ), "Expected positive infinities to be replaced by inf_fill_value"
    assert np.array_equal(
        flat_result[neg_inf_idxs], np.full(len(neg_inf_idxs), -9999.0)
    ), "Expected negative infinities to be replaced by neg_inf_fill_value"


@pytest.mark.parametrize(
    "config_str",
    [
        """
    sklearn.pipeline.Pipeline:
      steps:
        - gordo.machine.model.transformers.imputer.InfImputer
    """,
        """
    sklearn.pipeline.Pipeline:
      steps:
        - gordo.machine.model.transformers.imputer.InfImputer:
            inf_fill_value: 10
    """,
        """gordo.machine.model.transformers.imputer.InfImputer""",
    ],
)
def test_imputer_from_definition(config_str: str):
    """
    InfImputer can be built by the gordo serializer from a YAML definition,
    either on its own or as the last step of a Pipeline, and the resulting
    model can be converted back into a definition and rebuilt.
    """
    definition = yaml.safe_load(config_str)
    model = serializer.from_definition(definition)

    # For a pipeline the imputer is the estimator of the final step;
    # otherwise the model itself must be the imputer.
    candidate = model.steps[-1][1] if isinstance(model, Pipeline) else model
    assert isinstance(candidate, InfImputer)

    # Round trip: model -> definition -> model must not raise
    round_trip_definition = serializer.into_definition(model)
    serializer.from_definition(round_trip_definition)