"""
Tests scikit-learn's feature selection converters
"""
import unittest
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.feature_selection import (
    GenericUnivariateSelect,
    RFE,
    RFECV,
    SelectFdr,
    SelectFpr,
    SelectFromModel,
)
from sklearn.feature_selection import (
    SelectFwe,
    SelectKBest,
    SelectPercentile,
    VarianceThreshold,
)
from sklearn.svm import SVR
from skl2onnx import convert_sklearn
from skl2onnx.common.data_types import Int64TensorType, FloatTensorType
from test_utils import dump_data_and_model


class TestSklearnFeatureSelectionConverters(unittest.TestCase):
    """Check the ONNX conversion of scikit-learn feature selectors.

    Every selector is converted twice: once with an int64 input
    declaration and once with a float32 one. The shared steps (fit,
    convert, dump) live in private helpers so that each test only states
    what is specific to it: the model, the dtype and the names.
    """

    # Condition string passed as ``allow_failure`` to
    # ``dump_data_and_model``.
    # Operator cast-1 is not implemented in onnxruntime.
    # NOTE(review): presumably evaluated inside test_utils against the
    # installed onnx / onnxruntime versions -- confirm there.
    _ALLOW_FAILURE = (
        "StrictVersion(onnx.__version__)"
        " < StrictVersion('1.2') or "
        "StrictVersion(onnxruntime.__version__)"
        " <= StrictVersion('0.2.1')"
    )

    @staticmethod
    def _toy_data(dtype):
        """Return the 4x4 toy matrix as ``dtype`` and its binary labels."""
        X = np.array(
            [[1, 2, 3, 1], [0, 3, 1, 4], [3, 5, 6, 1], [1, 2, 1, 5]],
            dtype=dtype,
        )
        y = np.array([0, 1, 0, 1])
        return X, y

    @staticmethod
    def _tensor_type(dtype):
        """Return the skl2onnx tensor type class used for ``dtype``.

        Raises ``KeyError`` for any dtype other than ``np.int64`` and
        ``np.float32``, the only two these tests use.
        """
        # Looked up at call time so the class body itself does not
        # depend on the skl2onnx names.
        return {
            np.int64: Int64TensorType,
            np.float32: FloatTensorType,
        }[dtype]

    def _check_selector(self, model, data, dtype, name, basename,
                        **dump_options):
        """Fit ``model``, convert it to ONNX and dump the result.

        :param model: unfitted scikit-learn selector
        :param data: ``(X, y)`` pair the model is fitted on
        :param dtype: numpy dtype of the data handed to
            ``dump_data_and_model``; it also selects the ONNX input type
        :param name: name given to ``convert_sklearn``
        :param basename: ``basename`` given to ``dump_data_and_model``
        :param dump_options: extra keyword arguments forwarded to
            ``dump_data_and_model`` (only ``methods`` is used here)
        """
        X, y = data
        model.fit(X, y)
        input_type = self._tensor_type(dtype)([None, X.shape[1]])
        model_onnx = convert_sklearn(model, name, [("input", input_type)])
        self.assertIsNotNone(model_onnx)
        dump_data_and_model(
            # copy=False hands over X itself when it already has the
            # requested dtype; otherwise a converted copy is produced.
            X.astype(dtype, copy=False),
            model,
            model_onnx,
            basename=basename,
            allow_failure=self._ALLOW_FAILURE,
            **dump_options
        )

    def _check_on_toy_data(self, model, dtype, name, basename,
                           **dump_options):
        """Run ``_check_selector`` on the toy matrix built as ``dtype``."""
        self._check_selector(
            model, self._toy_data(dtype), dtype, name, basename,
            **dump_options)

    def _check_on_breast_cancer(self, model, dtype, name, basename):
        """Run ``_check_selector`` on the breast cancer dataset.

        The model is fitted on the data as loaded; only the data handed
        to ``dump_data_and_model`` is cast to ``dtype``.
        """
        self._check_selector(
            model, load_breast_cancer(return_X_y=True), dtype, name,
            basename)

    # ---- int64 inputs ----

    def test_generic_univariate_select_int(self):
        self._check_on_toy_data(
            GenericUnivariateSelect(), np.int64,
            "generic univariate select", "SklearnGenericUnivariateSelect")

    def test_rfe_int(self):
        self._check_on_toy_data(
            RFE(estimator=SVR(kernel="linear")), np.int64,
            "rfe", "SklearnRFE", methods=["transform"])

    def test_rfecv_int(self):
        self._check_on_toy_data(
            RFECV(estimator=SVR(kernel="linear"), cv=3), np.int64,
            "rfecv", "SklearnRFECV", methods=["transform"])

    def test_select_fdr_int(self):
        self._check_on_breast_cancer(
            SelectFdr(), np.int64, "select fdr", "SklearnSelectFdr")

    def test_select_fpr_int(self):
        self._check_on_toy_data(
            SelectFpr(), np.int64, "select fpr", "SklearnSelectFpr")

    def test_select_from_model_int(self):
        self._check_on_toy_data(
            SelectFromModel(estimator=SVR(kernel="linear")), np.int64,
            "select from model", "SklearnSelectFromModel")

    def test_select_fwe_int(self):
        self._check_on_breast_cancer(
            SelectFwe(), np.int64, "select fwe", "SklearnSelectFwe")

    def test_select_k_best_int(self):
        self._check_on_toy_data(
            SelectKBest(k="all"), np.int64,
            "select k best", "SklearnSelectKBest")

    def test_select_percentile_int(self):
        self._check_on_toy_data(
            SelectPercentile(), np.int64,
            "select percentile", "SklearnSelectPercentile")

    def test_variance_threshold_int(self):
        self._check_on_toy_data(
            VarianceThreshold(), np.int64,
            "variance threshold", "SklearnVarianceThreshold")

    # ---- float32 inputs ----

    def test_generic_univariate_select_float(self):
        self._check_on_toy_data(
            GenericUnivariateSelect(), np.float32,
            "generic univariate select", "SklearnGenericUnivariateSelect")

    def test_rfe_float(self):
        self._check_on_toy_data(
            RFE(estimator=SVR(kernel="linear")), np.float32,
            "rfe", "SklearnRFE", methods=["transform"])

    def test_rfecv_float(self):
        self._check_on_toy_data(
            RFECV(estimator=SVR(kernel="linear"), cv=3), np.float32,
            "rfecv", "SklearnRFECV", methods=["transform"])

    def test_select_fdr_float(self):
        self._check_on_breast_cancer(
            SelectFdr(), np.float32, "select fdr", "SklearnSelectFdr")

    def test_select_fpr_float(self):
        self._check_on_toy_data(
            SelectFpr(), np.float32, "select fpr", "SklearnSelectFpr")

    def test_select_from_model_float(self):
        self._check_on_toy_data(
            SelectFromModel(estimator=SVR(kernel="linear")), np.float32,
            "select from model", "SklearnSelectFromModel")

    def test_select_from_model_float_nomodel(self):
        """Conversion must raise ``RuntimeError`` with ``threshold=1e5``.

        NOTE(review): presumably because no feature passes such a high
        threshold -- confirm against the converter.
        """
        model = SelectFromModel(
            estimator=SVR(kernel="linear"), threshold=1e5)
        X, y = self._toy_data(np.float32)
        model.fit(X, y)
        with self.assertRaises(RuntimeError):
            convert_sklearn(
                model, "select from model",
                [("input", FloatTensorType([None, X.shape[1]]))])

    def test_select_fwe_float(self):
        self._check_on_breast_cancer(
            SelectFwe(), np.float32, "select fwe", "SklearnSelectFwe")

    def test_select_k_best_float(self):
        self._check_on_toy_data(
            SelectKBest(k="all"), np.float32,
            "select k best", "SklearnSelectKBest")

    def test_select_percentile_float(self):
        self._check_on_toy_data(
            SelectPercentile(), np.float32,
            "select percentile", "SklearnSelectPercentile")

    def test_variance_threshold_float(self):
        self._check_on_toy_data(
            VarianceThreshold(), np.float32,
            "variance threshold", "SklearnVarianceThreshold")


# Run the test cases of this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()