""" Tests pipeline within pipelines. """ import numpy as np import unittest try: from sklearn.compose import ColumnTransformer except ImportError: # not available in 0.19 ColumnTransformer = None try: from sklearn.impute import SimpleImputer except ImportError: from sklearn.preprocessing import Imputer as SimpleImputer from sklearn.decomposition import PCA from sklearn.linear_model import LogisticRegression from sklearn.naive_bayes import MultinomialNB from sklearn.pipeline import Pipeline from sklearn.preprocessing import MinMaxScaler from sklearn.preprocessing import RobustScaler, StandardScaler from skl2onnx import convert_sklearn from skl2onnx.common.data_types import FloatTensorType from skl2onnx.common.data_types import onnx_built_with_ml from test_utils import dump_data_and_model, TARGET_OPSET class TestSklearnPipelineWithinPipeline(unittest.TestCase): @unittest.skipIf(not onnx_built_with_ml(), reason="Requires ONNX-ML extension.") def test_pipeline_pca_pipeline_minmax(self): model = Pipeline( memory=None, steps=[ ( "PCA", PCA( copy=True, iterated_power="auto", n_components=0.15842105263157896, random_state=None, tol=0.0, svd_solver="auto", whiten=False, ), ), ( "Pipeline", Pipeline( memory=None, steps=[( "MinMax scaler", MinMaxScaler( copy=True, feature_range=(0, 3.7209871159509307), ), )], ), ), ], ) data = np.array([[0, 0], [0, 0], [1, 1], [1, 1]], dtype=np.float32) y = [0, 0, 1, 1] model.fit(data, y) model_onnx = convert_sklearn( model, "pipelinewithinpipeline", [("input", FloatTensorType(data.shape))], ) self.assertTrue(model_onnx is not None) dump_data_and_model( data, model, model_onnx, basename="SklearnPipelinePcaPipelineMinMax", allow_failure="StrictVersion(onnxruntime.__version__)" " <= StrictVersion('0.2.1')", ) @unittest.skipIf(not onnx_built_with_ml(), reason="Requires ONNX-ML extension.") def test_pipeline_pca_pipeline_none_lin(self): model = Pipeline( memory=None, steps=[ ( "PCA", PCA( copy=True, iterated_power="auto", n_components=0.15842105263157896, random_state=None, tol=0.0, svd_solver="auto", whiten=False, ), ), ( "Pipeline", Pipeline( memory=None, steps=[ ( "MinMax scaler", MinMaxScaler( copy=True, feature_range=(0, 3.7209871159509307), ), ), ("logreg", LogisticRegression(solver="liblinear")), ], ), ), ], ) data = np.array([[0, 0], [0, 0], [1, 1], [1, 1]], dtype=np.float32) y = [0, 0, 1, 1] model.fit(data, y) model_onnx = convert_sklearn( model, "pipelinewithinpipeline", [("input", FloatTensorType(data.shape))], ) self.assertTrue(model_onnx is not None) dump_data_and_model( data, model, model_onnx, basename="SklearnPipelinePcaPipelineMinMaxLogReg", allow_failure="StrictVersion(onnxruntime.__version__)" " <= StrictVersion('0.2.1')", ) @unittest.skipIf(not onnx_built_with_ml(), reason="Requires ONNX-ML extension.") def test_pipeline_pca_pipeline_multinomial(self): model = Pipeline( memory=None, steps=[ ( "PCA", PCA( copy=True, iterated_power="auto", n_components=2, random_state=None, svd_solver="auto", tol=0.0, whiten=False, ), ), ( "Pipeline", Pipeline( memory=None, steps=[ ( "MinMax scaler", MinMaxScaler( copy=True, feature_range=(0, 3.7209871159509307), ), ), ( "MultinomialNB", MultinomialNB( alpha=0.7368421052631579, class_prior=None, fit_prior=True, ), ), ], ), ), ], ) data = np.array( [[0, 0, 0], [0, 0, 0.1], [1, 1, 1.1], [1, 1.1, 1]], dtype=np.float32, ) y = [0, 0, 1, 1] model.fit(data, y) model_onnx = convert_sklearn( model, "pipelinewithinpipeline", [("input", FloatTensorType(data.shape))], target_opset=TARGET_OPSET ) self.assertTrue(model_onnx is not None) dump_data_and_model( data, model, model_onnx, basename="SklearnPipelinePcaPipelineMinMaxNB2", allow_failure="StrictVersion(onnxruntime.__version__)" " <= StrictVersion('0.2.1')", ) @unittest.skipIf(not onnx_built_with_ml(), reason="Requires ONNX-ML extension.") def test_pipeline_pca_pipeline_multinomial_none(self): model = Pipeline( memory=None, steps=[ ( "PCA", PCA( copy=True, iterated_power="auto", n_components=0.15842105263157896, random_state=None, tol=0.0, svd_solver="auto", whiten=False, ), ), ( "Pipeline", Pipeline( memory=None, steps=[ ( "MinMax scaler", MinMaxScaler( copy=True, feature_range=(0, 3.7209871159509307), ), ), ( "MultinomialNB", MultinomialNB( alpha=0.7368421052631579, class_prior=None, fit_prior=True, ), ), ], ), ), ], ) data = np.array([[0, 0], [0, 0], [1, 1], [1, 1]], dtype=np.float32) y = [0, 0, 1, 1] model.fit(data, y) model_onnx = convert_sklearn( model, "pipelinewithinpipeline", [("input", FloatTensorType(data.shape))], target_opset=TARGET_OPSET ) self.assertTrue(model_onnx is not None) dump_data_and_model( data, model, model_onnx, basename="SklearnPipelinePcaPipelineMinMaxNBNone", allow_failure="StrictVersion(onnxruntime.__version__)" " <= StrictVersion('0.2.1')", ) @unittest.skipIf( ColumnTransformer is None, reason="ColumnTransformer not available in 0.19", ) @unittest.skipIf(not onnx_built_with_ml(), reason="Requires ONNX-ML extension.") def test_pipeline_column_transformer_pipeline_imputer_scaler_lr(self): X = np.array([[1, 2], [3, np.nan], [3, 0]], dtype=np.float32) y = np.array([1, 0, 1]) model = Pipeline([ ( "ct", ColumnTransformer([ ( "pipeline1", Pipeline([ ("imputer", SimpleImputer()), ("scaler", StandardScaler()), ]), [0], ), ( "pipeline2", Pipeline([ ("imputer", SimpleImputer()), ("scaler", RobustScaler()), ]), [1], ), ]), ), ("lr", LogisticRegression(solver="liblinear")), ]) model.fit(X, y) model_onnx = convert_sklearn( model, "pipelinewithinpipeline", [("input", FloatTensorType([None, X.shape[1]]))], ) self.assertTrue(model_onnx is not None) dump_data_and_model( X, model, model_onnx, basename="SklearnPipelineCTPipelineImputerScalerLR", allow_failure="StrictVersion(onnxruntime.__version__)" " <= StrictVersion('0.2.1')", ) if __name__ == "__main__": unittest.main()