Python sklearn.preprocessing.FunctionTransformer() Examples

The following are 30 code examples of sklearn.preprocessing.FunctionTransformer(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module sklearn.preprocessing , or try the search function

Example #1

Source File: test_compose.py From sktime with BSD 3-Clause "New" or "Revised" License

7 votes

def test_ColumnTransformer_pipeline():
    X_train, y_train = load_basic_motions(split="train", return_X_y=True)
    X_test, y_test = load_basic_motions(split="test", return_X_y=True)

    # using Identity function transformers (transform series to series)
    def id_func(X):
        return X
    column_transformer = ColumnTransformer([
        ('id0', FunctionTransformer(func=id_func, validate=False), ['dim_0']),
        ('id1', FunctionTransformer(func=id_func, validate=False), ['dim_1'])
    ])
    steps = [
        ('extract', column_transformer),
        ('tabularise', Tabularizer()),
        ('classify', RandomForestClassifier(n_estimators=2, random_state=1))]
    model = Pipeline(steps=steps)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    assert y_pred.shape[0] == y_test.shape[0]
    np.testing.assert_array_equal(np.unique(y_pred), np.unique(y_test))

Example #2

Source File: test_RandomIntervalFeatureExtractor.py From sktime with BSD 3-Clause "New" or "Revised" License

6 votes

def test_different_implementations():
    random_state = 1233
    X_train, y_train = make_classification_problem()

    # Compare with chained transformations.
    tran1 = RandomIntervalSegmenter(n_intervals='sqrt',
                                    random_state=random_state)
    tran2 = RowTransformer(FunctionTransformer(func=np.mean, validate=False))
    A = tran2.fit_transform(tran1.fit_transform(X_train))

    tran = RandomIntervalFeatureExtractor(n_intervals='sqrt',
                                          features=[np.mean],
                                          random_state=random_state)
    B = tran.fit_transform(X_train)

    np.testing.assert_array_equal(A, B)


# Compare with transformer pipeline using TSFeatureUnion.

Example #3

Source File: test_RandomIntervalFeatureExtractor.py From sktime with BSD 3-Clause "New" or "Revised" License

6 votes

def test_different_pipelines():
    random_state = 1233
    X_train, y_train = make_classification_problem()
    steps = [
        ('segment', RandomIntervalSegmenter(n_intervals='sqrt',
                                            random_state=random_state)),
        ('transform', FeatureUnion([
            ('mean', RowTransformer(
                FunctionTransformer(func=np.mean, validate=False))),
            ('std',
             RowTransformer(FunctionTransformer(func=np.std, validate=False))),
            ('slope', RowTransformer(
                FunctionTransformer(func=time_series_slope, validate=False))),
        ])),
    ]
    pipe = Pipeline(steps)
    a = pipe.fit_transform(X_train)
    tran = RandomIntervalFeatureExtractor(n_intervals='sqrt',
                                          features=[np.mean, np.std,
                                                    time_series_slope],
                                          random_state=random_state)
    b = tran.fit_transform(X_train)
    np.testing.assert_array_equal(a, b)
    np.testing.assert_array_equal(pipe.steps[0][1].intervals_, tran.intervals_)

Example #4

Source File: sindy.py From sparsereg with MIT License

6 votes

def __init__(
        self,
        alpha=1.0,
        threshold=0.1,
        degree=3,
        operators=None,
        dt=1.0,
        n_jobs=1,
        derivative=None,
        feature_names=None,
        kw={},
    ):
        self.alpha = alpha
        self.threshold = threshold
        self.degree = degree
        self.operators = operators
        self.n_jobs = n_jobs
        self.derivative = derivative or FunctionTransformer(func=_derivative, kw_args={"dt": dt})
        self.feature_names = feature_names
        self.kw = kw

Example #5

Source File: common_tabular_tests.py From interpret-community with MIT License

6 votes

def _get_transformations_one_to_many_greater(self, feature_names):
        # results in number of features greater than original features
        # copy all features except last one. For last one, replicate columns to create 3 more features
        transformations = []
        feature_names = list(feature_names)
        index = 0
        for f in feature_names[:-1]:
            transformations.append(("{}".format(index), "passthrough", [f]))
            index += 1

        def copy_func(x):
            return np.tile(x, (1, 3))

        copy_transformer = FunctionTransformer(copy_func)

        transformations.append(("copy_transformer", copy_transformer, [feature_names[-1]]))

        return ColumnTransformer(transformations)

Example #6

Source File: dataloader.py From models with MIT License

6 votes

def __init__(self, pos_features, pipeline_obj_path):
        """
        Args:
          pos_features: list of positional features to use
          pipeline_obj_path: path to the serialized pipeline obj_path
        """
        self.pos_features = pos_features
        self.pipeline_obj_path = pipeline_obj_path

        # deserialize the pickle file
        with open(self.pipeline_obj_path, "rb") as f:
            pipeline_obj = pickle.load(f)
        self.POS_FEATURES = pipeline_obj[0]
        self.minmax_scaler = pipeline_obj[1]
        self.imp = pipeline_obj[2]

        self.funct_transform = FunctionTransformer(func=sign_log_func,
                                                   inverse_func=sign_log_func_inverse)
        # for simplicity, assume all current pos_features are the
        # same as from before
        assert self.POS_FEATURES == self.pos_features

Example #7

Source File: test_preprocessing.py From pandas-ml with BSD 3-Clause "New" or "Revised" License

6 votes

def test_objectmapper(self):
        df = pdml.ModelFrame([])
        self.assertIs(df.preprocessing.Binarizer, pp.Binarizer)
        self.assertIs(df.preprocessing.FunctionTransformer,
                      pp.FunctionTransformer)
        self.assertIs(df.preprocessing.Imputer, pp.Imputer)
        self.assertIs(df.preprocessing.KernelCenterer, pp.KernelCenterer)
        self.assertIs(df.preprocessing.LabelBinarizer, pp.LabelBinarizer)
        self.assertIs(df.preprocessing.LabelEncoder, pp.LabelEncoder)
        self.assertIs(df.preprocessing.MultiLabelBinarizer, pp.MultiLabelBinarizer)
        self.assertIs(df.preprocessing.MaxAbsScaler, pp.MaxAbsScaler)
        self.assertIs(df.preprocessing.MinMaxScaler, pp.MinMaxScaler)
        self.assertIs(df.preprocessing.Normalizer, pp.Normalizer)
        self.assertIs(df.preprocessing.OneHotEncoder, pp.OneHotEncoder)
        self.assertIs(df.preprocessing.PolynomialFeatures, pp.PolynomialFeatures)
        self.assertIs(df.preprocessing.RobustScaler, pp.RobustScaler)
        self.assertIs(df.preprocessing.StandardScaler, pp.StandardScaler)

Example #8

Source File: test_pipeline.py From sktime with BSD 3-Clause "New" or "Revised" License

6 votes

def test_FeatureUnion_pipeline():
    # pipeline with segmentation plus multiple feature extraction
    steps = [
        ('segment', RandomIntervalSegmenter(n_intervals=3)),
        ('transform', FeatureUnion([
            ('mean', RowTransformer(
                FunctionTransformer(func=np.mean, validate=False))),
            ('std',
             RowTransformer(FunctionTransformer(func=np.std, validate=False)))
        ])),
        ('clf', DecisionTreeClassifier())
    ]
    clf = Pipeline(steps)

    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    assert y_pred.shape[0] == y_test.shape[0]
    np.testing.assert_array_equal(np.unique(y_pred), np.unique(y_test))

Example #9

Source File: __init__.py From sklearn2pmml with GNU Affero General Public License v3.0

5 votes

def test_predict_proba_transform(self):
		predict_proba_transformer = FunctionTransformer(numpy.log)
		pipeline = PMMLPipeline([("estimator", DummyClassifier(strategy = "prior"))], predict_proba_transformer = predict_proba_transformer)
		X = DataFrame([1.0, 1.0, 1.0, 1.0, 1.0, 1.0], columns = ["x"])
		y = Series(["green", "red", "yellow", "green", "red", "green"], name = "y")
		pipeline.fit(X, y)
		self.assertEqual(["green", "red", "yellow"], pipeline._final_estimator.classes_.tolist())
		y_proba = [3 / 6.0, 2 / 6.0, 1 / 6.0]
		y_probat = [numpy.log(x) for x in y_proba]
		self.assertEqual([y_proba for i in range(0, 6)], pipeline.predict_proba(X).tolist())
		self.assertEqual([y_proba + y_probat for i in range(0, 6)], pipeline.predict_proba_transform(X).tolist())

Example #10

Source File: test_compose.py From sktime with BSD 3-Clause "New" or "Revised" License

5 votes

def test_row_transformer_function_transformer_series_to_primitives():
    X, y = load_gunpoint(return_X_y=True)
    ft = FunctionTransformer(func=np.mean, validate=False)
    t = RowTransformer(ft)
    Xt = t.fit_transform(X, y)
    assert Xt.shape == X.shape
    assert isinstance(Xt.iloc[0, 0],
                      float)  # check series-to-primitive transforms

Example #11

Source File: test_pipeline.py From sktime with BSD 3-Clause "New" or "Revised" License

5 votes

def test_FeatureUnion():
    X, y = load_gunpoint(return_X_y=True)
    ft = FunctionTransformer(func=np.mean, validate=False)
    t = RowTransformer(ft)
    fu = FeatureUnion([
        ('mean', t),
        ('std',
         RowTransformer(FunctionTransformer(func=np.std, validate=False)))
    ])
    Xt = fu.fit_transform(X, y)
    assert Xt.shape == (X.shape[0], X.shape[1] * len(fu.transformer_list))

Example #12

Source File: __init__.py From sklearn2pmml with GNU Affero General Public License v3.0

5 votes

def test_predict_transform(self):
		predict_transformer = FeatureUnion([
			("identity", FunctionTransformer(None)),
			("log10", FunctionTransformer(numpy.log10))
		])
		pipeline = PMMLPipeline([("estimator", DummyRegressor())], predict_transformer = predict_transformer)
		X = DataFrame([[1, 0], [2, 0], [3, 0]], columns = ["X1", "X2"])
		y = Series([0.5, 1.0, 1.5], name = "y")
		pipeline.fit(X, y)
		y_pred = [1.0, 1.0, 1.0]
		y_predt = [1.0, 1.0, numpy.log10(1.0)]
		self.assertEqual(y_pred, pipeline.predict(X).tolist())
		self.assertEqual([y_predt for i in range(0, 3)], pipeline.predict_transform(X).tolist())

Example #13

Source File: test_compose.py From sktime with BSD 3-Clause "New" or "Revised" License

5 votes

def test_row_transformer_function_transformer_series_to_series():
    X, y = load_gunpoint(return_X_y=True)

    # series-to-series transform function
    def powerspectrum(x):
        fft = np.fft.fft(x)
        ps = fft.real * fft.real + fft.imag * fft.imag
        return ps[:ps.shape[0] // 2]

    ft = FunctionTransformer(func=powerspectrum, validate=False)
    t = RowTransformer(ft)
    Xt = t.fit_transform(X, y)
    assert Xt.shape == X.shape
    assert isinstance(Xt.iloc[0, 0], (
        pd.Series, np.ndarray))  # check series-to-series transforms

Example #14

Source File: test_sklearn_model_export.py From mlflow with Apache License 2.0

5 votes

def sklearn_custom_transformer_model(sklearn_knn_model):
    def transform(vec):
        print("Invoking custom transformer!")
        return vec + 1

    transformer = SKFunctionTransformer(transform, validate=True)
    pipeline = SKPipeline([("custom_transformer", transformer), ("knn", sklearn_knn_model.model)])
    return ModelWithData(pipeline, inference_data=datasets.load_iris().data[:, :2])

Example #15

Source File: test_preprocessing.py From pandas-ml with BSD 3-Clause "New" or "Revised" License

5 votes

def test_FunctionTransformer(self):
        iris = datasets.load_iris()
        df = pdml.ModelFrame(iris)

        mod1 = df.pp.FunctionTransformer(func=lambda x: x + 1)
        df.fit(mod1)
        result = df.transform(mod1)

        exp = df.copy()
        exp.data = exp.data + 1

        self.assertIsInstance(result, pdml.ModelFrame)
        tm.assert_frame_equal(result, exp)

Example #16

Source File: test_function_transformer.py From twitter-stock-recommendation with MIT License

5 votes

def test_np_log():
    X = np.arange(10).reshape((5, 2))

    # Test that the numpy.log example still works.
    assert_array_equal(
        FunctionTransformer(np.log1p).transform(X),
        np.log1p(X),
    )

Example #17

Source File: test_function_transformer.py From twitter-stock-recommendation with MIT License

5 votes

def test_kw_arg():
    X = np.linspace(0, 1, num=10).reshape((5, 2))

    F = FunctionTransformer(np.around, kw_args=dict(decimals=3))

    # Test that rounding is correct
    assert_array_equal(F.transform(X),
                       np.around(X, decimals=3))

Example #18

Source File: test_function_transformer.py From twitter-stock-recommendation with MIT License

5 votes

def test_kw_arg_update():
    X = np.linspace(0, 1, num=10).reshape((5, 2))

    F = FunctionTransformer(np.around, kw_args=dict(decimals=3))

    F.kw_args['decimals'] = 1

    # Test that rounding is correct
    assert_array_equal(F.transform(X), np.around(X, decimals=1))

Example #19

Source File: test_function_transformer.py From twitter-stock-recommendation with MIT License

5 votes

def test_kw_arg_reset():
    X = np.linspace(0, 1, num=10).reshape((5, 2))

    F = FunctionTransformer(np.around, kw_args=dict(decimals=3))

    F.kw_args = dict(decimals=1)

    # Test that rounding is correct
    assert_array_equal(F.transform(X), np.around(X, decimals=1))

Example #20

Source File: mercari_golf.py From mercari-solution with MIT License

5 votes

def main():
    vectorizer = make_union(
        on_field('name', Tfidf(max_features=100000, token_pattern='\w+')),
        on_field('text', Tfidf(max_features=100000, token_pattern='\w+', ngram_range=(1, 2))),
        on_field(['shipping', 'item_condition_id'],
                 FunctionTransformer(to_records, validate=False), DictVectorizer()),
        n_jobs=4)
    y_scaler = StandardScaler()
    with timer('process train'):
        train = pd.read_table('../input/train.tsv')
        train = train[train['price'] > 0].reset_index(drop=True)
        cv = KFold(n_splits=20, shuffle=True, random_state=42)
        train_ids, valid_ids = next(cv.split(train))
        train, valid = train.iloc[train_ids], train.iloc[valid_ids]
        y_train = y_scaler.fit_transform(np.log1p(train['price'].values.reshape(-1, 1)))
        X_train = vectorizer.fit_transform(preprocess(train)).astype(np.float32)
        print(f'X_train: {X_train.shape} of {X_train.dtype}')
        del train
    with timer('process valid'):
        X_valid = vectorizer.transform(preprocess(valid)).astype(np.float32)
    with ThreadPool(processes=4) as pool:
        Xb_train, Xb_valid = [x.astype(np.bool).astype(np.float32) for x in [X_train, X_valid]]
        xs = [[Xb_train, Xb_valid], [X_train, X_valid]] * 2
        y_pred = np.mean(pool.map(partial(fit_predict, y_train=y_train), xs), axis=0)
    y_pred = np.expm1(y_scaler.inverse_transform(y_pred.reshape(-1, 1))[:, 0])
    print('Valid RMSLE: {:.4f}'.format(np.sqrt(mean_squared_log_error(valid['price'], y_pred))))

Example #21

Source File: test_transformers.py From gordo with GNU Affero General Public License v3.0

5 votes

def test_multiply_by_function_transformer(self):
        from gordo.machine.model.transformer_funcs.general import multiply_by

        # Provide a require argument
        tf = FunctionTransformer(func=multiply_by, kw_args={"factor": 2})
        self._validate_transformer(tf)

        # Ignore the required argument
        tf = FunctionTransformer(func=multiply_by)
        with self.assertRaises(TypeError):
            self._validate_transformer(tf)

Example #22

Source File: test_target.py From Mastering-Elasticsearch-7.0 with MIT License

5 votes

def test_transform_target_regressor_1d_transformer(X, y):
    # All transformer in scikit-learn expect 2D data. FunctionTransformer with
    # validate=False lift this constraint without checking that the input is a
    # 2D vector. We check the consistency of the data shape using a 1D and 2D y
    # array.
    transformer = FunctionTransformer(func=lambda x: x + 1,
                                      inverse_func=lambda x: x - 1,
                                      validate=False)
    regr = TransformedTargetRegressor(regressor=LinearRegression(),
                                      transformer=transformer)
    y_pred = regr.fit(X, y).predict(X)
    assert y.shape == y_pred.shape
    # consistency forward transform
    y_tran = regr.transformer_.transform(y)
    _check_shifted_by_one(y, y_tran)
    assert y.shape == y_pred.shape
    # consistency inverse transform
    assert_allclose(y, regr.transformer_.inverse_transform(
        y_tran).squeeze())
    # consistency of the regressor
    lr = LinearRegression()
    transformer2 = clone(transformer)
    lr.fit(X, transformer2.fit_transform(y))
    y_lr_pred = lr.predict(X)
    assert_allclose(y_pred, transformer2.inverse_transform(y_lr_pred))
    assert_allclose(regr.regressor_.coef_, lr.coef_)

Example #23