Python sklearn.pipeline.FeatureUnion() Examples

The following are 30 code examples of sklearn.pipeline.FeatureUnion(). Each example is taken from an open-source project; the source file, project, and license are noted above each example. You may also want to check out all available functions/classes of the module sklearn.pipeline, or try the search function.
Example #1
Source File: 1_problem.py    From pandas-feature-union with MIT License
def main():
    raw_data = load_iris()
    data = pd.DataFrame(raw_data["data"], columns=raw_data["feature_names"])

    pipeline = FeatureUnion([
        ("1", make_pipeline(
            FunctionTransformer(lambda X: X.loc[:, ["sepal length (cm)"]]),
            # other transformations
        )),
        ("2", make_pipeline(
            FunctionTransformer(lambda X: X.loc[:, ["sepal width (cm)"]]),
            # other transformations
        ))
    ])

    X = pipeline.fit_transform(data)
    print(X["sepal length (cm)"].mean())
    print(X["sepal width (cm)"].mean()) 
Example #2
Source File: 2_transform_solution.py    From pandas-feature-union with MIT License
def main():
    raw_data = load_iris()
    data = pd.DataFrame(raw_data["data"], columns=raw_data["feature_names"])
    data.loc[:, "class"] = raw_data["target"]

    pipeline = FeatureUnion([
        ("1", make_pipeline(
            PandasTransform(lambda X: X.loc[:, ["sepal length (cm)"]]),
            # other transformations
        )),
        ("2", make_pipeline(
            PandasTransform(lambda X: X.loc[:, ["sepal width (cm)"]]),
            # other transformations
        ))
    ])

    X = pipeline.fit_transform(data)
    print(X["sepal length (cm)"].mean())
    print(X["sepal width (cm)"].mean()) 
Example #3
Source File: train.py    From skorch with BSD 3-Clause "New" or "Revised" License
def get_model(with_pipeline=False):
    """Get a multi-layer perceptron model.

    Optionally, put it in a pipeline that scales the data.

    """
    model = NeuralNetClassifier(MLPClassifier)
    if with_pipeline:
        model = Pipeline([
            ('scale', FeatureUnion([
                ('minmax', MinMaxScaler()),
                ('normalize', Normalizer()),
            ])),
            ('select', SelectKBest(k=N_FEATURES)),  # keep input size constant
            ('net', model),
        ])
    return model 
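A detail worth spelling out: the union stacks the MinMaxScaler and Normalizer outputs side by side, doubling the column count, and SelectKBest(k=N_FEATURES) (N_FEATURES is a constant defined elsewhere in the script) trims it back so the network's input size is unchanged. A quick check of that width arithmetic:

import numpy as np
from sklearn.pipeline import FeatureUnion
from sklearn.preprocessing import MinMaxScaler, Normalizer

X = np.random.rand(10, 20)
union = FeatureUnion([('minmax', MinMaxScaler()), ('normalize', Normalizer())])
assert union.fit_transform(X).shape == (10, 40)  # twice the input width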
Example #4
Source File: transform_utils.py    From professional-services with Apache License 2.0
def transform(self, X):
    if self.func is None:
      return X

    if self.signature:
      input_dims, output_dims = _parse_gufunc_signature(
          signature=self.signature)
    else:
      input_dims, output_dims = [()], [()]

    # The check below ensures that FeatureUnion's concatenation (hstack) does
    # not fail because the resulting arrays have different numbers of dims.
    if len(input_dims[0]) == 1 and len(output_dims[0]) == 0:
      X = np.expand_dims(X, axis=1)  # Add one extra dimension if (n)->()
    elif len(input_dims[0]) == 0 and len(output_dims[0]) == 1:
      X = np.squeeze(X, axis=1)  # Remove singleton dimension if ()->(n)

    # otypes uses the builtin float (the np.float alias no longer exists in NumPy)
    return np.vectorize(self.func, otypes=[float], signature=self.signature)(
        X)
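The signature strings follow numpy's generalized-ufunc notation: '(n)->()' consumes a vector and emits a scalar, so the vectorized function returns one dimension fewer than FeatureUnion's hstack expects, which is what the expand_dims/squeeze fix-up above compensates for. A standalone illustration (toy names, not from the original module):

import numpy as np

row_mean = np.vectorize(np.mean, signature='(n)->()')
X = np.array([[1.0, 2.0], [3.0, 4.0]])
print(row_mean(X))                                # [1.5 3.5], shape (2,)
print(np.expand_dims(row_mean(X), axis=1).shape)  # (2, 1), safe to hstack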
Example #5
Source File: feature_union.py    From mercari-solution with MIT License
def fit(self, X, y=None):
        """Fit all transformers using X.

        Parameters
        ----------
        X : iterable or array-like, depending on transformers
            Input data, used to fit transformers.

        y : array-like, shape (n_samples, ...), optional
            Targets for supervised learning.

        Returns
        -------
        self : FeatureUnion
            This estimator
        """
        self.transformer_list = list(self.transformer_list)
        self._validate_transformers()
        with Pool(self.n_jobs) as pool:
            transformers = pool.starmap(_fit_one_transformer,
                                        ((trans, X[trans.steps[0][1].columns], y) for _, trans, _ in self._iter()))
        self._update_transformer_list(transformers)
        return self 
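This is a multiprocessing variant of FeatureUnion.fit: X is sliced down to each sub-pipeline's declared columns (each trans is assumed to be a Pipeline whose first step carries a columns attribute), and the transformers are fitted in a Pool. The _fit_one_transformer helper is not shown; sklearn's own version of it amounts to:

def _fit_one_transformer(transformer, X, y):
    return transformer.fit(X, y)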
Example #6
Source File: __init__.py    From sklearn2pmml with GNU Affero General Public License v3.0
def _filter(obj):
	if isinstance(obj, DataFrameMapper):
		obj.features = _filter_steps(obj.features)
		if hasattr(obj, "built_features"):
			if obj.built_features is not None:
				obj.built_features = _filter_steps(obj.built_features)
	elif isinstance(obj, ColumnTransformer):
		obj.transformers = _filter_steps(obj.transformers)
		obj.remainder = _filter(obj.remainder)
		if hasattr(obj, "transformers_"):
			obj.transformers_ = _filter_steps(obj.transformers_)
	elif isinstance(obj, FeatureUnion):
		obj.transformer_list = _filter_steps(obj.transformer_list)
	elif isinstance(obj, Pipeline):
		obj.steps = _filter_steps(obj.steps)
	elif isinstance(obj, SelectorMixin):
		return SelectorProxy(obj)
	elif isinstance(obj, list):
		return [_filter(e) for e in obj]
	return obj 
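The companion helper _filter_steps is not shown; judging from how it is used here, it maps _filter over the estimator element of each step tuple. A sketch consistent with that usage (an assumption, not the package's verbatim code):

def _filter_steps(steps):
    # steps are (name, estimator) or (name, estimator, columns) tuples
    return [step[:1] + (_filter(step[1]),) + step[2:] for step in steps]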
Example #7
Source File: test_core_pipeline.py    From lale with Apache License 2.0
def test_import_from_sklearn_pipeline_feature_union(self):
        from sklearn.pipeline import FeatureUnion        
        from sklearn.decomposition import PCA
        from sklearn.kernel_approximation import Nystroem
        from sklearn.neighbors import KNeighborsClassifier
        from sklearn.pipeline import make_pipeline
        union = FeatureUnion([("pca", PCA(n_components=1)), ("nys", Nystroem(n_components=2, random_state=42))])        
        sklearn_pipeline = make_pipeline(union, KNeighborsClassifier())
        lale_pipeline = import_from_sklearn_pipeline(sklearn_pipeline)
        self.assertEqual(len(lale_pipeline.edges()), 3)
        from lale.lib.sklearn.pca import PCAImpl
        from lale.lib.sklearn.nystroem import NystroemImpl
        from lale.lib.lale.concat_features import ConcatFeaturesImpl
        from lale.lib.sklearn.k_neighbors_classifier import KNeighborsClassifierImpl
        self.assertEqual(lale_pipeline.edges()[0][0]._impl_class(), PCAImpl)
        self.assertEqual(lale_pipeline.edges()[0][1]._impl_class(), ConcatFeaturesImpl)
        self.assertEqual(lale_pipeline.edges()[1][0]._impl_class(), NystroemImpl)
        self.assertEqual(lale_pipeline.edges()[1][1]._impl_class(), ConcatFeaturesImpl)
        self.assertEqual(lale_pipeline.edges()[2][0]._impl_class(), ConcatFeaturesImpl)
        self.assertEqual(lale_pipeline.edges()[2][1]._impl_class(), KNeighborsClassifierImpl)
        self.assert_equal_predictions(sklearn_pipeline, lale_pipeline) 
Example #8
Source File: field_types.py    From lexpredict-contraxsuite with GNU Affero General Public License v3.0
def build_vectorization_pipeline(self) -> Tuple[List[Tuple[str, Any]], Callable[[], List[str]]]:
        vect_numerator = vectorizers.NumberVectorizer()
        vect_denominator = vectorizers.NumberVectorizer()

        def get_feature_names_(vect_numerator, vect_denominator):
            def res():
                return ['numerator_' + str(c) for c in vect_numerator.get_feature_names()] \
                       + ['denominator_' + str(c) for c in vect_denominator.get_feature_names()]

            return res

        return [
                   ('vect', FeatureUnion(transformer_list=[
                       ('numerator', Pipeline([
                           ('selector', vectorizers.DictItemSelector(item='numerator')),
                           ('vect', vect_numerator),
                       ])),
                       ('denominator', Pipeline([
                           ('selector', vectorizers.DictItemSelector(item='denominator')),
                           ('vect', vect_denominator),
                       ]))
                   ]))
               ], get_feature_names_(vect_numerator, vect_denominator) 
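vectorizers.DictItemSelector is project-specific; the pattern is a selector transformer that pulls one key out of each dict-like sample, so each branch of the union vectorizes its own field. A minimal sketch of that pattern (assumed behavior, not the project's exact class):

from sklearn.base import BaseEstimator, TransformerMixin

class DictItemSelector(BaseEstimator, TransformerMixin):
    def __init__(self, item):
        self.item = item

    def fit(self, X, y=None):
        return self

    def transform(self, X):
        return [x[self.item] for x in X]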
Example #9
Source File: test_investigate.py    From sklearn-onnx with MIT License
def test_simple_feature_union(self):
        data = numpy.array([[0, 0], [0, 0], [2, 1], [2, 1]],
                           dtype=numpy.float32)
        model = FeatureUnion([("scaler1", StandardScaler()),
                             ("scaler2", RobustScaler())])
        model.fit(data)
        all_models = list(enumerate_pipeline_models(model))
        steps = collect_intermediate_steps(model, "feature union",
                                           [("input",
                                             FloatTensorType([None, 2]))])

        assert len(steps) == 2
        assert len(all_models) == 3

        model.transform(data)
        for step in steps:
            onnx_step = step['onnx_step']
            sess = onnxruntime.InferenceSession(onnx_step.SerializeToString())
            onnx_outputs = sess.run(None, {'input': data})
            onnx_output = onnx_outputs[0]
            skl_outputs = step['model']._debug.outputs['transform']
            assert_almost_equal(onnx_output, skl_outputs)
            compare_objects(onnx_output, skl_outputs) 
Example #10
Source File: test_sklearn_feature_union.py    From sklearn-onnx with MIT License
def test_feature_union_default(self):
        data = load_iris()
        X, y = data.data, data.target
        X = X.astype(np.float32)
        X_train, X_test, *_ = train_test_split(X, y, test_size=0.5,
                                               random_state=42)
        model = FeatureUnion([('standard', StandardScaler()),
                              ('minmax', MinMaxScaler())]).fit(X_train)
        model_onnx = convert_sklearn(
            model, 'feature union',
            [('input', FloatTensorType([None, X_test.shape[1]]))])
        self.assertTrue(model_onnx is not None)
        dump_data_and_model(X_test,
                            model,
                            model_onnx,
                            basename="SklearnFeatureUnionDefault") 
Example #11
Source File: test_sklearn_feature_union.py    From sklearn-onnx with MIT License
def test_feature_union_transformer_weights_1(self):
        data = load_digits()
        X, y = data.data, data.target
        X = X.astype(np.int64)
        X_train, X_test, *_ = train_test_split(X, y, test_size=0.5,
                                               random_state=42)
        model = FeatureUnion([('pca', PCA()),
                              ('svd', TruncatedSVD())],
                             transformer_weights={'pca': 10, 'svd': 3}
                             ).fit(X_train)
        model_onnx = convert_sklearn(
            model, 'feature union',
            [('input', Int64TensorType([None, X_test.shape[1]]))])
        self.assertTrue(model_onnx is not None)
        dump_data_and_model(
            X_test,
            model,
            model_onnx,
            basename="SklearnFeatureUnionTransformerWeights1-Dec4",
            allow_failure="StrictVersion("
            "onnxruntime.__version__)"
            "<= StrictVersion('0.2.1')",
        ) 
Example #12
Source File: test_sklearn_feature_union.py    From sklearn-onnx with MIT License
def test_feature_union_transformer_weights_2(self):
        data = load_digits()
        X, y = data.data, data.target
        X = X.astype(np.float32)
        X_train, X_test, *_ = train_test_split(X, y, test_size=0.5,
                                               random_state=42)
        model = FeatureUnion([('pca', PCA()),
                              ('svd', TruncatedSVD())],
                             transformer_weights={'pca1': 10, 'svd2': 3}
                             ).fit(X_train)
        model_onnx = convert_sklearn(
            model, 'feature union',
            [('input', FloatTensorType([None, X_test.shape[1]]))])
        self.assertTrue(model_onnx is not None)
        dump_data_and_model(
            X_test,
            model,
            model_onnx,
            basename="SklearnFeatureUnionTransformerWeights2-Dec4",
            allow_failure="StrictVersion("
            "onnxruntime.__version__)"
            "<= StrictVersion('0.2.1')",
        ) 
Example #13
Source File: _base.py    From ibex with BSD 3-Clause "New" or "Revised" License
def __add__(self, other):
        """

        Returns:
            :py:class:`ibex.sklearn.pipeline.FeatureUnion`
        """

        if isinstance(self, FeatureUnion):
            self_features = [e[1] for e in self.transformer_list]
        else:
            self_features = [self]

        if isinstance(other, FeatureUnion):
            other_features = [e[1] for e in other.transformer_list]
        else:
            other_features = [other]

        combined = self_features + other_features

        return FeatureUnion(_make_pipeline_steps(combined)) 
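_make_pipeline_steps is an ibex-internal helper; the effect needed here is the same auto-naming that sklearn's make_union performs, so a rough stand-in (an assumption, not ibex's code) would be:

from sklearn.pipeline import _name_estimators

def _make_pipeline_steps(estimators):
    return _name_estimators(estimators)  # e.g. [('pca', PCA()), ('svc', SVC())]

With that in place, est1 + est2 yields a two-branch FeatureUnion, and adding to an existing union extends it flat rather than nesting unions inside unions.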
Example #14
Source File: test_pipeline.py    From sktime with BSD 3-Clause "New" or "Revised" License
def test_FeatureUnion_pipeline():
    # pipeline with segmentation plus multiple feature extraction
    steps = [
        ('segment', RandomIntervalSegmenter(n_intervals=3)),
        ('transform', FeatureUnion([
            ('mean', RowTransformer(
                FunctionTransformer(func=np.mean, validate=False))),
            ('std',
             RowTransformer(FunctionTransformer(func=np.std, validate=False)))
        ])),
        ('clf', DecisionTreeClassifier())
    ]
    clf = Pipeline(steps)

    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    assert y_pred.shape[0] == y_test.shape[0]
    np.testing.assert_array_equal(np.unique(y_pred), np.unique(y_test)) 
Example #15
Source File: test_debug_pipeline.py    From scikit-lego with MIT License
def test_feature_union(caplog, named_steps):
    pipe_w_default_log_callback = DebugPipeline(named_steps, log_callback="default")
    pipe_w_custom_log_callback = DebugPipeline(named_steps, log_callback=custom_log_callback)

    pipe_union = FeatureUnion(
        [
            ("pipe_w_default_log_callback", pipe_w_default_log_callback),
            ("pipe_w_custom_log_callback", pipe_w_custom_log_callback),
        ]
    )

    caplog.clear()
    with caplog.at_level(logging.INFO):
        pipe_union.fit(IRIS.data, IRIS.target)
    assert caplog.text, f"Log should be none empty: {caplog.text}"
    for pipe in [pipe_w_default_log_callback, pipe_w_custom_log_callback]:
        for _, step in pipe.steps[:-1]:
            assert str(step) in caplog.text, f"{step} should be in: {caplog.text}"
            assert (
                caplog.text.count(str(step)) == 2
            ), f"{step} should appear exactly twice in {caplog.text}" 
Example #16
Source File: 04_sent.py    From Building-Machine-Learning-Systems-With-Python-Second-Edition with MIT License
def create_union_model(params=None):
    def preprocessor(tweet):
        tweet = tweet.lower()

        for k in emo_repl_order:
            tweet = tweet.replace(k, emo_repl[k])
        for r, repl in re_repl.items():
            tweet = re.sub(r, repl, tweet)

        return tweet.replace("-", " ").replace("_", " ")

    tfidf_ngrams = TfidfVectorizer(preprocessor=preprocessor,
                                   analyzer="word")
    ling_stats = LinguisticVectorizer()
    all_features = FeatureUnion(
        [('ling', ling_stats), ('tfidf', tfidf_ngrams)])
    #all_features = FeatureUnion([('tfidf', tfidf_ngrams)])
    #all_features = FeatureUnion([('ling', ling_stats)])
    clf = MultinomialNB()
    pipeline = Pipeline([('all', all_features), ('clf', clf)])

    if params:
        pipeline.set_params(**params)

    return pipeline 
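emo_repl, emo_repl_order and re_repl are lookup tables defined earlier in the book's script. Abbreviated stand-ins that show their shape (the book's versions are longer):

emo_repl = {":-)": " good ", ":)": " good ", ":-(": " bad ", ":(": " bad "}
# longest emoticons first, so ":-)" is replaced before ":)" can match inside it
emo_repl_order = [k for (k_len, k) in reversed(sorted((len(k), k) for k in emo_repl))]
re_repl = {r"\bu\b": " you ", r"\br\b": " are ", r"\bcant\b": " can not "}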
Example #17
Source File: test_pipeline.py    From skits with MIT License
def test_multioutput_forecast(self):
        # TODO: Make this a real test

        steps = [
            ("pre_horizon", HorizonTransformer(horizon=4)),
            ("pre_imputer", ReversibleImputer(y_only=True)),
            (
                "features",
                FeatureUnion(
                    [("ar_transformer", AutoregressiveTransformer(num_lags=3))]
                ),
            ),
            ("post_lag_imputer", ReversibleImputer()),
            ("regressor", LinearRegression()),
        ]

        pipeline = ForecasterPipeline(steps)

        t = np.linspace(0, 1, 100)
        y = np.sin(2 * np.pi * 5 * t) + np.random.normal(0, 0.1, size=100)

        pipeline.fit(y[:, np.newaxis], y)

        pipeline.forecast(y[:, np.newaxis], 20) 
Example #18
Source File: test_pipeline.py    From skits with MIT License
def test_multioutput_prediction(self):
        # TODO: Make this a real test

        steps = [
            ("pre_horizon", HorizonTransformer(horizon=4)),
            ("pre_imputer", ReversibleImputer(y_only=True)),
            (
                "features",
                FeatureUnion(
                    [("ar_transformer", AutoregressiveTransformer(num_lags=3))]
                ),
            ),
            ("post_lag_imputer", ReversibleImputer()),
            ("regressor", LinearRegression()),
        ]

        pipeline = ForecasterPipeline(steps)

        t = np.linspace(0, 1, 100)
        y = np.sin(2 * np.pi * 5 * t) + np.random.normal(0, 0.1, size=100)

        pipeline.fit(y[:, np.newaxis], y)

        pipeline.predict(y[:, np.newaxis], to_scale=True, refit=True) 
Example #19
Source File: test_core_pipeline.py    From lale with Apache License 2.0
def test_export_to_sklearn_pipeline3(self):
        from lale.lib.lale import ConcatFeatures
        from lale.lib.sklearn import PCA
        from lale.lib.sklearn import KNeighborsClassifier, LogisticRegression, SVC 
        from sklearn.feature_selection import SelectKBest
        from lale.lib.sklearn import Nystroem
        from sklearn.pipeline import FeatureUnion

        lale_pipeline = ((PCA() >> SelectKBest(k=2)) & (Nystroem(random_state = 42) >> SelectKBest(k=3))
         & (SelectKBest(k=3))) >> ConcatFeatures() >> SelectKBest(k=2) >> LogisticRegression()
        trained_lale_pipeline = lale_pipeline.fit(self.X_train, self.y_train)
        sklearn_pipeline = trained_lale_pipeline.export_to_sklearn_pipeline()
        self.assertIsInstance(sklearn_pipeline.named_steps['featureunion'], FeatureUnion)
        self.assertIsInstance(sklearn_pipeline.named_steps['selectkbest'], SelectKBest)
        from sklearn.linear_model import LogisticRegression
        self.assertIsInstance(sklearn_pipeline.named_steps['logisticregression'], LogisticRegression)
        self.assert_equal_predictions(sklearn_pipeline, trained_lale_pipeline) 
Example #20
Source File: pipeline.py    From sparkit-learn with Apache License 2.0
def make_sparkunion(*transformers):
    """Construct a FeatureUnion from the given transformers.
    This is a shorthand for the FeatureUnion constructor; it does not require,
    and does not permit, naming the transformers. Instead, they will be given
    names automatically based on their types. It also does not allow weighting.
    Examples
    --------
    >>> from sklearn.decomposition import PCA, TruncatedSVD
    >>> make_sparkunion(PCA(), TruncatedSVD())   # doctest: +NORMALIZE_WHITESPACE
    SparkFeatureUnion(n_jobs=1,
                 transformer_list=[('pca', PCA(copy=True, n_components=None,
                                               whiten=False)),
                                   ('truncatedsvd',
                                    TruncatedSVD(algorithm='randomized',
                                                 n_components=2, n_iter=5,
                                                 random_state=None, tol=0.0))],
                 transformer_weights=None)
    Returns
    -------
    f : SparkFeatureUnion
    """
    return SparkFeatureUnion(_name_estimators(transformers)) 
Example #21
Source File: test_pipeline.py    From sparkit-learn with Apache License 2.0
def test_same_result_weight(self):
        X, Z = self.make_text_rdd(2)

        loc_char = CountVectorizer(analyzer="char_wb", ngram_range=(3, 3))
        dist_char = SparkCountVectorizer(analyzer="char_wb", ngram_range=(3, 3))

        loc_word = CountVectorizer(analyzer="word")
        dist_word = SparkCountVectorizer(analyzer="word")

        loc_union = FeatureUnion([
            ("chars", loc_char),
            ("words", loc_word)
        ], transformer_weights={"words": 10})
        dist_union = SparkFeatureUnion([
            ("chars", dist_char),
            ("words", dist_word)
        ], transformer_weights={"words": 10})

        loc_union.fit(X)
        dist_union.fit(Z)

        X_transformed = loc_union.transform(X)
        Z_transformed = sp.vstack(dist_union.transform(Z).collect())
        assert_array_equal(X_transformed.toarray(), Z_transformed.toarray()) 
Example #22
Source File: feature_extraction.py    From mne-features with BSD 3-Clause "New" or "Revised" License
def _apply_extractor(extractor, X, return_as_df):
    """Utility function to apply features extractor to ndarray X.

    Parameters
    ----------
    extractor : Instance of :class:`~sklearn.pipeline.FeatureUnion` or
    :class:`~sklearn.pipeline.Pipeline`.

    X : ndarray, shape (n_channels, n_times)

    return_as_df : bool

    Returns
    -------
    X : ndarray, shape (n_features,)

    feature_names : list of str | None
        Not None, only if ``return_as_df`` is True.
    """
    X = extractor.fit_transform(X)
    feature_names = None
    if return_as_df:
        feature_names = extractor.get_feature_names()
    return X, feature_names 
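For a sense of the shapes involved, a hypothetical call (illustrative transformers, not mne-features' real extractors) reducing a (n_channels, n_times) array to per-channel scalars:

import numpy as np
from sklearn.pipeline import FeatureUnion
from sklearn.preprocessing import FunctionTransformer

extractor = FeatureUnion([
    ('mean', FunctionTransformer(lambda d: d.mean(axis=-1), validate=False)),
    ('std', FunctionTransformer(lambda d: d.std(axis=-1), validate=False)),
])
data = np.random.randn(4, 100)  # 4 channels, 100 time points
Xt, names = _apply_extractor(extractor, data, return_as_df=False)
print(Xt.shape)                 # (8,), i.e. (n_features,)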
Example #23
Source File: test_pipeline.py    From sktime with BSD 3-Clause "New" or "Revised" License
def test_FeatureUnion():
    X, y = load_gunpoint(return_X_y=True)
    ft = FunctionTransformer(func=np.mean, validate=False)
    t = RowTransformer(ft)
    fu = FeatureUnion([
        ('mean', t),
        ('std',
         RowTransformer(FunctionTransformer(func=np.std, validate=False)))
    ])
    Xt = fu.fit_transform(X, y)
    assert Xt.shape == (X.shape[0], X.shape[1] * len(fu.transformer_list)) 
Example #24
Source File: test_feature_importances_.py    From sktime with BSD 3-Clause "New" or "Revised" License
def test_feature_importances_single_feature_interval_and_estimator():
    random_state = 1234

    # Compute using default method
    features = [np.mean]
    steps = [('transform', RandomIntervalFeatureExtractor(
                n_intervals=1,
                features=features,
                random_state=random_state)),
             ('clf', DecisionTreeClassifier())]
    base_estimator = Pipeline(steps)
    clf1 = TimeSeriesForestClassifier(estimator=base_estimator,
                                      random_state=random_state,
                                      n_estimators=1)
    clf1.fit(X_train, y_train)

    # Extract the interval and the estimator, and compute using pipelines
    intervals = clf1.estimators_[0].steps[0][1].intervals_
    steps = [
        ('segment', IntervalSegmenter(intervals)),
        ('transform', FeatureUnion([
            ('mean', RowTransformer(
                FunctionTransformer(func=np.mean, validate=False)))
            ])),
        ('clf', clone(clf1.estimators_[0].steps[-1][1]))
    ]
    clf2 = Pipeline(steps)
    clf2.fit(X_train, y_train)

    # Check for feature importances obtained from the estimators
    fi_expected = clf1.estimators_[0].steps[-1][1].feature_importances_
    fi_actual = clf2.steps[-1][1].feature_importances_
    np.testing.assert_array_equal(fi_actual, fi_expected)


# Check for 4 more complex cases with 3 features, with both numbers of
# intervals and estimators varied from 1 to 2.
# Feature importances from each estimator on each interval, and
# normalised feature values of the time series are checked using
# different but equivalent implementations 
Example #25
Source File: __init__.py    From sklearn2pmml with GNU Affero General Public License v3.0
def test_predict_transform(self):
		predict_transformer = FeatureUnion([
			("identity", FunctionTransformer(None)),
			("log10", FunctionTransformer(numpy.log10))
		])
		pipeline = PMMLPipeline([("estimator", DummyRegressor())], predict_transformer = predict_transformer)
		X = DataFrame([[1, 0], [2, 0], [3, 0]], columns = ["X1", "X2"])
		y = Series([0.5, 1.0, 1.5], name = "y")
		pipeline.fit(X, y)
		y_pred = [1.0, 1.0, 1.0]
		y_predt = [1.0, 1.0, numpy.log10(1.0)]
		self.assertEqual(y_pred, pipeline.predict(X).tolist())
		self.assertEqual([y_predt for i in range(0, 3)], pipeline.predict_transform(X).tolist()) 
Example #26
Source File: __init__.py    From sklearn2pmml with GNU Affero General Public License v3.0
def test_timedelta_days(self):
		X = DataFrame([["2018-12-31", "2019-01-01"], ["2019-01-31", "2019-01-01"]], columns = ["left", "right"])
		pipeline = clone(Pipeline([
			("union", FeatureUnion([
				("left_mapper", DataFrameMapper([
					("left", [DateDomain(), DaysSinceYearTransformer(year = 2010)])
				])),
				("right_mapper", DataFrameMapper([
					("right", [DateDomain(), DaysSinceYearTransformer(year = 2010)])
				]))
			])),
			("expression", Alias(ExpressionTransformer("X[0] - X[1]"), "delta(left, right)", prefit = True))
		]))
		Xt = pipeline.fit_transform(X)
		self.assertEqual([[-1], [30]], Xt.tolist()) 
Example #27
Source File: methods.py    From dask-ml with BSD 3-Clause "New" or "Revised" License
def feature_union(names, steps, weights):
    """Reconstruct a FeatureUnion from names, steps, and weights"""
    steps, times = zip(*map(_maybe_timed, steps))
    fit_time = sum(times)
    if any(s is FIT_FAILURE for s in steps):
        fit_est = FIT_FAILURE
    else:
        fit_est = FeatureUnion(list(zip(names, steps)), transformer_weights=weights)
    return fit_est, fit_time 
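_maybe_timed is a module-private helper; from how its result is unpacked here, each entry of steps is either a fitted estimator or an (estimator, fit_time) pair. A stand-in consistent with that usage (an assumption, not dask-ml's verbatim helper):

def _maybe_timed(x):
    # Unpack (estimator, fit_time) pairs; treat bare results as untimed
    if isinstance(x, tuple) and len(x) == 2:
        return x
    return x, 0.0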
Example #28
Source File: _parse.py    From sklearn-onnx with MIT License
def build_sklearn_parsers_map():
    map_parser = {
        pipeline.Pipeline: _parse_sklearn_pipeline,
        pipeline.FeatureUnion: _parse_sklearn_feature_union,
        GaussianProcessRegressor: _parse_sklearn_gaussian_process,
        GridSearchCV: _parse_sklearn_grid_search_cv,
    }
    if ColumnTransformer is not None:
        map_parser[ColumnTransformer] = _parse_sklearn_column_transformer

    for tmodel in sklearn_classifier_list:
        if tmodel not in [LinearSVC]:
            map_parser[tmodel] = _parse_sklearn_classifier
    return map_parser 
Example #29
Source File: lr_model.py    From autogluon with Apache License 2.0
def preprocess_train(self, X, feature_types, vect_max_features):
        transformer_list = []
        if len(feature_types['language']) > 0:
            pipeline = Pipeline(steps=[
                ("preparator", NlpDataPreprocessor(nlp_cols=feature_types['language'])),
                ("vectorizer",
                 TfidfVectorizer(ngram_range=self.params['proc.ngram_range'], sublinear_tf=True, max_features=vect_max_features, tokenizer=self.tokenize))
            ])
            transformer_list.append(('vect', pipeline))
        if len(feature_types['onehot']) > 0:
            pipeline = Pipeline(steps=[
                ('generator', OheFeaturesGenerator(cats_cols=feature_types['onehot'])),
            ])
            transformer_list.append(('cats', pipeline))
        if len(feature_types['continuous']) > 0:
            pipeline = Pipeline(steps=[
                ('generator', NumericDataPreprocessor(cont_cols=feature_types['continuous'])),
                ('imputer', SimpleImputer(strategy=self.params['proc.impute_strategy'])),
                ('scaler', StandardScaler())
            ])
            transformer_list.append(('cont', pipeline))
        if len(feature_types['skewed']) > 0:
            pipeline = Pipeline(steps=[
                ('generator', NumericDataPreprocessor(cont_cols=feature_types['skewed'])),
                ('imputer', SimpleImputer(strategy=self.params['proc.impute_strategy'])),
                ('quantile', QuantileTransformer(output_distribution='normal')),  # Or output_distribution = 'uniform'
            ])
            transformer_list.append(('skew', pipeline))
        self.pipeline = FeatureUnion(transformer_list=transformer_list)
        self.pipeline.fit(X) 
Example #30
Source File: test_pipeline.py    From twitter-stock-recommendation with MIT License
def test_feature_union_weights():
    # test feature union with transformer weights
    iris = load_iris()
    X = iris.data
    y = iris.target
    pca = PCA(n_components=2, svd_solver='randomized', random_state=0)
    select = SelectKBest(k=1)
    # test using fit followed by transform
    fs = FeatureUnion([("pca", pca), ("select", select)],
                      transformer_weights={"pca": 10})
    fs.fit(X, y)
    X_transformed = fs.transform(X)
    # test using fit_transform
    fs = FeatureUnion([("pca", pca), ("select", select)],
                      transformer_weights={"pca": 10})
    X_fit_transformed = fs.fit_transform(X, y)
    # test it works with transformers missing fit_transform
    fs = FeatureUnion([("mock", Transf()), ("pca", pca), ("select", select)],
                      transformer_weights={"mock": 10})
    X_fit_transformed_wo_method = fs.fit_transform(X, y)
    # check against expected result

    # We use a different pca object to control the random_state stream
    assert_array_almost_equal(X_transformed[:, :-1], 10 * pca.fit_transform(X))
    assert_array_equal(X_transformed[:, -1],
                       select.fit_transform(X, y).ravel())
    assert_array_almost_equal(X_fit_transformed[:, :-1],
                              10 * pca.fit_transform(X))
    assert_array_equal(X_fit_transformed[:, -1],
                       select.fit_transform(X, y).ravel())
    assert_equal(X_fit_transformed_wo_method.shape, (X.shape[0], 7))
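Transf is a mock from the same test module: a transformer with fit and transform but deliberately no fit_transform, which forces FeatureUnion onto its fit-then-transform fallback path. A minimal equivalent (a sketch; the real test file builds it from a small class hierarchy):

from sklearn.base import BaseEstimator

class Transf(BaseEstimator):
    def fit(self, X, y=None):
        return self

    def transform(self, X):
        return X  # identity: 4 iris columns + 2 PCA + 1 KBest = the 7 asserted above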