Python sklearn.pipeline.make_pipeline() Examples

The following are 30 code examples of sklearn.pipeline.make_pipeline(), drawn from open-source projects; the originating project and source file are noted above each example. You may also want to check out all available functions/classes of the module sklearn.pipeline, or try the search function.
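As a quick orientation before the examples: make_pipeline() builds a Pipeline without requiring explicit step names; each step's name is derived from the lowercased class name of the estimator. A minimal, self-contained sketch (standard scikit-learn API):

from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

X, y = load_iris(return_X_y=True)

# Step names are generated automatically from the estimator class names.
pipe = make_pipeline(StandardScaler(), LogisticRegression(max_iter=1000))
pipe.fit(X, y)
print(list(pipe.named_steps))  # ['standardscaler', 'logisticregression']
print(pipe.score(X, y))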
Example #1
Source File: 2_transform_solution.py    From pandas-feature-union with MIT License
def main():
    raw_data = load_iris()
    data = pd.DataFrame(raw_data["data"], columns=raw_data["feature_names"])
    data.loc[:, "class"] = raw_data["target"]

    pipeline = FeatureUnion([
        ("1", make_pipeline(
            PandasTransform(lambda X: X.loc[:, ["sepal length (cm)"]]),
            # other transformations
        )),
        ("2", make_pipeline(
            PandasTransform(lambda X: X.loc[:, ["sepal width (cm)"]]),
            # other transformations
        ))
    ])

    X = pipeline.fit_transform(data)
    print(X["sepal length (cm)"].mean())
    print(X["sepal width (cm)"].mean()) 
Example #2
Source File: 1_problem.py    From pandas-feature-union with MIT License
def main():
    raw_data = load_iris()
    data = pd.DataFrame(raw_data["data"], columns=raw_data["feature_names"])

    pipeline = FeatureUnion([
        ("1", make_pipeline(
            FunctionTransformer(lambda X: X.loc[:, ["sepal length (cm)"]]),
            # other transformations
        )),
        ("2", make_pipeline(
            FunctionTransformer(lambda X: X.loc[:, ["sepal width (cm)"]]),
            # other transformations
        ))
    ])

    X = pipeline.fit_transform(data)
    print(X["sepal length (cm)"].mean())
    print(X["sepal width (cm)"].mean()) 
Example #3
Source File: test_pipeline.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_make_pipeline():
    t1 = Transf()
    t2 = Transf()
    pipe = make_pipeline(t1, t2)
    assert isinstance(pipe, Pipeline)
    assert_equal(pipe.steps[0][0], "transf-1")
    assert_equal(pipe.steps[1][0], "transf-2")

    pipe = make_pipeline(t1, t2, FitParamT())
    assert isinstance(pipe, Pipeline)
    assert_equal(pipe.steps[0][0], "transf-1")
    assert_equal(pipe.steps[1][0], "transf-2")
    assert_equal(pipe.steps[2][0], "fitparamt")

    assert_raise_message(
        TypeError,
        'Unknown keyword arguments: "random_parameter"',
        make_pipeline, t1, t2, random_parameter='rnd'
    ) 
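The generated names exercised by this test are not just cosmetic: they are how step hyperparameters are addressed, as "<step name>__<param name>", in tools such as GridSearchCV. A brief sketch of that pattern (standard scikit-learn usage, not part of the test above):

from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

X, y = load_iris(return_X_y=True)
pipe = make_pipeline(StandardScaler(), LogisticRegression(max_iter=1000))

# Pipeline hyperparameters are addressed via the auto-generated step
# names ("logisticregression" here), joined to the parameter with "__".
grid = GridSearchCV(pipe, param_grid={"logisticregression__C": [0.1, 1.0, 10.0]}, cv=3)
grid.fit(X, y)
print(grid.best_params_)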
Example #4
Source File: benchmark_test.py    From nni with MIT License
def test_time(pipeline_name, name, path):
    if pipeline_name == "LR":
        pipeline = make_pipeline(LogisticRegression())

    if pipeline_name == "FGS":
        pipeline = make_pipeline(FeatureGradientSelector(), LogisticRegression())

    if pipeline_name == "Tree":
        pipeline = make_pipeline(SelectFromModel(ExtraTreesClassifier(n_estimators=50)), LogisticRegression())
    
    test_benchmark = Benchmark()
    print("Dataset:\t", name)
    print("Pipeline:\t", pipeline_name)
    starttime = datetime.datetime.now()
    test_benchmark.run_test(pipeline, name, path)
    endtime = datetime.datetime.now()
    print("Used time: ", (endtime - starttime).microseconds/1000)
    print("") 
Example #5
Source File: test_gradient_boosting.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_gradient_boosting_with_init_pipeline():
    # Check that the init estimator can be a pipeline (see issue #13466)

    X, y = make_regression(random_state=0)
    init = make_pipeline(LinearRegression())
    gb = GradientBoostingRegressor(init=init)
    gb.fit(X, y)  # pipeline without sample_weight works fine

    with pytest.raises(
            ValueError,
            match='The initial estimator Pipeline does not support sample '
                  'weights'):
        gb.fit(X, y, sample_weight=np.ones(X.shape[0]))

    # Passing sample_weight to a pipeline raises a ValueError. This test makes
    # sure we make the distinction between ValueError raised by a pipeline that
    # was passed sample_weight, and a ValueError raised by a regular estimator
    # whose input checking failed.
    with pytest.raises(
            ValueError,
            match='nu <= 0 or nu > 1'):
        # Note that NuSVR properly supports sample_weight
        init = NuSVR(gamma='auto', nu=1.5)
        gb = GradientBoostingRegressor(init=init)
        gb.fit(X, y, sample_weight=np.ones(X.shape[0])) 
Example #6
Source File: test_pprint.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_pipeline():
    # Render a pipeline object
    pipeline = make_pipeline(StandardScaler(), LogisticRegression(C=999))
    expected = """
Pipeline(memory=None,
         steps=[('standardscaler',
                 StandardScaler(copy=True, with_mean=True, with_std=True)),
                ('logisticregression',
                 LogisticRegression(C=999, class_weight=None, dual=False,
                                    fit_intercept=True, intercept_scaling=1,
                                    l1_ratio=None, max_iter=100,
                                    multi_class='warn', n_jobs=None,
                                    penalty='l2', random_state=None,
                                    solver='warn', tol=0.0001, verbose=0,
                                    warm_start=False))],
         verbose=False)"""

    expected = expected[1:]  # remove first \n
    assert pipeline.__repr__() == expected 
Example #7
Source File: test_core_pipeline.py    From lale with Apache License 2.0
def test_import_from_sklearn_pipeline_feature_union(self):
        from sklearn.pipeline import FeatureUnion        
        from sklearn.decomposition import PCA
        from sklearn.kernel_approximation import Nystroem
        from sklearn.neighbors import KNeighborsClassifier
        from sklearn.pipeline import make_pipeline
        union = FeatureUnion([("pca", PCA(n_components=1)), ("nys", Nystroem(n_components=2, random_state=42))])        
        sklearn_pipeline = make_pipeline(union, KNeighborsClassifier())
        lale_pipeline = import_from_sklearn_pipeline(sklearn_pipeline)
        self.assertEqual(len(lale_pipeline.edges()), 3)
        from lale.lib.sklearn.pca import PCAImpl
        from lale.lib.sklearn.nystroem import NystroemImpl
        from lale.lib.lale.concat_features import ConcatFeaturesImpl
        from lale.lib.sklearn.k_neighbors_classifier import KNeighborsClassifierImpl
        self.assertEqual(lale_pipeline.edges()[0][0]._impl_class(), PCAImpl)
        self.assertEqual(lale_pipeline.edges()[0][1]._impl_class(), ConcatFeaturesImpl)
        self.assertEqual(lale_pipeline.edges()[1][0]._impl_class(), NystroemImpl)
        self.assertEqual(lale_pipeline.edges()[1][1]._impl_class(), ConcatFeaturesImpl)
        self.assertEqual(lale_pipeline.edges()[2][0]._impl_class(), ConcatFeaturesImpl)
        self.assertEqual(lale_pipeline.edges()[2][1]._impl_class(), KNeighborsClassifierImpl)
        self.assert_equal_predictions(sklearn_pipeline, lale_pipeline) 
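The example above builds its FeatureUnion with explicit branch names; sklearn.pipeline also provides make_union(), the FeatureUnion analogue of make_pipeline(). A minimal sketch (standard scikit-learn API) of the same topology with auto-generated names:

from sklearn.datasets import load_iris
from sklearn.decomposition import PCA
from sklearn.kernel_approximation import Nystroem
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import make_pipeline, make_union

X, y = load_iris(return_X_y=True)

# make_union() is to FeatureUnion what make_pipeline() is to Pipeline;
# the branch names ("pca", "nystroem") are generated automatically.
union = make_union(PCA(n_components=1), Nystroem(n_components=2, random_state=42))
pipe = make_pipeline(union, KNeighborsClassifier())
pipe.fit(X, y)
print(pipe.score(X, y))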
Example #8
Source File: 3_feature_union_solution.py    From pandas-feature-union with MIT License
def main():
    raw_data = load_iris()
    data = pd.DataFrame(raw_data["data"], columns=raw_data["feature_names"])
    data.loc[:, "class"] = raw_data["target"]

    pipeline = PandasFeatureUnion([
        ("1", make_pipeline(
            PandasTransform(lambda X: X.loc[:, ["sepal length (cm)"]]),
            # other transformations
        )),
        ("2", make_pipeline(
            PandasTransform(lambda X: X.loc[:, ["sepal width (cm)"]]),
            # other transformations
        ))
    ])

    X = pipeline.fit_transform(data)
    print(X["sepal length (cm)"].mean())
    print(X["sepal width (cm)"].mean()) 
Example #9
Source File: test_core_pipeline.py    From lale with Apache License 2.0
def test_compare_with_sklearn(self):
        from lale.operators import make_pipeline
        tfm = PCA()
        clf = LogisticRegression(LogisticRegression.solver.lbfgs, LogisticRegression.multi_class.auto)
        trainable = make_pipeline(tfm, clf)
        digits = sklearn.datasets.load_digits()
        trained = trainable.fit(digits.data, digits.target)
        predicted = trained.predict(digits.data)
        from sklearn.pipeline import make_pipeline as scikit_make_pipeline
        from sklearn.decomposition import PCA as SklearnPCA
        from sklearn.linear_model import LogisticRegression as SklearnLR
        sklearn_pipeline = scikit_make_pipeline(SklearnPCA(), SklearnLR(solver="lbfgs", multi_class="auto"))
        sklearn_pipeline.fit(digits.data, digits.target)
        predicted_sklearn = sklearn_pipeline.predict(digits.data)

        from sklearn.metrics import accuracy_score
        lale_score = accuracy_score(digits.target, predicted)
        scikit_score = accuracy_score(digits.target, predicted_sklearn)
        self.assertEqual(lale_score, scikit_score) 
Example #10
Source File: test_coordinate_descent.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_lasso_cv_with_some_model_selection():
    from sklearn.pipeline import make_pipeline
    from sklearn.preprocessing import StandardScaler
    from sklearn.model_selection import StratifiedKFold
    from sklearn import datasets
    from sklearn.linear_model import LassoCV

    diabetes = datasets.load_diabetes()
    X = diabetes.data
    y = diabetes.target

    pipe = make_pipeline(
        StandardScaler(),
        LassoCV(cv=StratifiedKFold(n_splits=5))
    )
    pipe.fit(X, y) 
Example #11
Source File: language_detector.py    From text-mining-class with MIT License
def build_language_classifier(texts, labels, verbose=False, random_state=None):
    """Train a text classifier with scikit-learn

    The text classifier is composed of two elements assembled in a pipeline:

    - A text feature extractor (`TfidfVectorizer`) that extracts the relative
      frequencies of unigrams, bigrams and trigrams of characters in the text.

    - An instance of `SGDClassifier` for the classification itself. To speed
      up training, it is recommended to enable early stopping.

    `random_state` is passed to the underlying `SGDClassifier` instance.
    """
    language_classifier = make_pipeline(
        TfidfVectorizer(analyzer="char", ngram_range=(1, 3),
                        min_df=2, max_df=0.9, norm="l2", dtype=np.float32),
        SGDClassifier(early_stopping=True, validation_fraction=0.2,
                      n_iter_no_change=3, max_iter=1000, tol=1e-3,
                      alpha=1e-5, penalty="l2", verbose=verbose,
                      random_state=random_state)
    )
    return language_classifier.fit(texts, labels) 
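A usage sketch for the function above; the toy corpus and labels here are hypothetical, chosen only to make the snippet runnable:

# Hypothetical toy corpus; a real call would use a large labeled dataset.
texts = ["bonjour tout le monde", "hello world", "hallo zusammen", "hola mundo"] * 25
labels = ["fr", "en", "de", "es"] * 25

clf = build_language_classifier(texts, labels, random_state=0)
print(clf.predict(["good morning everyone"]))  # likely ['en'] with real training data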
Example #12
Source File: tests.py    From scikit-mdr with MIT License
def test_mdr_sklearn_pipeline_parallel():
    """Ensure that MDR can be used as a transformer in a parallelized scikit-learn pipeline"""
    features = np.array([[2,    0],
                         [0,    0],
                         [0,    1],
                         [0,    0],
                         [0,    0],
                         [0,    0],
                         [0,    1],
                         [0,    0],
                         [0,    0],
                         [0,    1],
                         [0,    0],
                         [0,    0],
                         [0,    0],
                         [1,    1],
                         [1,    1]])

    classes = np.array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0])
    clf = make_pipeline(MDR(), LogisticRegression())
    cv_scores = cross_val_score(clf, features, classes, cv=StratifiedKFold(n_splits=5, shuffle=True), n_jobs=-1)
    assert np.mean(cv_scores) > 0. 
Example #13
Source File: tests.py    From scikit-mdr with MIT License
def test_mdr_sklearn_pipeline():
    """Ensure that MDR can be used as a transformer in a scikit-learn pipeline"""
    features = np.array([[2,    0],
                         [0,    0],
                         [0,    1],
                         [0,    0],
                         [0,    0],
                         [0,    0],
                         [0,    1],
                         [0,    0],
                         [0,    0],
                         [0,    1],
                         [0,    0],
                         [0,    0],
                         [0,    0],
                         [1,    1],
                         [1,    1]])

    classes = np.array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0])
    clf = make_pipeline(MDR(), LogisticRegression())
    cv_scores = cross_val_score(clf, features, classes, cv=StratifiedKFold(n_splits=5, shuffle=True))
    assert np.mean(cv_scores) > 0. 
Example #14
Source File: sklearn_test.py    From nni with MIT License
def test():
    url_zip_train = 'https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/rcv1_train.binary.bz2'
    urllib.request.urlretrieve(url_zip_train, filename='train.bz2')

    f_svm = open('train.svm', 'wt')
    with bz2.open('train.bz2', 'rb') as f_zip:
        data = f_zip.read()
        f_svm.write(data.decode('utf-8'))
    f_svm.close()


    X, y = load_svmlight_file('train.svm')
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)


    pipeline = make_pipeline(FeatureGradientSelector(n_epochs=1, n_features=10), LogisticRegression())
    # pipeline = make_pipeline(SelectFromModel(ExtraTreesClassifier(n_estimators=50)), LogisticRegression())

    pipeline.fit(X_train, y_train)

    print("Pipeline Score: ", pipeline.score(X_train, y_train)) 
Example #15
Source File: test_core_operators.py    From lale with Apache License 2.0
def test_comparison_with_scikit(self):
        import warnings
        warnings.filterwarnings("ignore")
        from lale.lib.sklearn import PCA
        import sklearn.datasets
        from lale.helpers import cross_val_score
        pca = PCA(n_components=3, random_state=42, svd_solver='arpack')
        nys = Nystroem(n_components=10, random_state=42)
        concat = ConcatFeatures()
        lr = LogisticRegression(random_state=42, C=0.1)
        trainable = (pca & nys) >> concat >> lr
        digits = sklearn.datasets.load_digits()
        X, y = sklearn.utils.shuffle(digits.data, digits.target, random_state=42)

        cv_results = cross_val_score(trainable, X, y)
        cv_results = ['{0:.1%}'.format(score) for score in cv_results]

        from sklearn.pipeline import make_pipeline, FeatureUnion
        from sklearn.decomposition import PCA as SklearnPCA
        from sklearn.kernel_approximation import Nystroem as SklearnNystroem
        from sklearn.linear_model import LogisticRegression as SklearnLR
        from sklearn.model_selection import cross_val_score
        union = FeatureUnion([("pca", SklearnPCA(n_components=3, random_state=42, svd_solver='arpack')),
                            ("nys", SklearnNystroem(n_components=10, random_state=42))])
        lr = SklearnLR(random_state=42, C=0.1)
        pipeline = make_pipeline(union, lr)

        scikit_cv_results = cross_val_score(pipeline, X, y, cv = 5)
        scikit_cv_results = ['{0:.1%}'.format(score) for score in scikit_cv_results]
        self.assertEqual(cv_results, scikit_cv_results)
        warnings.resetwarnings() 
Example #16
Source File: ols.py    From bartpy with MIT License
def run(n: int=10000, k_true: int=3, k_null: int=2):
    b_true = np.random.uniform(2, 0.1, size=k_true)
    b_true = np.array(list(b_true) + [0.0] * k_null)
    print(b_true)
    x = np.random.normal(0, 1, size=n * (k_true + k_null)).reshape(n, (k_true + k_null))

    X = pd.DataFrame(x)
    y = np.random.normal(0, 0.1, size=n) + np.array(X.multiply(b_true, axis=1).sum(axis=1))

    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.33,
                                                        random_state=42,
                                                        shuffle=True)

    model = SklearnModel(n_samples=50,
                         n_burn=50,
                         n_trees=20,
                         store_in_sample_predictions=False,
                         n_jobs=3,
                         n_chains=1)

    pipeline = make_pipeline(SelectNullDistributionThreshold(model, n_permutations=20), model)
    pipeline_model = pipeline.fit(X_train, y_train)
    print("Thresholds", pipeline_model.named_steps["selectnulldistributionthreshold"].thresholds)
    print("Feature Proportions", pipeline_model.named_steps["selectnulldistributionthreshold"].feature_proportions)
    print("Is Kept", pipeline_model.named_steps["selectnulldistributionthreshold"]._get_support_mask())
    pipeline_model.named_steps["selectnulldistributionthreshold"].plot() 
Example #17
Source File: test_core_operators.py    From lale with Apache License 2.0
def test_concat_with_hyperopt2(self):
        from lale.operators import make_pipeline, make_union
        from lale.lib.lale import Hyperopt
        pca = PCA(n_components=3)
        nys = Nystroem(n_components=10)
        concat = ConcatFeatures()
        lr = LogisticRegression(random_state=42, C=0.1)

        trainable = make_pipeline(make_union(pca, nys), lr)
        clf = Hyperopt(estimator=trainable, max_evals=2)
        from sklearn.datasets import load_iris
        iris_data = load_iris()
        clf.fit(iris_data.data, iris_data.target)
        clf.predict(iris_data.data) 
Example #18
Source File: sklearn_patches.py    From tslearn with BSD 2-Clause "Simplified" License
def check_pipeline_consistency(name, estimator_orig):
    if estimator_orig._get_tags()['non_deterministic']:
        msg = name + ' is non deterministic'
        raise SkipTest(msg)

    # check that make_pipeline(est) gives same score as est
    X, y = make_blobs(n_samples=30, centers=[[0, 0, 0], [1, 1, 1]],
                      random_state=0, n_features=2, cluster_std=0.1)
    X -= X.min()
    X = pairwise_estimator_convert_X(X, estimator_orig, kernel=rbf_kernel)
    estimator = clone(estimator_orig)
    y = multioutput_estimator_convert_y_2d(estimator, y)
    set_random_state(estimator)
    pipeline = make_pipeline(estimator)
    estimator.fit(X, y)
    pipeline.fit(X, y)

    funcs = ["score", "fit_transform"]

    for func_name in funcs:
        func = getattr(estimator, func_name, None)
        if func is not None:
            func_pipeline = getattr(pipeline, func_name)
            result = func(X, y)
            result_pipe = func_pipeline(X, y)
            assert_allclose_dense_sparse(result, result_pipe) 
Example #19
Source File: tests.py    From scikit-rebate with MIT License
def test_multisurf_pipeline_missing_values():
    """Check: Data (Missing Values): MultiSURF works in a sklearn pipeline"""
    np.random.seed(320931)
 
    clf = make_pipeline(MultiSURF(n_features_to_select=2),
                        Imputer(),
                        RandomForestClassifier(n_estimators=100, n_jobs=-1))
 
    assert np.mean(cross_val_score(clf, features_missing_values, labels_missing_values, cv=3, n_jobs=-1)) > 0.7 
Example #20
Source File: tests.py    From scikit-rebate with MIT License
def test_multisurfstar_pipeline_missing_values():
    """Check: Data (Missing Values): MultiSURF* works in a sklearn pipeline"""
    np.random.seed(320931)
 
    clf = make_pipeline(MultiSURFstar(n_features_to_select=2),
                        Imputer(),
                        RandomForestClassifier(n_estimators=100, n_jobs=-1))
 
    assert np.mean(cross_val_score(clf, features_missing_values, labels_missing_values, cv=3, n_jobs=-1)) > 0.7 
Example #21
Source File: tests.py    From scikit-rebate with MIT License
def test_surfstar_pipeline_missing_values():
    """Check: Data (Missing Values): SURF* works in a sklearn pipeline"""
    np.random.seed(9238745)
 
    clf = make_pipeline(SURFstar(n_features_to_select=2),
                        Imputer(),
                        RandomForestClassifier(n_estimators=100, n_jobs=-1))
 
    assert np.mean(cross_val_score(clf, features_missing_values, labels_missing_values, cv=3, n_jobs=-1)) > 0.7 
Example #22
Source File: tests.py    From scikit-rebate with MIT License
def test_surf_pipeline_missing_values():
    """Check: Data (Missing Values): SURF works in a sklearn pipeline"""
    np.random.seed(240932)
 
    clf = make_pipeline(SURF(n_features_to_select=2),
                        Imputer(),
                        RandomForestClassifier(n_estimators=100, n_jobs=-1))
 
    assert np.mean(cross_val_score(clf, features_missing_values, labels_missing_values, cv=3, n_jobs=-1)) > 0.7 
Example #23
Source File: tests.py    From scikit-rebate with MIT License
def test_relieff_pipeline_missing_values():
    """Check: Data (Missing Values): ReliefF works in a sklearn pipeline"""
    np.random.seed(49082)
 
    clf = make_pipeline(ReliefF(n_features_to_select=2, n_neighbors=10),
                        Imputer(),
                        RandomForestClassifier(n_estimators=100, n_jobs=-1))
 
    assert np.mean(cross_val_score(clf, features_missing_values, labels_missing_values, cv=3, n_jobs=-1)) > 0.7 
Example #24
Source File: tests.py    From scikit-rebate with MIT License
def test_multisurfstar_pipeline_mixed_attributes():
    """Check: Data (Mixed Attributes): MultiSURF* works in a sklearn pipeline"""
    np.random.seed(320931)
 
    clf = make_pipeline(MultiSURFstar(n_features_to_select=2),
                        RandomForestClassifier(n_estimators=100, n_jobs=-1))
 
    assert np.mean(cross_val_score(clf, features_mixed_attributes,
                                   labels_mixed_attributes, cv=3, n_jobs=-1)) > 0.7 
Example #25
Source File: tests.py    From scikit-rebate with MIT License
def test_surfstar_pipeline_mixed_attributes():
    """Check: Data (Mixed Attributes): SURF* works in a sklearn pipeline"""
    np.random.seed(9238745)
 
    clf = make_pipeline(SURFstar(n_features_to_select=2),
                        RandomForestClassifier(n_estimators=100, n_jobs=-1))
 
    assert np.mean(cross_val_score(clf, features_mixed_attributes,
                                   labels_mixed_attributes, cv=3, n_jobs=-1)) > 0.7 
Example #26
Source File: tests.py    From scikit-rebate with MIT License
def test_surf_pipeline_mixed_attributes():
    """Check: Data (Mixed Attributes): SURF works in a sklearn pipeline"""
    np.random.seed(240932)
 
    clf = make_pipeline(SURF(n_features_to_select=2),
                        RandomForestClassifier(n_estimators=100, n_jobs=-1))
 
    assert np.mean(cross_val_score(clf, features_mixed_attributes,
                                   labels_mixed_attributes, cv=3, n_jobs=-1)) > 0.7 
Example #27
Source File: tests.py    From scikit-rebate with MIT License
def test_relieff_pipeline_mixed_attributes():
    """Check: Data (Mixed Attributes): ReliefF works in a sklearn pipeline"""
    np.random.seed(49082)
 
    clf = make_pipeline(ReliefF(n_features_to_select=2, n_neighbors=10),
                        RandomForestClassifier(n_estimators=100, n_jobs=-1))
 
    assert np.mean(cross_val_score(clf, features_mixed_attributes,
                                   labels_mixed_attributes, cv=3, n_jobs=-1)) > 0.7 
Example #28
Source File: tests.py    From scikit-rebate with MIT License
def test_multisurf_pipeline_cont_endpoint():
    """Check: Data (Continuous Endpoint): MultiSURF works in a sklearn pipeline"""
    np.random.seed(320931)
 
    clf = make_pipeline(MultiSURF(n_features_to_select=2),
                        RandomForestRegressor(n_estimators=100, n_jobs=-1))
 
    assert abs(np.mean(cross_val_score(clf, features_cont_endpoint, labels_cont_endpoint, cv=3, n_jobs=-1))) < 0.5
 
# Test Mixed Attribute Data ------------------------------------------------------------------------------------ 
Example #29
Source File: test_core_pipeline.py    From lale with Apache License 2.0
def test_import_from_sklearn_pipeline1(self):
        from sklearn.decomposition import PCA
        from sklearn.neighbors import KNeighborsClassifier
        from sklearn.pipeline import make_pipeline
        sklearn_pipeline = make_pipeline(PCA(n_components=3), KNeighborsClassifier())
        lale_pipeline = import_from_sklearn_pipeline(sklearn_pipeline)
        for i, pipeline_step in enumerate(sklearn_pipeline.named_steps):
            sklearn_step_params = sklearn_pipeline.named_steps[pipeline_step].get_params()
            lale_sklearn_params = lale_pipeline.steps()[i]._impl._wrapped_model.get_params()
            self.assertEqual(sklearn_step_params, lale_sklearn_params)
        self.assert_equal_predictions(sklearn_pipeline, lale_pipeline) 
Example #30
Source File: tests.py    From scikit-rebate with MIT License
def test_surfstar_pipeline_cont_endpoint():
    """Check: Data (Continuous Endpoint): SURF* works in a sklearn pipeline"""
    np.random.seed(9238745)
 
    clf = make_pipeline(SURFstar(n_features_to_select=2),
                        RandomForestRegressor(n_estimators=100, n_jobs=-1))
 
    assert abs(np.mean(cross_val_score(clf, features_cont_endpoint, labels_cont_endpoint, cv=3, n_jobs=-1))) < 0.5