Python sklearn.pipeline.make_union() Examples

The following are 8 code examples of sklearn.pipeline.make_union(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module sklearn.pipeline , or try the search function .
Example #1
Source File: test_impute.py    From Mastering-Elasticsearch-7.0 with MIT License 5 votes vote down vote up
def test_missing_indicator_with_imputer(X, missing_values, X_trans_exp):
    trans = make_union(
        SimpleImputer(missing_values=missing_values, strategy='most_frequent'),
        MissingIndicator(missing_values=missing_values)
    )
    X_trans = trans.fit_transform(X)
    assert_array_equal(X_trans, X_trans_exp) 
Example #2
Source File: test_pipeline.py    From Mastering-Elasticsearch-7.0 with MIT License 5 votes vote down vote up
def test_make_union():
    pca = PCA(svd_solver='full')
    mock = Transf()
    fu = make_union(pca, mock)
    names, transformers = zip(*fu.transformer_list)
    assert_equal(names, ("pca", "transf"))
    assert_equal(transformers, (pca, mock)) 
Example #3
Source File: test_pipeline.py    From Mastering-Elasticsearch-7.0 with MIT License 5 votes vote down vote up
def test_make_union_kwargs():
    pca = PCA(svd_solver='full')
    mock = Transf()
    fu = make_union(pca, mock, n_jobs=3)
    assert_equal(fu.transformer_list, make_union(pca, mock).transformer_list)
    assert_equal(3, fu.n_jobs)
    # invalid keyword parameters should raise an error message
    assert_raise_message(
        TypeError,
        'Unknown keyword arguments: "transformer_weights"',
        make_union, pca, mock, transformer_weights={'pca': 10, 'Transf': 1}
    ) 
Example #4
Source File: mercari_golf.py    From mercari-solution with MIT License 5 votes vote down vote up
def main():
    vectorizer = make_union(
        on_field('name', Tfidf(max_features=100000, token_pattern='\w+')),
        on_field('text', Tfidf(max_features=100000, token_pattern='\w+', ngram_range=(1, 2))),
        on_field(['shipping', 'item_condition_id'],
                 FunctionTransformer(to_records, validate=False), DictVectorizer()),
        n_jobs=4)
    y_scaler = StandardScaler()
    with timer('process train'):
        train = pd.read_table('../input/train.tsv')
        train = train[train['price'] > 0].reset_index(drop=True)
        cv = KFold(n_splits=20, shuffle=True, random_state=42)
        train_ids, valid_ids = next(cv.split(train))
        train, valid = train.iloc[train_ids], train.iloc[valid_ids]
        y_train = y_scaler.fit_transform(np.log1p(train['price'].values.reshape(-1, 1)))
        X_train = vectorizer.fit_transform(preprocess(train)).astype(np.float32)
        print(f'X_train: {X_train.shape} of {X_train.dtype}')
        del train
    with timer('process valid'):
        X_valid = vectorizer.transform(preprocess(valid)).astype(np.float32)
    with ThreadPool(processes=4) as pool:
        Xb_train, Xb_valid = [x.astype(np.bool).astype(np.float32) for x in [X_train, X_valid]]
        xs = [[Xb_train, Xb_valid], [X_train, X_valid]] * 2
        y_pred = np.mean(pool.map(partial(fit_predict, y_train=y_train), xs), axis=0)
    y_pred = np.expm1(y_scaler.inverse_transform(y_pred.reshape(-1, 1))[:, 0])
    print('Valid RMSLE: {:.4f}'.format(np.sqrt(mean_squared_log_error(valid['price'], y_pred)))) 
Example #5
Source File: main.py    From LearnProgrammingBot with MIT License 5 votes vote down vote up
def __init__(self, training_values=None, training_targets=None):
        self.vectorizer = make_union(TfidfVectorizer(), PostTransformer())
        # Set using parameter_search. TODO: review after updating
        # corpus.
        self.classifier = svm.LinearSVC(C=1, loss='squared_hinge', multi_class='ovr', class_weight='balanced', tol=1e-6)
        if training_values is not None and training_targets is not None:
            self.fit(training_values, training_targets) 
Example #6
Source File: test_pipeline.py    From pandas-ml with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def test_objectmapper(self):
        df = pdml.ModelFrame([])
        self.assertIs(df.pipeline.Pipeline, pipeline.Pipeline)
        self.assertIs(df.pipeline.FeatureUnion, pipeline.FeatureUnion)
        self.assertIs(df.pipeline.make_pipeline, pipeline.make_pipeline)
        self.assertIs(df.pipeline.make_union, pipeline.make_union) 
Example #7
Source File: test_pipeline.py    From twitter-stock-recommendation with MIT License 5 votes vote down vote up
def test_make_union():
    pca = PCA(svd_solver='full')
    mock = Transf()
    fu = make_union(pca, mock)
    names, transformers = zip(*fu.transformer_list)
    assert_equal(names, ("pca", "transf"))
    assert_equal(transformers, (pca, mock)) 
Example #8
Source File: test_pipeline.py    From twitter-stock-recommendation with MIT License 5 votes vote down vote up
def test_make_union_kwargs():
    pca = PCA(svd_solver='full')
    mock = Transf()
    fu = make_union(pca, mock, n_jobs=3)
    assert_equal(fu.transformer_list, make_union(pca, mock).transformer_list)
    assert_equal(3, fu.n_jobs)
    # invalid keyword parameters should raise an error message
    assert_raise_message(
        TypeError,
        'Unknown keyword arguments: "transformer_weights"',
        make_union, pca, mock, transformer_weights={'pca': 10, 'Transf': 1}
    )