Python sklearn.base.clone() Examples

The following are 28 code examples of sklearn.base.clone(), collected from open-source projects. Each example notes the source file, project, and license it comes from. You may also want to check out all available functions/classes of the module sklearn.base, or try the search function.
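As a quick orientation before the project examples: clone() builds a new, unfitted estimator with the same constructor parameters as the original, but none of its fitted state. A minimal sketch (not taken from any of the projects below):

from sklearn.base import clone
from sklearn.linear_model import LogisticRegression

original = LogisticRegression(C=0.5).fit([[0.0], [1.0]], [0, 1])
copy = clone(original)                        # same constructor parameters...
assert copy.get_params() == original.get_params()
assert not hasattr(copy, 'coef_')             # ...but no fitted attributes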
Example #1
Source File: fixes.py    From skutil with BSD 3-Clause "New" or "Revised" License
def _do_fit(n_jobs, verbose, pre_dispatch, base_estimator,
                X, y, scorer, parameter_iterable, fit_params,
                error_score, cv, **kwargs):
        groups = kwargs.pop('groups')

        # test_score, n_samples, parameters
        out = Parallel(n_jobs=n_jobs, verbose=verbose, pre_dispatch=pre_dispatch)(
            delayed(_fit_and_score)(
                clone(base_estimator), X, y, scorer,
                train, test, verbose, parameters,
                fit_params=fit_params,
                return_train_score=False,
                return_n_test_samples=True,
                return_times=False,
                return_parameters=True,
                error_score=error_score)
            for parameters in parameter_iterable
            for train, test in cv.split(X, y, groups))

        # test_score, n_samples, _, parameters
        return [(mod[0], mod[1], None, mod[2]) for mod in out] 
Example #2
Source File: test_dummy.py    From Mastering-Elasticsearch-7.0 with MIT License
def _check_behavior_2d(clf):
    # 1d case
    X = np.array([[0], [0], [0], [0]])  # ignored
    y = np.array([1, 2, 1, 1])
    est = clone(clf)
    est.fit(X, y)
    y_pred = est.predict(X)
    assert_equal(y.shape, y_pred.shape)

    # 2d case
    y = np.array([[1, 0],
                  [2, 0],
                  [1, 0],
                  [1, 3]])
    est = clone(clf)
    est.fit(X, y)
    y_pred = est.predict(X)
    assert_equal(y.shape, y_pred.shape) 
Example #3
Source File: feature_bagging.py    From pyod with BSD 2-Clause "Simplified" License
def _make_estimator(self, append=True, random_state=None):
        """Make and configure a copy of the `base_estimator_` attribute.

        sklearn/base.py

        Warning: This method should be used to properly instantiate new
        sub-estimators.
        """

        # TODO: add a check for estimator_param
        estimator = clone(self.base_estimator_)
        estimator.set_params(**self.estimator_params)

        if random_state is not None:
            _set_random_states(estimator, random_state)

        if append:
            self.estimators_.append(estimator)

        return estimator 
Example #4
Source File: test_validation.py    From Mastering-Elasticsearch-7.0 with MIT License
def check_cross_val_predict_multiclass(est, X, y, method):
    """Helper for tests of cross_val_predict with multiclass classification"""
    cv = KFold(n_splits=3, shuffle=False)

    # Generate expected outputs
    float_min = np.finfo(np.float64).min
    default_values = {'decision_function': float_min,
                      'predict_log_proba': float_min,
                      'predict_proba': 0}
    expected_predictions = np.full((len(X), len(set(y))),
                                   default_values[method],
                                   dtype=np.float64)
    _, y_enc = np.unique(y, return_inverse=True)
    for train, test in cv.split(X, y_enc):
        est = clone(est).fit(X[train], y_enc[train])
        fold_preds = getattr(est, method)(X[test])
        i_cols_fit = np.unique(y_enc[train])
        expected_predictions[np.ix_(test, i_cols_fit)] = fold_preds

    # Check actual outputs for several representations of y
    for tg in [y, y + 1, y - 2, y.astype('str')]:
        assert_allclose(cross_val_predict(est, X, tg, method=method, cv=cv),
                        expected_predictions) 
Example #5
Source File: test_target.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_transform_target_regressor_2d_transformer_multioutput():
    # Check consistency with transformer accepting only 2D array and a 2D y
    # array.
    X = friedman[0]
    y = np.vstack((friedman[1], friedman[1] ** 2 + 1)).T
    transformer = StandardScaler()
    regr = TransformedTargetRegressor(regressor=LinearRegression(),
                                      transformer=transformer)
    y_pred = regr.fit(X, y).predict(X)
    assert y.shape == y_pred.shape
    # consistency forward transform
    y_tran = regr.transformer_.transform(y)
    _check_standard_scaled(y, y_tran)
    assert y.shape == y_pred.shape
    # consistency inverse transform
    assert_allclose(y, regr.transformer_.inverse_transform(
        y_tran).squeeze())
    # consistency of the regressor
    lr = LinearRegression()
    transformer2 = clone(transformer)
    lr.fit(X, transformer2.fit_transform(y))
    y_lr_pred = lr.predict(X)
    assert_allclose(y_pred, transformer2.inverse_transform(y_lr_pred))
    assert_allclose(regr.regressor_.coef_, lr.coef_) 
Example #6
Source File: test_pipeline.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_fit_predict_on_pipeline():
    # test that the fit_predict method is implemented on a pipeline
    # test that the fit_predict on pipeline yields same results as applying
    # transform and clustering steps separately
    iris = load_iris()
    scaler = StandardScaler()
    km = KMeans(random_state=0)
    # As pipeline doesn't clone estimators on construction,
    # it must have its own estimators
    scaler_for_pipeline = StandardScaler()
    km_for_pipeline = KMeans(random_state=0)

    # first compute the transform and clustering step separately
    scaled = scaler.fit_transform(iris.data)
    separate_pred = km.fit_predict(scaled)

    # use a pipeline to do the transform and clustering in one step
    pipe = Pipeline([
        ('scaler', scaler_for_pipeline),
        ('Kmeans', km_for_pipeline)
    ])
    pipeline_pred = pipe.fit_predict(iris.data)

    assert_array_almost_equal(pipeline_pred, separate_pred) 
Example #7
Source File: test_multioutput.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_base_chain_random_order():
    # Fit base chain with random order
    X, Y = generate_multilabel_dataset_with_correlations()
    for chain in [ClassifierChain(LogisticRegression()),
                  RegressorChain(Ridge())]:
        chain_random = clone(chain).set_params(order='random', random_state=42)
        chain_random.fit(X, Y)
        chain_fixed = clone(chain).set_params(order=chain_random.order_)
        chain_fixed.fit(X, Y)
        assert_array_equal(chain_fixed.order_, chain_random.order_)
        assert_not_equal(list(chain_random.order_), list(range(4)))
        assert_equal(len(chain_random.order_), 4)
        assert_equal(len(set(chain_random.order_)), 4)
        # Randomly ordered chain should behave identically to a fixed order
        # chain with the same order.
        for est1, est2 in zip(chain_random.estimators_,
                              chain_fixed.estimators_):
            assert_array_almost_equal(est1.coef_, est2.coef_) 
Example #8
Source File: test_multioutput.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_base_chain_crossval_fit_and_predict():
    # Fit chain with cross_val_predict and verify predict
    # performance
    X, Y = generate_multilabel_dataset_with_correlations()

    for chain in [ClassifierChain(LogisticRegression()),
                  RegressorChain(Ridge())]:
        chain.fit(X, Y)
        chain_cv = clone(chain).set_params(cv=3)
        chain_cv.fit(X, Y)
        Y_pred_cv = chain_cv.predict(X)
        Y_pred = chain.predict(X)

        assert Y_pred_cv.shape == Y_pred.shape
        assert not np.all(Y_pred == Y_pred_cv)
        if isinstance(chain, ClassifierChain):
            assert jaccard_score(Y, Y_pred_cv, average='samples') > .4
        else:
            assert mean_squared_error(Y, Y_pred_cv) < .25 
Example #9
Source File: test_sag.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_classifier_results():
    """tests if classifier results match target"""
    alpha = .1
    n_features = 20
    n_samples = 10
    tol = .01
    max_iter = 200
    rng = np.random.RandomState(0)
    X = rng.normal(size=(n_samples, n_features))
    w = rng.normal(size=n_features)
    y = np.dot(X, w)
    y = np.sign(y)
    clf1 = LogisticRegression(solver='sag', C=1. / alpha / n_samples,
                              max_iter=max_iter, tol=tol, random_state=77)
    clf2 = clone(clf1)

    clf1.fit(X, y)
    clf2.fit(sp.csr_matrix(X), y)
    pred1 = clf1.predict(X)
    pred2 = clf2.predict(X)
    assert_almost_equal(pred1, y, decimal=12)
    assert_almost_equal(pred2, y, decimal=12) 
Example #10
Source File: test_k_means.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_weighted_vs_repeated():
    # a sample weight of N should yield the same result as an N-fold
    # repetition of the sample
    rng = np.random.RandomState(0)
    sample_weight = rng.randint(1, 5, size=n_samples)
    X_repeat = np.repeat(X, sample_weight, axis=0)
    estimators = [KMeans(init="k-means++", n_clusters=n_clusters,
                         random_state=42),
                  KMeans(init="random", n_clusters=n_clusters,
                         random_state=42),
                  KMeans(init=centers.copy(), n_clusters=n_clusters,
                         random_state=42),
                  MiniBatchKMeans(n_clusters=n_clusters, batch_size=10,
                                  random_state=42)]
    for estimator in estimators:
        est_weighted = clone(estimator).fit(X, sample_weight=sample_weight)
        est_repeated = clone(estimator).fit(X_repeat)
        repeated_labels = np.repeat(est_weighted.labels_, sample_weight)
        assert_almost_equal(v_measure_score(est_repeated.labels_,
                                            repeated_labels), 1.0)
        if not isinstance(estimator, MiniBatchKMeans):
            assert_almost_equal(_sort_centers(est_weighted.cluster_centers_),
                                _sort_centers(est_repeated.cluster_centers_)) 
Example #11
Source File: test_nmf.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_nmf_sparse_input():
    # Test that sparse matrices are accepted as input
    from scipy.sparse import csc_matrix

    rng = np.random.mtrand.RandomState(42)
    A = np.abs(rng.randn(10, 10))
    A[:, 2 * np.arange(5)] = 0
    A_sparse = csc_matrix(A)

    for solver in ('cd', 'mu'):
        est1 = NMF(solver=solver, n_components=5, init='random',
                   random_state=0, tol=1e-2)
        est2 = clone(est1)

        W1 = est1.fit_transform(A)
        W2 = est2.fit_transform(A_sparse)
        H1 = est1.components_
        H2 = est2.components_

        assert_array_almost_equal(W1, W2)
        assert_array_almost_equal(H1, H2)
Example #12
Source File: ml_tune.py    From ml-parameter-optimization with MIT License
def apply_gridsearch(self,model):
        """
        apply grid search to the ml algorithm over the specified parameters;
        returns the updated best score and parameters
        """
        # check if a custom evaluation function is specified
        if callable(self.params_cv['scoring']):
            scoring = make_scorer(self.params_cv['scoring'],greater_is_better=self._greater_is_better)
        else:
            scoring = self.params_cv['scoring']
        
        gsearch = GridSearchCV(estimator=model,param_grid=self.get_params_tune(),scoring=scoring,
                               iid=self.params_cv['iid'],cv=self.params_cv['cv_folds'],n_jobs=self.params_cv['n_jobs'])
        gsearch.fit(self.X,self.y)
        
        # update best model if best_score is improved
        if (gsearch.best_score_ * self._score_mult) > (self.best_score * self._score_mult):
            self.best_model = clone(gsearch.best_estimator_)
            self.best_score = gsearch.best_score_
        
        # update tuned parameters with optimal values
        for key,value in gsearch.best_params_.items():
            self._params[key] = value
        self._temp_score = gsearch.best_score_
        return self 
Example #13
Source File: cluster.py    From scikit-plot with MIT License
def _clone_and_score_clusterer(clf, X, n_clusters):
    """Clones and scores clusterer instance.

    Args:
        clf: Clusterer instance that implements ``fit``, ``fit_predict``, and
            ``score`` methods, and an ``n_clusters`` hyperparameter.
            e.g. :class:`sklearn.cluster.KMeans` instance

        X (array-like, shape (n_samples, n_features)):
            Data to cluster, where n_samples is the number of samples and
            n_features is the number of features.

        n_clusters (int): Number of clusters

    Returns:
        score: Score of clusters

        time: Number of seconds it took to fit cluster
    """
    start = time.time()
    clf = clone(clf)
    setattr(clf, 'n_clusters', n_clusters)
    return clf.fit(X).score(X), time.time() - start 
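A sketch of how such a helper might be driven, scoring one fresh clone per candidate n_clusters (illustrative only; this is not scikit-plot's plotting code):

from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs

X, _ = make_blobs(n_samples=300, centers=4, random_state=0)
base = KMeans(random_state=0)
for k in range(2, 8):
    score, seconds = _clone_and_score_clusterer(base, X, k)
    print(k, score, seconds)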
Example #14
Source File: groupby_model.py    From xam with MIT License
def fit(self, X, y=None, **fit_params):

        if not isinstance(X, pd.DataFrame):
            raise ValueError('X is not a pandas.DataFrame')

        self.models_ = {}

        columns = self._get_fit_columns(X)

        for key in X[self.by].unique():

            # Copy the model
            model = clone(self.base_model)

            # Select the rows that will be fitted
            mask = (X[self.by] == key).tolist()
            rows = X.index[mask]

            # Fit the model
            model.fit(X.loc[rows, columns], y[mask], **fit_params)

            # Save the model
            self.models_[key] = model

        return self 
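The essential pattern here, one clone() per group key, also works outside xam; a minimal standalone sketch with illustrative column and model names:

import pandas as pd
from sklearn.base import clone
from sklearn.linear_model import LinearRegression

df = pd.DataFrame({'shop': ['a', 'a', 'b', 'b'], 'x': [1.0, 2.0, 1.0, 2.0]})
y = pd.Series([1.0, 2.0, 10.0, 20.0])
base_model = LinearRegression()

models = {}
for key in df['shop'].unique():
    mask = (df['shop'] == key).values
    model = clone(base_model)                 # fresh, unfitted copy per group
    model.fit(df.loc[mask, ['x']], y[mask])
    models[key] = model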
Example #15
Source File: test_net.py    From skorch with BSD 3-Clause "New" or "Revised" License
def net_fit(self, net_cls, module_cls, dummy_callback, data):
        # Careful, don't call additional fits or set_params on this,
        # since that would have side effects on other tests.
        X, y = data

        # We need a new instance of the net and cannot reuse the net
        # fixture, because otherwise fixture net and net_fit refer to
        # the same object; also, we cannot clone(net) because this
        # will result in the dummy_callback not being the mock anymore
        net = net_cls(
            module_cls,
            callbacks=[('dummy', dummy_callback)],
            max_epochs=10,
            lr=0.1,
        )
        return net.fit(X, y) 
Example #16
Source File: test_net.py    From skorch with BSD 3-Clause "New" or "Revised" License
def test_changing_model_reinitializes_optimizer(self, net, data):
        # The idea is that we change the model using `set_params` to
        # add parameters. Since the optimizer depends on the model
        # parameters it needs to be reinitialized.
        X, y = data

        net.set_params(module__nonlin=nn.ReLU())
        net.fit(X, y)

        net.set_params(module__nonlin=nn.PReLU())
        assert isinstance(net.module_.nonlin, nn.PReLU)
        d1 = net.module_.nonlin.weight.data.clone().cpu().numpy()

        # make sure that we do not initialize again by making sure that
        # the network is initialized and by using partial_fit.
        assert net.initialized_
        net.partial_fit(X, y)
        d2 = net.module_.nonlin.weight.data.clone().cpu().numpy()

        # all newly introduced parameters should have been trained (changed)
        # by the optimizer after 10 epochs.
        assert (abs(d2 - d1) > 1e-05).all() 
Example #17
Source File: parameterize.py    From carl with BSD 3-Clause "New" or "Revised" License
def fit(self, X, y):
        """Fit estimator on parameterized data.

        Parameters
        ----------
        * `X` [array-like, shape=(n_samples, n_features+len(params))]:
            The samples, concatenated with the corresponding parameter values.

        * `y` [array-like, shape=(n_samples,)]:
            The output values.

        Returns
        -------
        * `self` [object]:
            `self`.
        """
        self.stacker_ = ParameterStacker(self.params)

        # XXX: this assumes that X is extended with parameters
        self.n_features_ = X.shape[1] - len(self.params)
        self.estimator_ = clone(self.base_estimator).fit(X, y)

        return self 
Example #18
Source File: rfpimp.py    From malss with MIT License
def oob_dropcol_importances(rf, X_train, y_train):
    """
    Compute drop-column feature importances for scikit-learn.

    Given a RandomForestClassifier or RandomForestRegressor in rf
    and training X and y data, return a data frame with columns
    Feature and Importance sorted in reverse order by importance.

    A clone of rf is trained once to get the baseline score and then
    again, once per feature to compute the drop in out of bag (OOB)
    score.

    return: A data frame with Feature, Importance columns

    SAMPLE CODE

    rf = RandomForestRegressor(n_estimators=100, n_jobs=-1, oob_score=True)
    X_train, y_train = ..., ...
    rf.fit(X_train, y_train)
    imp = oob_dropcol_importances(rf, X_train, y_train)
    """
    rf_ = clone(rf)
    rf_.random_state = 999
    rf_.fit(X_train, y_train)
    baseline = rf_.oob_score_
    imp = []
    for col in X_train.columns:
        X = X_train.drop(col, axis=1)
        rf_ = clone(rf)
        rf_.random_state = 999
        rf_.fit(X, y_train)
        o = rf_.oob_score_
        imp.append(baseline - o)
    imp = np.array(imp)
    I = pd.DataFrame(data={'Feature':X_train.columns, 'Importance':imp})
    I = I.set_index('Feature')
    I = I.sort_values('Importance', ascending=False)
    return I 
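The SAMPLE CODE in the docstring leaves the training data elided; a runnable toy version might look like this (data and column names are illustrative):

import pandas as pd
from sklearn.datasets import make_regression
from sklearn.ensemble import RandomForestRegressor

X_arr, y_train = make_regression(n_samples=100, n_features=4, random_state=0)
X_train = pd.DataFrame(X_arr, columns=['f0', 'f1', 'f2', 'f3'])
rf = RandomForestRegressor(n_estimators=50, n_jobs=-1, oob_score=True)
rf.fit(X_train, y_train)
imp = oob_dropcol_importances(rf, X_train, y_train)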
Example #19
Source File: sklearn_adapter.py    From libact with BSD 2-Clause "Simplified" License
def clone(self):
        return SklearnProbaAdapter(clone(self._model)) 
Example #20
Source File: p119_squential_backward_selection.py    From PythonMachineLearningExamples with MIT License
def __init__(self, estimator, k_features,
        scoring=accuracy_score,
        test_size=0.25, random_state=1):
        self.scoring = scoring
        self.estimator = clone(estimator)
        self.k_features = k_features
        self.test_size = test_size
        self.random_state = random_state 
Example #21
Source File: p206_majority_vote_classifier.py    From PythonMachineLearningExamples with MIT License
def fit(self, X, y):
        """ Fit classifiers.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape = [n_samples, n_features]
            Matrix of training samples.

        y : array-like, shape = [n_samples]
            Vector of target class labels.

        Returns
        -------
        self : object

        """
        if self.vote not in ('probability', 'classlabel'):
            raise ValueError("vote must be 'probability' or 'classlabel'"
                             "; got (vote=%r)"
                             % self.vote)

        if self.weights and len(self.weights) != len(self.classifiers):
            raise ValueError('Number of classifiers and weights must be equal'
                             '; got %d weights, %d classifiers'
                             % (len(self.weights), len(self.classifiers)))

        # Use LabelEncoder to ensure class labels start with 0, which
        # is important for np.argmax call in self.predict
        self.lablenc_ = LabelEncoder()
        self.lablenc_.fit(y)
        self.classes_ = self.lablenc_.classes_
        self.classifiers_ = []
        for clf in self.classifiers:
            fitted_clf = clone(clf).fit(X, self.lablenc_.transform(y))
            self.classifiers_.append(fitted_clf)
        return self 
Example #22
Source File: fixes.py    From skutil with BSD 3-Clause "New" or "Revised" License
def _do_fit(n_jobs, verbose, pre_dispatch, base_estimator,
                X, y, scorer, parameter_iterable, fit_params,
                error_score, cv, **kwargs):
        # test_score, n_samples, score_time, parameters
        return Parallel(n_jobs=n_jobs, verbose=verbose, pre_dispatch=pre_dispatch)(
            delayed(_fit_and_score)(
                clone(base_estimator), X, y, scorer,
                train, test, verbose, parameters,
                fit_params, return_parameters=True,
                error_score=error_score)
            for parameters in parameter_iterable
            for train, test in cv) 
Example #23
Source File: grid_search.py    From skutil with BSD 3-Clause "New" or "Revised" License
def _clone_h2o_obj(estimator, ignore=False, **kwargs):
    # do initial clone
    est = clone(estimator)

    # set kwargs:
    if kwargs:
        for k, v in six.iteritems(kwargs):
            setattr(est, k, v)

    # check on h2o estimator
    if isinstance(estimator, H2OPipeline):
        # the last step from the original estimator
        e = estimator.steps[-1][1]
        if isinstance(e, H2OEstimator):
            last_step = est.steps[-1][1]

            # so it's the last step
            for k, v in six.iteritems(e._parms):
                k, v = _kv_str(k, v)

                # if (not k in PARM_IGNORE) and (not v is None):
                #   e._parms[k] = v
                last_step._parms[k] = v

                # otherwise it's a BaseH2OFunctionWrapper
    return est 
Example #24
Source File: grid_search.py    From skutil with BSD 3-Clause "New" or "Revised" License
def _new_base_estimator(est, clonable_kwargs):
    """When the grid searches are pickled, the estimator
    has to be dropped out. When we load it back in, we have
    to reinstate a new one, since the fit is predicated on
    being able to clone a base estimator, we've got to have
    an estimator to clone and fit.

    Parameters
    ----------

    est : str
        The type of model to build

    Returns
    -------

    estimator : H2OEstimator
        The cloned base estimator
    """
    est_map = {
        'dl':   H2ODeepLearningEstimator,
        'gbm':  H2OGradientBoostingEstimator,
        'glm':  H2OGeneralizedLinearEstimator,
        # 'glrm': H2OGeneralizedLowRankEstimator,
        # 'km'  : H2OKMeansEstimator,
        'nb':   H2ONaiveBayesEstimator,
        'rf':   H2ORandomForestEstimator
    }

    estimator = est_map[est]()  # initialize the new ones
    for k, v in six.iteritems(clonable_kwargs):
        k, v = _kv_str(k, v)
        estimator._parms[k] = v

    return estimator 
Example #25
Source File: test_estimators.py    From pmdarima with MIT License
def test_clonable(est):
    # fit it, then clone it
    est.fit(y)
    est2 = clone(est)
    assert isinstance(est2, est.__class__)
    assert est is not est2 
Example #26
Source File: mnist.py    From mlens with MIT License
def build_ensemble(cls, **kwargs):
    """Build ML-Ensemble"""
    ens = cls(**kwargs)

    use = ["ExtraTrees", "RandomForest",
           "LogisticRegression-SAG", "MLP-adam"]

    meta = RandomForestClassifier(n_estimators=100,
                                  random_state=0,
                                  n_jobs=-1)
    base_learners = list()
    for est_name, est in ESTIMATORS.items():
        e = clone(est)
        if est_name not in use:
            continue
        elif est_name == "MLP-adam":
            e.verbose = False
        try:
            e.set_params(**{'n_jobs': 1})
        except ValueError:
            pass

        base_learners.append((est_name, e))
    ens.add(base_learners, proba=True, shuffle=True, random_state=1)
    ens.add_meta(meta, shuffle=True, random_state=2)
    return ens 
Example #27
Source File: target_encoder.py    From nyaggle with MIT License
def _pre_train(self, y):
        self.cv = check_cv(self.cv, y)
        self.n_splits = self.cv.get_n_splits()
        self.transformers = [clone(self.base_transformer) for _ in range(self.n_splits + 1)] 
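One transformer is cloned per CV fold plus one extra, presumably so each fold's training rows get an out-of-fold encoding while the last clone, fit on all training data, handles unseen test rows. A sketch of that idea (a guess at the surrounding logic, not nyaggle's actual implementation; StandardScaler stands in for the target encoder):

import pandas as pd
from sklearn.base import clone
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler

X = pd.DataFrame({'f': [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]})
y = pd.Series([0, 1, 0, 1, 0, 1])
base_transformer = StandardScaler()
cv = KFold(n_splits=3)

transformers = [clone(base_transformer) for _ in range(cv.get_n_splits() + 1)]
for i, (train_idx, _) in enumerate(cv.split(X, y)):
    transformers[i].fit(X.iloc[train_idx], y.iloc[train_idx])   # per-fold clone
transformers[-1].fit(X, y)                                      # for unseen test data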
Example #28
Source File: utils.py    From m2cgen with MIT License
def cartesian_e2e_params(executors_with_marks, models_with_trainers_with_marks,
                         *additional_params):
    result_params = list(additional_params)

    # Specifying None for additional parameters makes pytest generate
    # automatic ids. If we don't do this, pytest will throw an exception
    # saying that the number of parameters doesn't match the number of
    # provided ids
    ids = [None] * len(additional_params)

    prod = itertools.product(
        executors_with_marks, models_with_trainers_with_marks)

    for (executor, executor_mark), (model, trainer, trainer_mark) in prod:
        # Since we reuse the same model across multiple tests we want it
        # to be clean.
        model = clone(model)

        # We use custom id since pytest for some reason can't show name of
        # the model in the automatic id. Which sucks.
        ids.append(f"{_get_full_model_name(model)} - "
                   f"{executor_mark.name} - {trainer.name}")

        result_params.append(pytest.param(
            model, executor, trainer, marks=[executor_mark, trainer_mark],
        ))

    param_names = "estimator,executor_cls,model_trainer"

    def wrap(func):

        @pytest.mark.parametrize(param_names, result_params, ids=ids)
        @functools.wraps(func)
        def inner(*args, **kwarg):
            return func(*args, **kwarg)

        return inner

    return wrap