Python sklearn.base.clone() Examples
The following are 28 code examples of sklearn.base.clone(), collected from open-source projects. Each example notes the project and source file it comes from, so you can trace it back to the original code. You may also want to check out the other functions and classes available in the sklearn.base module.
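
For context before the examples: sklearn.base.clone() builds a new, unfitted estimator with the same constructor parameters as the original, which is why it shows up throughout the cross-validation helpers and meta-estimators below. A minimal sketch of that behavior follows; the toy data and the LogisticRegression estimator are only illustrative and are not taken from any of the listed projects.

import numpy as np
from sklearn.base import clone
from sklearn.linear_model import LogisticRegression

# Fit an estimator, then clone it: the clone shares the constructor
# parameters but carries none of the fitted state (no coef_, classes_, ...).
X = np.array([[0.0], [1.0], [2.0], [3.0]])
y = np.array([0, 0, 1, 1])

original = LogisticRegression(C=0.5).fit(X, y)
copy = clone(original)

assert copy.get_params() == original.get_params()
assert not hasattr(copy, 'coef_')   # the clone is unfitted
copy.fit(X, y)                      # it must be fitted independently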

Example #1
Source File: fixes.py From skutil with BSD 3-Clause "New" or "Revised" License | 7 votes |
def _do_fit(n_jobs, verbose, pre_dispatch, base_estimator, X, y, scorer,
            parameter_iterable, fit_params, error_score, cv, **kwargs):
    groups = kwargs.pop('groups')

    # test_score, n_samples, parameters
    out = Parallel(n_jobs=n_jobs, verbose=verbose, pre_dispatch=pre_dispatch)(
        delayed(_fit_and_score)(
            clone(base_estimator), X, y, scorer, train, test,
            verbose, parameters, fit_params=fit_params,
            return_train_score=False, return_n_test_samples=True,
            return_times=False, return_parameters=True,
            error_score=error_score)
        for parameters in parameter_iterable
        for train, test in cv.split(X, y, groups))

    # test_score, n_samples, _, parameters
    return [(mod[0], mod[1], None, mod[2]) for mod in out]
Example #2
Source File: test_dummy.py From Mastering-Elasticsearch-7.0 with MIT License | 7 votes |
def _check_behavior_2d(clf):
    # 1d case
    X = np.array([[0], [0], [0], [0]])  # ignored
    y = np.array([1, 2, 1, 1])
    est = clone(clf)
    est.fit(X, y)
    y_pred = est.predict(X)
    assert_equal(y.shape, y_pred.shape)

    # 2d case
    y = np.array([[1, 0], [2, 0], [1, 0], [1, 3]])
    est = clone(clf)
    est.fit(X, y)
    y_pred = est.predict(X)
    assert_equal(y.shape, y_pred.shape)
Example #3
Source File: feature_bagging.py From pyod with BSD 2-Clause "Simplified" License | 6 votes |
def _make_estimator(self, append=True, random_state=None):
    """Make and configure a copy of the `base_estimator_` attribute.

    sklearn/base.py

    Warning: This method should be used to properly instantiate new
    sub-estimators.
    """
    # TODO: add a check for estimator_param
    estimator = clone(self.base_estimator_)
    estimator.set_params(**self.estimator_params)

    if random_state is not None:
        _set_random_states(estimator, random_state)

    if append:
        self.estimators_.append(estimator)

    return estimator
Example #4
Source File: test_validation.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def check_cross_val_predict_multiclass(est, X, y, method):
    """Helper for tests of cross_val_predict with multiclass classification"""
    cv = KFold(n_splits=3, shuffle=False)

    # Generate expected outputs
    float_min = np.finfo(np.float64).min
    default_values = {'decision_function': float_min,
                      'predict_log_proba': float_min,
                      'predict_proba': 0}
    expected_predictions = np.full((len(X), len(set(y))),
                                   default_values[method],
                                   dtype=np.float64)
    _, y_enc = np.unique(y, return_inverse=True)
    for train, test in cv.split(X, y_enc):
        est = clone(est).fit(X[train], y_enc[train])
        fold_preds = getattr(est, method)(X[test])
        i_cols_fit = np.unique(y_enc[train])
        expected_predictions[np.ix_(test, i_cols_fit)] = fold_preds

    # Check actual outputs for several representations of y
    for tg in [y, y + 1, y - 2, y.astype('str')]:
        assert_allclose(cross_val_predict(est, X, tg, method=method, cv=cv),
                        expected_predictions)
Example #5
Source File: test_target.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_transform_target_regressor_2d_transformer_multioutput():
    # Check consistency with transformer accepting only 2D array and a 2D y
    # array.
    X = friedman[0]
    y = np.vstack((friedman[1], friedman[1] ** 2 + 1)).T
    transformer = StandardScaler()
    regr = TransformedTargetRegressor(regressor=LinearRegression(),
                                      transformer=transformer)
    y_pred = regr.fit(X, y).predict(X)
    assert y.shape == y_pred.shape
    # consistency forward transform
    y_tran = regr.transformer_.transform(y)
    _check_standard_scaled(y, y_tran)
    assert y.shape == y_pred.shape
    # consistency inverse transform
    assert_allclose(y, regr.transformer_.inverse_transform(
        y_tran).squeeze())
    # consistency of the regressor
    lr = LinearRegression()
    transformer2 = clone(transformer)
    lr.fit(X, transformer2.fit_transform(y))
    y_lr_pred = lr.predict(X)
    assert_allclose(y_pred, transformer2.inverse_transform(y_lr_pred))
    assert_allclose(regr.regressor_.coef_, lr.coef_)
Example #6
Source File: test_pipeline.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_fit_predict_on_pipeline():
    # test that the fit_predict method is implemented on a pipeline
    # test that the fit_predict on pipeline yields same results as applying
    # transform and clustering steps separately
    iris = load_iris()
    scaler = StandardScaler()
    km = KMeans(random_state=0)
    # As pipeline doesn't clone estimators on construction,
    # it must have its own estimators
    scaler_for_pipeline = StandardScaler()
    km_for_pipeline = KMeans(random_state=0)

    # first compute the transform and clustering step separately
    scaled = scaler.fit_transform(iris.data)
    separate_pred = km.fit_predict(scaled)

    # use a pipeline to do the transform and clustering in one step
    pipe = Pipeline([
        ('scaler', scaler_for_pipeline),
        ('Kmeans', km_for_pipeline)
    ])
    pipeline_pred = pipe.fit_predict(iris.data)

    assert_array_almost_equal(pipeline_pred, separate_pred)
Example #7
Source File: test_multioutput.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_base_chain_random_order():
    # Fit base chain with random order
    X, Y = generate_multilabel_dataset_with_correlations()
    for chain in [ClassifierChain(LogisticRegression()),
                  RegressorChain(Ridge())]:
        chain_random = clone(chain).set_params(order='random',
                                               random_state=42)
        chain_random.fit(X, Y)
        chain_fixed = clone(chain).set_params(order=chain_random.order_)
        chain_fixed.fit(X, Y)
        assert_array_equal(chain_fixed.order_, chain_random.order_)
        assert_not_equal(list(chain_random.order_), list(range(4)))
        assert_equal(len(chain_random.order_), 4)
        assert_equal(len(set(chain_random.order_)), 4)
        # Randomly ordered chain should behave identically to a fixed order
        # chain with the same order.
        for est1, est2 in zip(chain_random.estimators_,
                              chain_fixed.estimators_):
            assert_array_almost_equal(est1.coef_, est2.coef_)
Example #8
Source File: test_multioutput.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_base_chain_crossval_fit_and_predict():
    # Fit chain with cross_val_predict and verify predict
    # performance
    X, Y = generate_multilabel_dataset_with_correlations()
    for chain in [ClassifierChain(LogisticRegression()),
                  RegressorChain(Ridge())]:
        chain.fit(X, Y)
        chain_cv = clone(chain).set_params(cv=3)
        chain_cv.fit(X, Y)
        Y_pred_cv = chain_cv.predict(X)
        Y_pred = chain.predict(X)

        assert Y_pred_cv.shape == Y_pred.shape
        assert not np.all(Y_pred == Y_pred_cv)
        if isinstance(chain, ClassifierChain):
            assert jaccard_score(Y, Y_pred_cv, average='samples') > .4
        else:
            assert mean_squared_error(Y, Y_pred_cv) < .25
Example #9
Source File: test_sag.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_classifier_results():
    """tests if classifier results match target"""
    alpha = .1
    n_features = 20
    n_samples = 10
    tol = .01
    max_iter = 200
    rng = np.random.RandomState(0)
    X = rng.normal(size=(n_samples, n_features))
    w = rng.normal(size=n_features)
    y = np.dot(X, w)
    y = np.sign(y)
    clf1 = LogisticRegression(solver='sag', C=1. / alpha / n_samples,
                              max_iter=max_iter, tol=tol, random_state=77)
    clf2 = clone(clf1)
    clf1.fit(X, y)
    clf2.fit(sp.csr_matrix(X), y)
    pred1 = clf1.predict(X)
    pred2 = clf2.predict(X)
    assert_almost_equal(pred1, y, decimal=12)
    assert_almost_equal(pred2, y, decimal=12)
Example #10
Source File: test_k_means.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_weighted_vs_repeated():
    # a sample weight of N should yield the same result as an N-fold
    # repetition of the sample
    rng = np.random.RandomState(0)
    sample_weight = rng.randint(1, 5, size=n_samples)
    X_repeat = np.repeat(X, sample_weight, axis=0)
    estimators = [KMeans(init="k-means++", n_clusters=n_clusters,
                         random_state=42),
                  KMeans(init="random", n_clusters=n_clusters,
                         random_state=42),
                  KMeans(init=centers.copy(), n_clusters=n_clusters,
                         random_state=42),
                  MiniBatchKMeans(n_clusters=n_clusters, batch_size=10,
                                  random_state=42)]
    for estimator in estimators:
        est_weighted = clone(estimator).fit(X, sample_weight=sample_weight)
        est_repeated = clone(estimator).fit(X_repeat)
        repeated_labels = np.repeat(est_weighted.labels_, sample_weight)
        assert_almost_equal(v_measure_score(est_repeated.labels_,
                                            repeated_labels), 1.0)
        if not isinstance(estimator, MiniBatchKMeans):
            assert_almost_equal(_sort_centers(est_weighted.cluster_centers_),
                                _sort_centers(est_repeated.cluster_centers_))
Example #11
Source File: test_nmf.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_nmf_sparse_input():
    # Test that sparse matrices are accepted as input
    from scipy.sparse import csc_matrix

    rng = np.random.mtrand.RandomState(42)
    A = np.abs(rng.randn(10, 10))
    A[:, 2 * np.arange(5)] = 0
    A_sparse = csc_matrix(A)

    for solver in ('cd', 'mu'):
        est1 = NMF(solver=solver, n_components=5, init='random',
                   random_state=0, tol=1e-2)
        est2 = clone(est1)

        W1 = est1.fit_transform(A)
        W2 = est2.fit_transform(A_sparse)
        H1 = est1.components_
        H2 = est2.components_

        assert_array_almost_equal(W1, W2)
        assert_array_almost_equal(H1, H2)
Example #12
Source File: ml_tune.py From ml-parameter-optimization with MIT License | 6 votes |
def apply_gridsearch(self, model):
    """
    apply grid search on ml algorithm to specified parameters
    returns updated best score and parameters
    """
    # check if custom evaluation function is specified
    if callable(self.params_cv['scoring']):
        scoring = make_scorer(self.params_cv['scoring'],
                              greater_is_better=self._greater_is_better)
    else:
        scoring = self.params_cv['scoring']

    gsearch = GridSearchCV(estimator=model,
                           param_grid=self.get_params_tune(),
                           scoring=scoring,
                           iid=self.params_cv['iid'],
                           cv=self.params_cv['cv_folds'],
                           n_jobs=self.params_cv['n_jobs'])
    gsearch.fit(self.X, self.y)

    # update best model if best_score is improved
    if (gsearch.best_score_ * self._score_mult) > (self.best_score * self._score_mult):
        self.best_model = clone(gsearch.best_estimator_)
        self.best_score = gsearch.best_score_

    # update tuned parameters with optimal values
    for key, value in gsearch.best_params_.items():
        self._params[key] = value

    self._temp_score = gsearch.best_score_
    return self
Example #13
Source File: cluster.py From scikit-plot with MIT License | 6 votes |
def _clone_and_score_clusterer(clf, X, n_clusters):
    """Clones and scores clusterer instance.

    Args:
        clf: Clusterer instance that implements ``fit``, ``fit_predict``, and
            ``score`` methods, and an ``n_clusters`` hyperparameter.
            e.g. :class:`sklearn.cluster.KMeans` instance

        X (array-like, shape (n_samples, n_features)):
            Data to cluster, where n_samples is the number of samples and
            n_features is the number of features.

        n_clusters (int): Number of clusters

    Returns:
        score: Score of clusters

        time: Number of seconds it took to fit cluster
    """
    start = time.time()
    clf = clone(clf)
    setattr(clf, 'n_clusters', n_clusters)
    return clf.fit(X).score(X), time.time() - start
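
A hypothetical usage sketch for the helper above, assuming it is available in scope; the blob data and the choice of four clusters are made up for illustration and are not part of the scikit-plot project.

from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs

# Score KMeans at a single candidate n_clusters value and time the fit.
X, _ = make_blobs(n_samples=200, centers=4, random_state=0)
score, elapsed = _clone_and_score_clusterer(KMeans(random_state=0), X, n_clusters=4)
print(score, elapsed)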
Example #14
Source File: groupby_model.py From xam with MIT License | 6 votes |
def fit(self, X, y=None, **fit_params):

    if not isinstance(X, pd.DataFrame):
        raise ValueError('X is not a pandas.DataFrame')

    self.models_ = {}
    columns = self._get_fit_columns(X)

    for key in X[self.by].unique():

        # Copy the model
        model = clone(self.base_model)

        # Select the rows that will be fitted
        mask = (X[self.by] == key).tolist()
        rows = X.index[mask]

        # Fit the model
        model.fit(X.loc[rows, columns], y[mask], **fit_params)

        # Save the model
        self.models_[key] = model

    return self
Example #15
Source File: test_net.py From skorch with BSD 3-Clause "New" or "Revised" License | 6 votes |
def net_fit(self, net_cls, module_cls, dummy_callback, data):
    # Careful, don't call additional fits or set_params on this,
    # since that would have side effects on other tests.
    X, y = data

    # We need a new instance of the net and cannot reuse the net
    # fixture, because otherwise fixture net and net_fit refer to
    # the same object; also, we cannot clone(net) because this
    # will result in the dummy_callback not being the mock anymore
    net = net_cls(
        module_cls,
        callbacks=[('dummy', dummy_callback)],
        max_epochs=10,
        lr=0.1,
    )
    return net.fit(X, y)
Example #16
Source File: test_net.py From skorch with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_changing_model_reinitializes_optimizer(self, net, data):
    # The idea is that we change the model using `set_params` to
    # add parameters. Since the optimizer depends on the model
    # parameters it needs to be reinitialized.
    X, y = data

    net.set_params(module__nonlin=nn.ReLU())
    net.fit(X, y)

    net.set_params(module__nonlin=nn.PReLU())
    assert isinstance(net.module_.nonlin, nn.PReLU)
    d1 = net.module_.nonlin.weight.data.clone().cpu().numpy()

    # make sure that we do not initialize again by making sure that
    # the network is initialized and by using partial_fit.
    assert net.initialized_
    net.partial_fit(X, y)
    d2 = net.module_.nonlin.weight.data.clone().cpu().numpy()

    # all newly introduced parameters should have been trained (changed)
    # by the optimizer after 10 epochs.
    assert (abs(d2 - d1) > 1e-05).all()
Example #17
Source File: parameterize.py From carl with BSD 3-Clause "New" or "Revised" License | 6 votes |
def fit(self, X, y):
    """Fit estimator on parameterized data.

    Parameters
    ----------
    * `X` [array-like, shape=(n_samples, n_features+len(params))]:
        The samples, concatenated with the corresponding parameter values.

    * `y` [array-like, shape=(n_samples,)]:
        The output values.

    Returns
    -------
    * `self` [object]:
        `self`.
    """
    self.stacker_ = ParameterStacker(self.params)

    # XXX: this assumes that X is extended with parameters
    self.n_features_ = X.shape[1] - len(self.params)

    self.estimator_ = clone(self.base_estimator).fit(X, y)

    return self
Example #18
Source File: rfpimp.py From malss with MIT License | 5 votes |
def oob_dropcol_importances(rf, X_train, y_train):
    """
    Compute drop-column feature importances for scikit-learn.

    Given a RandomForestClassifier or RandomForestRegressor in rf
    and training X and y data, return a data frame with columns Feature
    and Importance sorted in reverse order by importance.

    A clone of rf is trained once to get the baseline score and then
    again, once per feature to compute the drop in out of bag (OOB) score.

    return: A data frame with Feature, Importance columns

    SAMPLE CODE

    rf = RandomForestRegressor(n_estimators=100, n_jobs=-1, oob_score=True)
    X_train, y_train = ..., ...
    rf.fit(X_train, y_train)
    imp = oob_dropcol_importances(rf, X_train, y_train)
    """
    rf_ = clone(rf)
    rf_.random_state = 999
    rf_.fit(X_train, y_train)
    baseline = rf_.oob_score_
    imp = []
    for col in X_train.columns:
        X = X_train.drop(col, axis=1)
        rf_ = clone(rf)
        rf_.random_state = 999
        rf_.fit(X, y_train)
        o = rf_.oob_score_
        imp.append(baseline - o)
    imp = np.array(imp)
    I = pd.DataFrame(data={'Feature': X_train.columns, 'Importance': imp})
    I = I.set_index('Feature')
    I = I.sort_values('Importance', ascending=False)
    return I
Example #19
Source File: sklearn_adapter.py From libact with BSD 2-Clause "Simplified" License | 5 votes |
def clone(self):
    return SklearnProbaAdapter(clone(self._model))
Example #20
Source File: p119_squential_backward_selection.py From PythonMachineLearningExamples with MIT License | 5 votes |
def __init__(self, estimator, k_features, scoring=accuracy_score,
             test_size=0.25, random_state=1):
    self.scoring = scoring
    self.estimator = clone(estimator)
    self.k_features = k_features
    self.test_size = test_size
    self.random_state = random_state
Example #21
Source File: p206_majority_vote_classifier.py From PythonMachineLearningExamples with MIT License | 5 votes |
def fit(self, X, y):
    """ Fit classifiers.

    Parameters
    ----------
    X : {array-like, sparse matrix}, shape = [n_samples, n_features]
        Matrix of training samples.

    y : array-like, shape = [n_samples]
        Vector of target class labels.

    Returns
    -------
    self : object

    """
    if self.vote not in ('probability', 'classlabel'):
        raise ValueError("vote must be 'probability' or 'classlabel'"
                         "; got (vote=%r)" % self.vote)

    if self.weights and len(self.weights) != len(self.classifiers):
        raise ValueError('Number of classifiers and weights must be equal'
                         '; got %d weights, %d classifiers'
                         % (len(self.weights), len(self.classifiers)))

    # Use LabelEncoder to ensure class labels start with 0, which
    # is important for np.argmax call in self.predict
    self.lablenc_ = LabelEncoder()
    self.lablenc_.fit(y)
    self.classes_ = self.lablenc_.classes_
    self.classifiers_ = []
    for clf in self.classifiers:
        fitted_clf = clone(clf).fit(X, self.lablenc_.transform(y))
        self.classifiers_.append(fitted_clf)
    return self
Example #22
Source File: fixes.py From skutil with BSD 3-Clause "New" or "Revised" License | 5 votes |
def _do_fit(n_jobs, verbose, pre_dispatch, base_estimator, X, y, scorer,
            parameter_iterable, fit_params, error_score, cv, **kwargs):
    # test_score, n_samples, score_time, parameters
    return Parallel(n_jobs=n_jobs, verbose=verbose, pre_dispatch=pre_dispatch)(
        delayed(_fit_and_score)(
            clone(base_estimator), X, y, scorer, train, test,
            verbose, parameters, fit_params,
            return_parameters=True, error_score=error_score)
        for parameters in parameter_iterable
        for train, test in cv)
Example #23
Source File: grid_search.py From skutil with BSD 3-Clause "New" or "Revised" License | 5 votes |
def _clone_h2o_obj(estimator, ignore=False, **kwargs):
    # do initial clone
    est = clone(estimator)

    # set kwargs:
    if kwargs:
        for k, v in six.iteritems(kwargs):
            setattr(est, k, v)

    # check on h2o estimator
    if isinstance(estimator, H2OPipeline):
        # the last step from the original estimator
        e = estimator.steps[-1][1]
        if isinstance(e, H2OEstimator):
            last_step = est.steps[-1][1]

            # so it's the last step
            for k, v in six.iteritems(e._parms):
                k, v = _kv_str(k, v)

                # if (not k in PARM_IGNORE) and (not v is None):
                #     e._parms[k] = v
                last_step._parms[k] = v

    # otherwise it's an BaseH2OFunctionWrapper
    return est
Example #24
Source File: grid_search.py From skutil with BSD 3-Clause "New" or "Revised" License | 5 votes |
def _new_base_estimator(est, clonable_kwargs):
    """When the grid searches are pickled, the estimator
    has to be dropped out. When we load it back in, we have
    to reinstate a new one, since the fit is predicated on
    being able to clone a base estimator, we've got to have
    an estimator to clone and fit.

    Parameters
    ----------

    est : str
        The type of model to build

    Returns
    -------

    estimator : H2OEstimator
        The cloned base estimator
    """
    est_map = {
        'dl': H2ODeepLearningEstimator,
        'gbm': H2OGradientBoostingEstimator,
        'glm': H2OGeneralizedLinearEstimator,
        # 'glrm': H2OGeneralizedLowRankEstimator,
        # 'km' : H2OKMeansEstimator,
        'nb': H2ONaiveBayesEstimator,
        'rf': H2ORandomForestEstimator
    }

    estimator = est_map[est]()  # initialize the new ones
    for k, v in six.iteritems(clonable_kwargs):
        k, v = _kv_str(k, v)
        estimator._parms[k] = v

    return estimator
Example #25
Source File: test_estimators.py From pmdarima with MIT License | 5 votes |
def test_clonable(est):
    # fit it, then clone it
    est.fit(y)
    est2 = clone(est)
    assert isinstance(est2, est.__class__)
    assert est is not est2
Example #26
Source File: mnist.py From mlens with MIT License | 5 votes |
def build_ensemble(cls, **kwargs):
    """Build ML-Ensemble"""
    ens = cls(**kwargs)
    use = ["ExtraTrees", "RandomForest", "LogisticRegression-SAG", "MLP-adam"]

    meta = RandomForestClassifier(n_estimators=100, random_state=0, n_jobs=-1)

    base_learners = list()
    for est_name, est in ESTIMATORS.items():
        e = clone(est)
        if est_name not in use:
            continue
        elif est_name == "MLP-adam":
            e.verbose = False
        try:
            e.set_params(**{'n_jobs': 1})
        except ValueError:
            pass

        base_learners.append((est_name, e))

    ens.add(base_learners, proba=True, shuffle=True, random_state=1)
    ens.add_meta(meta, shuffle=True, random_state=2)
    return ens
Example #27
Source File: target_encoder.py From nyaggle with MIT License | 5 votes |
def _pre_train(self, y):
    self.cv = check_cv(self.cv, y)
    self.n_splits = self.cv.get_n_splits()
    self.transformers = [clone(self.base_transformer)
                         for _ in range(self.n_splits + 1)]
Example #28
Source File: utils.py From m2cgen with MIT License | 5 votes |
def cartesian_e2e_params(executors_with_marks, models_with_trainers_with_marks,
                         *additional_params):
    result_params = list(additional_params)

    # Specifying None for additional parameters makes pytest to generate
    # automatic ids. If we don't do this pytest will throw exception that
    # number of parameters doesn't match number of provided ids
    ids = [None] * len(additional_params)

    prod = itertools.product(
        executors_with_marks, models_with_trainers_with_marks)

    for (executor, executor_mark), (model, trainer, trainer_mark) in prod:
        # Since we reuse the same model across multiple tests we want it
        # to be clean.
        model = clone(model)

        # We use custom id since pytest for some reason can't show name of
        # the model in the automatic id. Which sucks.
        ids.append(f"{_get_full_model_name(model)} - "
                   f"{executor_mark.name} - {trainer.name}")

        result_params.append(pytest.param(
            model, executor, trainer, marks=[executor_mark, trainer_mark],
        ))

    param_names = "estimator,executor_cls,model_trainer"

    def wrap(func):
        @pytest.mark.parametrize(param_names, result_params, ids=ids)
        @functools.wraps(func)
        def inner(*args, **kwarg):
            return func(*args, **kwarg)
        return inner

    return wrap