Python sklearn.base.clone() Examples
Example #1
Source File: From Mastering-Elasticsearch-7.0 with MIT License | 7 votes |
def _check_behavior_2d(clf):
    """Check that predictions have the same shape as the target used to fit.

    A fresh clone of ``clf`` is fitted once on a 1-D target and once on a
    2-D target; in both cases ``predict`` must return an array whose shape
    equals ``y.shape``.
    """
    X = np.array([[0], [0], [0], [0]])  # ignored

    targets = (
        # 1d case
        np.array([1, 2, 1, 1]),
        # 2d case
        np.array([[1, 0],
                  [2, 0],
                  [1, 0],
                  [1, 3]]),
    )
    for y in targets:
        est = clone(clf)
        # The ``est.fit(X`` call had been dropped from the listing, leaving
        # only a dangling ``, y)``.
        est.fit(X, y)
        y_pred = est.predict(X)
        assert_equal(y.shape, y_pred.shape)
Example #2
Source File: From skutil with BSD 3-Clause "New" or "Revised" License | 7 votes |
def _do_fit(n_jobs, verbose, pre_dispatch, base_estimator, X, y, scorer,
            parameter_iterable, fit_params, error_score, cv, **kwargs):
    """Fit and score a clone of ``base_estimator`` per (parameters, split).

    ``groups`` is taken from ``kwargs`` (a ``KeyError`` is raised when it is
    absent) and forwarded to ``cv.split``. Every combination of a parameter
    setting and a train/test split is dispatched through ``Parallel``.

    Returns a list of ``(out[0], out[1], None, out[2])`` tuples, one per
    dispatched job, where ``out`` is what ``_fit_and_score`` returned.
    """
    groups = kwargs.pop('groups')

    runner = Parallel(n_jobs=n_jobs, verbose=verbose,
                      pre_dispatch=pre_dispatch)
    jobs = (
        delayed(_fit_and_score)(
            clone(base_estimator), X, y, scorer, train, test, verbose,
            parameters,
            fit_params=fit_params,
            return_train_score=False,
            return_n_test_samples=True,
            return_times=False,
            return_parameters=True,
            error_score=error_score,
        )
        for parameters in parameter_iterable
        for train, test in cv.split(X, y, groups)
    )

    # each result: test_score, n_samples, parameters
    results = runner(jobs)

    # reshaped to: test_score, n_samples, _, parameters
    reshaped = []
    for res in results:
        reshaped.append((res[0], res[1], None, res[2]))
    return reshaped
Example #3
Source File: From scikit-plot with MIT License | 6 votes |
def _clone_and_score_clusterer(clf, X, n_clusters):
    """Clones and scores clusterer instance.

    Args:
        clf: Clusterer instance that implements ``fit``,``fit_predict``, and
            ``score`` methods, and an ``n_clusters`` hyperparameter.
            e.g. :class:`sklearn.cluster.KMeans` instance

        X (array-like, shape (n_samples, n_features)):
            Data to cluster, where n_samples is the number of samples and
            n_features is the number of features.

        n_clusters (int): Number of clusters

    Returns:
        score: Score of clusters

        time: Number of seconds it took to fit cluster
    """
    start = time.time()
    clf = clone(clf)
    setattr(clf, 'n_clusters', n_clusters)
    # The ``clf.fit(X).score(X)`` expression had been dropped from the
    # listing, leaving ``return, time.time() - start``. The elapsed time is
    # evaluated after the fit/score call, so it covers clone + fit + score.
    return clf.fit(X).score(X), time.time() - start
Example #4
Source File: From skorch with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_changing_model_reinitializes_optimizer(self, net, data):
    """Check that parameters added via ``set_params`` get trained.

    The idea is that we change the model using `set_params` to add
    parameters. Since the optimizer depends on the model parameters it
    needs to be reinitialized.
    """
    X, y = data

    net.set_params(module__nonlin=nn.ReLU())
    # The ``net.fit(X`` call had been dropped from the listing.
    net.fit(X, y)

    net.set_params(module__nonlin=nn.PReLU())
    assert isinstance(net.module_.nonlin, nn.PReLU)
    # NOTE(review): the right-hand sides of ``d1`` and ``d2`` were missing
    # entirely from the listing; they are reconstructed here as a snapshot
    # of the PReLU weight - confirm against the skorch test suite.
    d1 = net.module_.nonlin.weight.data.clone().cpu().numpy()

    # make sure that we do not initialize again by making sure that
    # the network is initialized and by using partial_fit.
    assert net.initialized_
    net.partial_fit(X, y)
    d2 = net.module_.nonlin.weight.data.clone().cpu().numpy()

    # all newly introduced parameters should have been trained (changed)
    # by the optimizer after 10 epochs.
    assert (abs(d2 - d1) > 1e-05).all()
Example #5
Source File: From skorch with BSD 3-Clause "New" or "Revised" License | 6 votes |
def net_fit(self, net_cls, module_cls, dummy_callback, data):
    """Build a fresh net with the dummy callback and return it fitted.

    Careful, don't call additional fits or set_params on the returned
    object, since that would have side effects on other tests.

    Returns whatever ``net.fit(X, y)`` returns for the newly built net.
    """
    X, y = data

    # We need a new instance of the net and cannot reuse the net
    # fixture, because otherwise fixture net and net_fit refer to
    # the same object; also, we cannot clone(net) because this
    # will result in the dummy_callback not being the mock anymore
    net = net_cls(
        module_cls,
        callbacks=[('dummy', dummy_callback)],
        max_epochs=10,
        lr=0.1,
    )
    # The ``net.fit(X`` call had been dropped from the listing, leaving
    # ``return, y)``.
    return net.fit(X, y)
Example #6
Source File: From carl with BSD 3-Clause "New" or "Revised" License | 6 votes |
def fit(self, X, y):
    """Fit estimator on parameterized data.

    Parameters
    ----------
    * `X` [array-like, shape=(n_samples, n_features+len(params))]:
        The samples, concatenated with the corresponding parameter values.

    * `y` [array-like, shape=(n_samples,)]:
        The output values.

    Returns
    -------
    * `self` [object]:
        `self`.
    """
    params = self.params
    self.stacker_ = ParameterStacker(params)

    # XXX: this assumes that X is extended with parameters
    n_columns = X.shape[1]
    self.n_features_ = n_columns - len(self.params)

    # Train a clone so the configured base estimator itself stays unfitted.
    fresh = clone(self.base_estimator)
    self.estimator_ = fresh.fit(X, y)

    return self
Example #7
Source File: From ml-parameter-optimization with MIT License | 6 votes |
def apply_gridsearch(self,model):
    """
    apply grid search on ml algorithm to specified parameters
    returns updated best score and parameters

    The search is fitted on ``self.X`` / ``self.y``. When its best score
    beats ``self.best_score`` (both multiplied by ``self._score_mult``),
    ``best_model``, ``best_score`` and the tuned entries of ``self._params``
    are updated. ``self._temp_score`` is always set to the search's best
    score. Returns ``self``.
    """
    # check if custom evaluation function is specified
    if callable(self.params_cv['scoring']):
        scoring = make_scorer(self.params_cv['scoring'],
                              greater_is_better=self._greater_is_better)
    else:
        scoring = self.params_cv['scoring']

    gsearch = GridSearchCV(estimator=model,
                           param_grid=self.get_params_tune(),
                           scoring=scoring,
                           iid=self.params_cv['iid'],
                           cv=self.params_cv['cv_folds'],
                           n_jobs=self.params_cv['n_jobs'])
    # The ``gsearch.fit(self.X`` call had been dropped from the listing,
    # leaving a dangling ``,self.y)``.
    gsearch.fit(self.X, self.y)

    # update best model if best_score is improved
    if (gsearch.best_score_ * self._score_mult) > (self.best_score * self._score_mult):
        self.best_model = clone(gsearch.best_estimator_)
        self.best_score = gsearch.best_score_

        # update tuned parameters with optimal values
        for key, value in gsearch.best_params_.items():
            self._params[key] = value

    self._temp_score = gsearch.best_score_
    return self
Example #8
Source File: From pyod with BSD 2-Clause "Simplified" License | 6 votes |
def _make_estimator(self, append=True, random_state=None):
    """Create a configured copy of the ``base_estimator_`` attribute.

    The copy receives ``self.estimator_params`` through ``set_params``; when
    ``random_state`` is not ``None`` it is passed to ``_set_random_states``
    together with the copy. With ``append=True`` the copy is also appended
    to ``self.estimators_``.

    Warning: This method should be used to properly instantiate new
    sub-estimators.

    NOTE(review): the original docstring pointed at a scikit-learn source
    path that is truncated to ``sklearn/`` in this listing.
    """
    # TODO: add a check for estimator_param
    sub_estimator = clone(self.base_estimator_)
    sub_estimator.set_params(**self.estimator_params)

    seeded = random_state is not None
    if seeded:
        _set_random_states(sub_estimator, random_state)

    if append:
        self.estimators_.append(sub_estimator)

    return sub_estimator
Example #9
Source File: From xam with MIT License | 6 votes |
def fit(self, X, y=None, **fit_params):
    """Fit one clone of ``self.base_model`` per distinct key in ``X``.

    Rows are grouped by the values of the ``self.by`` column; each group
    gets its own model, stored in ``self.models_`` under the group's key.

    Raises ``ValueError`` when ``X`` is not a ``pandas.DataFrame``.
    Returns ``self``.

    NOTE(review): the grouping column expression was reduced to ``X[]`` in
    this listing; ``self.by`` is the reconstructed name - confirm against
    the class's ``__init__``.
    """
    if not isinstance(X, pd.DataFrame):
        raise ValueError('X is not a pandas.DataFrame')

    self.models_ = {}

    columns = self._get_fit_columns(X)

    for key in X[self.by].unique():

        # Copy the model
        model = clone(self.base_model)

        # Select the rows that will be fitted
        mask = (X[self.by] == key).tolist()
        rows = X.index[mask]

        # Fit the model (the ``model.fit(X.loc`` prefix had been dropped
        # from the listing, leaving only ``[rows, columns], ...``)
        model.fit(X.loc[rows, columns], y[mask], **fit_params)

        # Save the model
        self.models_[key] = model

    return self
Example #10
Source File: From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_nmf_sparse_input():
    """Sparse matrices must be accepted as input and match the dense fit.

    For each solver, an ``NMF`` instance is fitted on a dense matrix and a
    clone of it on the CSC version of the same matrix; the transformed data
    and the components are compared for near-equality.
    """
    from scipy.sparse import csc_matrix

    rng = np.random.mtrand.RandomState(42)
    dense = np.abs(rng.randn(10, 10))
    # Zero out every second column (indices 0, 2, 4, 6, 8).
    dense[:, 2 * np.arange(5)] = 0
    sparse = csc_matrix(dense)

    for solver in ('cd', 'mu'):
        dense_est = NMF(solver=solver, n_components=5, init='random',
                        random_state=0, tol=1e-2)
        sparse_est = clone(dense_est)

        W_dense = dense_est.fit_transform(dense)
        W_sparse = sparse_est.fit_transform(sparse)

        assert_array_almost_equal(W_dense, W_sparse)
        assert_array_almost_equal(dense_est.components_,
                                  sparse_est.components_)
Example #11
Source File: From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_weighted_vs_repeated():
    """A sample weight of N should match an N-fold repetition of the sample.

    Uses the module-level ``X``, ``n_samples``, ``n_clusters`` and
    ``centers``.
    """
    rng = np.random.RandomState(0)
    sample_weight = rng.randint(1, 5, size=n_samples)
    X_repeat = np.repeat(X, sample_weight, axis=0)

    common = dict(n_clusters=n_clusters, random_state=42)
    estimators = [
        KMeans(init="k-means++", **common),
        KMeans(init="random", **common),
        KMeans(init=centers.copy(), **common),
        MiniBatchKMeans(n_clusters=n_clusters, batch_size=10,
                        random_state=42),
    ]

    for estimator in estimators:
        weighted = clone(estimator).fit(X, sample_weight=sample_weight)
        repeated = clone(estimator).fit(X_repeat)

        expanded_labels = np.repeat(weighted.labels_, sample_weight)
        agreement = v_measure_score(repeated.labels_, expanded_labels)
        assert_almost_equal(agreement, 1.0)

        # Cluster centers are only compared for the non-minibatch estimators.
        if isinstance(estimator, MiniBatchKMeans):
            continue
        assert_almost_equal(_sort_centers(weighted.cluster_centers_),
                            _sort_centers(repeated.cluster_centers_))
Example #12
Source File: From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_classifier_results():
    """tests if classifier results match target"""
    # Local import: ``sp`` is only needed for the reconstructed sparse fit.
    import scipy.sparse as sp

    alpha = .1
    n_features = 20
    n_samples = 10
    tol = .01
    max_iter = 200
    rng = np.random.RandomState(0)
    X = rng.normal(size=(n_samples, n_features))
    w = rng.normal(size=n_features)
    # The ``np.dot(X`` prefix had been dropped from the listing (``y =, w)``).
    y = np.dot(X, w)
    y = np.sign(y)

    clf1 = LogisticRegression(solver='sag', C=1. / alpha / n_samples,
                              max_iter=max_iter, tol=tol, random_state=77)
    clf2 = clone(clf1)

    # Both fit calls had been dropped from the listing, leaving ``, y), y)``.
    clf1.fit(X, y)
    # NOTE(review): the first argument of this fit is not recoverable from
    # the listing; upstream scikit-learn fits the clone on a CSR copy of X
    # to cover sparse input - confirm.
    clf2.fit(sp.csr_matrix(X), y)

    pred1 = clf1.predict(X)
    pred2 = clf2.predict(X)
    assert_almost_equal(pred1, y, decimal=12)
    assert_almost_equal(pred2, y, decimal=12)
Example #13
Source File: From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def check_cross_val_predict_multiclass(est, X, y, method):
    """Helper for tests of cross_val_predict with multiclass classification

    Builds the expected prediction matrix fold by fold with a 3-split
    unshuffled ``KFold``, then checks ``cross_val_predict`` against it for
    several representations of ``y``.
    """
    cv = KFold(n_splits=3, shuffle=False)

    # Generate expected outputs
    float_min = np.finfo(np.float64).min
    fill_for = {
        'decision_function': float_min,
        'predict_log_proba': float_min,
        'predict_proba': 0,
    }
    out_shape = (len(X), len(set(y)))
    expected = np.full(out_shape, fill_for[method], dtype=np.float64)

    _, y_enc = np.unique(y, return_inverse=True)
    for train, test in cv.split(X, y_enc):
        # ``est`` is deliberately rebound: the last fitted clone is what
        # gets handed to ``cross_val_predict`` below.
        est = clone(est).fit(X[train], y_enc[train])
        predict = getattr(est, method)
        fold_preds = predict(X[test])
        # Only the classes present in this training fold get filled in.
        classes_in_fold = np.unique(y_enc[train])
        expected[np.ix_(test, classes_in_fold)] = fold_preds

    # Check actual outputs for several representations of y
    variants = (y, y + 1, y - 2, y.astype('str'))
    for tg in variants:
        actual = cross_val_predict(est, X, tg, method=method, cv=cv)
        assert_allclose(actual, expected)
Example #14
Source File: From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_transform_target_regressor_2d_transformer_multioutput():
    """Check consistency with a 2D-only transformer and a 2D ``y`` array.

    Uses the module-level ``friedman`` data set.
    """
    X = friedman[0]
    y = np.vstack((friedman[1], friedman[1] ** 2 + 1)).T
    transformer = StandardScaler()
    regr = TransformedTargetRegressor(regressor=LinearRegression(),
                                      transformer=transformer)
    # The ``regr.fit(X`` prefix had been dropped from the listing
    # (``y_pred =, y).predict(X)``).
    y_pred = regr.fit(X, y).predict(X)
    assert y.shape == y_pred.shape

    # consistency forward transform
    y_tran = regr.transformer_.transform(y)
    _check_standard_scaled(y, y_tran)

    # consistency inverse transform
    assert_allclose(y, regr.transformer_.inverse_transform(
        y_tran).squeeze())

    # consistency of the regressor
    lr = LinearRegression()
    transformer2 = clone(transformer)
    # The ``lr.fit(X`` prefix had been dropped from the listing as well.
    lr.fit(X, transformer2.fit_transform(y))
    y_lr_pred = lr.predict(X)
    assert_allclose(y_pred, transformer2.inverse_transform(y_lr_pred))
    assert_allclose(regr.regressor_.coef_, lr.coef_)
Example #15
Source File: From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_fit_predict_on_pipeline():
    """Check ``fit_predict`` on a pipeline.

    Verifies that ``fit_predict`` is implemented on a pipeline and that it
    yields the same results as applying the transform and clustering steps
    separately.
    """
    iris = load_iris()
    scaler = StandardScaler()
    km = KMeans(random_state=0)
    # As pipeline doesn't clone estimators on construction,
    # it must have its own estimators
    scaler_for_pipeline = StandardScaler()
    km_for_pipeline = KMeans(random_state=0)

    # first compute the transform and clustering step separately
    # (the ``iris.data)`` argument had been dropped from the listing)
    scaled = scaler.fit_transform(iris.data)
    separate_pred = km.fit_predict(scaled)

    # use a pipeline to do the transform and clustering in one step
    pipe = Pipeline([
        ('scaler', scaler_for_pipeline),
        ('Kmeans', km_for_pipeline)
    ])
    pipeline_pred = pipe.fit_predict(iris.data)

    assert_array_almost_equal(pipeline_pred, separate_pred)
Example #16
Source File: From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_base_chain_crossval_fit_and_predict():
    """Fit chain with cross_val_predict and verify predict performance."""
    X, Y = generate_multilabel_dataset_with_correlations()

    for chain in [ClassifierChain(LogisticRegression()),
                  RegressorChain(Ridge())]:
        # Both ``.fit(X`` calls had been dropped from the listing, leaving
        # only dangling ``, Y)`` fragments.
        chain.fit(X, Y)
        chain_cv = clone(chain).set_params(cv=3)
        chain_cv.fit(X, Y)
        Y_pred_cv = chain_cv.predict(X)
        Y_pred = chain.predict(X)

        assert Y_pred_cv.shape == Y_pred.shape
        assert not np.all(Y_pred == Y_pred_cv)
        if isinstance(chain, ClassifierChain):
            assert jaccard_score(Y, Y_pred_cv, average='samples') > .4
        else:
            assert mean_squared_error(Y, Y_pred_cv) < .25
Example #17
Source File: From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_base_chain_random_order():
    """Fit base chain with random order and compare to the same fixed order."""
    X, Y = generate_multilabel_dataset_with_correlations()
    for chain in [ClassifierChain(LogisticRegression()),
                  RegressorChain(Ridge())]:
        # Both ``.fit(X`` calls had been dropped from the listing, leaving
        # only dangling ``, Y)`` fragments.
        chain_random = clone(chain).set_params(order='random',
                                               random_state=42)
        chain_random.fit(X, Y)
        chain_fixed = clone(chain).set_params(order=chain_random.order_)
        chain_fixed.fit(X, Y)
        assert_array_equal(chain_fixed.order_, chain_random.order_)
        # NOTE(review): this reads the ``order`` parameter (the string
        # 'random'), not the fitted ``order_``, so the check looks vacuous -
        # left unchanged, confirm intent.
        assert_not_equal(list(chain_random.order), list(range(4)))
        assert_equal(len(chain_random.order_), 4)
        assert_equal(len(set(chain_random.order_)), 4)
        # Randomly ordered chain should behave identically to a fixed order
        # chain with the same order.
        for est1, est2 in zip(chain_random.estimators_,
                              chain_fixed.estimators_):
            assert_array_almost_equal(est1.coef_, est2.coef_)
Example #18
Source File: From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_clone_pandas_dataframe():
    """Cloning an estimator that holds a data-frame-like parameter.

    The clone must carry an equal ``df`` parameter and the same
    ``scalar_param`` as the estimator it was made from.
    """

    class DummyEstimator(BaseEstimator, TransformerMixin):
        """This is a dummy class for generating numerical features

        This feature extractor extracts numerical features from pandas data
        frame.

        Parameters
        ----------

        df: pandas data frame
            The pandas data frame parameter.

        Notes
        -----
        """
        def __init__(self, df=None, scalar_param=1):
            self.df = df
            self.scalar_param = scalar_param

        def fit(self, X, y=None):
            pass

        def transform(self, X):
            pass

    # build and clone estimator
    frame = MockDataFrame(np.arange(10))
    source = DummyEstimator(frame, scalar_param=1)
    duplicate = clone(source)

    # the test
    frames_match = (source.df == duplicate.df).values.all()
    assert frames_match
    assert_equal(source.scalar_param, duplicate.scalar_param)
Example #19
Source File: From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_sag_regressor():
    """tests if the sag regressor performs well"""
    # Local import: ``sp`` is only needed for the reconstructed sparse fits.
    import scipy.sparse as sp

    xmin, xmax = -5, 5
    n_samples = 20
    tol = .001
    max_iter = 50
    alpha = 0.1
    rng = np.random.RandomState(0)
    X = np.linspace(xmin, xmax, n_samples).reshape(n_samples, 1)

    # simple linear function without noise
    y = 0.5 * X.ravel()

    clf1 = Ridge(tol=tol, solver='sag', max_iter=max_iter,
                 alpha=alpha * n_samples, random_state=rng)
    clf2 = clone(clf1)
    # The fit calls had been dropped from the listing, leaving ``, y), y)``.
    clf1.fit(X, y)
    # NOTE(review): the first argument of the clone's fit is not recoverable
    # from the listing; upstream scikit-learn fits it on a CSR copy of X to
    # cover sparse input - confirm.
    clf2.fit(sp.csr_matrix(X), y)
    score1 = clf1.score(X, y)
    score2 = clf2.score(X, y)
    assert_greater(score1, 0.99)
    assert_greater(score2, 0.99)

    # simple linear function with noise
    y = 0.5 * X.ravel() + rng.randn(n_samples, 1).ravel()

    clf1 = Ridge(tol=tol, solver='sag', max_iter=max_iter,
                 alpha=alpha * n_samples)
    clf2 = clone(clf1)
    clf1.fit(X, y)
    clf2.fit(sp.csr_matrix(X), y)
    score1 = clf1.score(X, y)
    # (a duplicated ``score2 = clf2.score(X, y)`` line was removed here)
    score2 = clf2.score(X, y)
    assert_greater(score1, 0.5)
    assert_greater(score2, 0.5)
Example #20
Source File: From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_sag_pobj_matches_ridge_regression():
    """tests if the sag pobj matches ridge reg"""
    # Local import: ``sp`` is only needed for the reconstructed sparse fit.
    import scipy.sparse as sp

    n_samples = 100
    n_features = 10
    alpha = 1.0
    n_iter = 100
    fit_intercept = False
    rng = np.random.RandomState(10)
    X = rng.normal(size=(n_samples, n_features))
    true_w = rng.normal(size=n_features)
    # The right-hand side had been dropped from the listing (``y = clf1 =``);
    # the target is the noiseless linear response to ``true_w``.
    y = X.dot(true_w)

    clf1 = Ridge(fit_intercept=fit_intercept, tol=.00000000001, solver='sag',
                 alpha=alpha, max_iter=n_iter, random_state=42)
    clf2 = clone(clf1)
    clf3 = Ridge(fit_intercept=fit_intercept, tol=.00001, solver='lsqr',
                 alpha=alpha, max_iter=n_iter, random_state=42)

    # The three fit calls had been dropped, leaving ``, y), y), y)``.
    clf1.fit(X, y)
    # NOTE(review): the first argument of the clone's fit is not recoverable
    # from the listing; upstream scikit-learn fits it on a CSR copy of X to
    # cover sparse input - confirm.
    clf2.fit(sp.csr_matrix(X), y)
    clf3.fit(X, y)

    pobj1 = get_pobj(clf1.coef_, alpha, X, y, squared_loss)
    pobj2 = get_pobj(clf2.coef_, alpha, X, y, squared_loss)
    pobj3 = get_pobj(clf3.coef_, alpha, X, y, squared_loss)

    assert_array_almost_equal(pobj1, pobj2, decimal=4)
    assert_array_almost_equal(pobj1, pobj3, decimal=4)
    assert_array_almost_equal(pobj3, pobj2, decimal=4)
Example #21
Source File: From carl with BSD 3-Clause "New" or "Revised" License | 5 votes |
def _clone(self):
    """Clone this estimator, keeping ``base_estimator`` when prefit.

    ``clone`` is called with ``original=True``; when ``self.cv`` equals
    ``"prefit"`` the clone's ``base_estimator`` is set to this instance's
    own ``base_estimator`` object. Returns the clone.
    """
    estimator = clone(self, original=True)
    # The ``self.cv`` operand had been dropped from the listing, leaving the
    # invalid ``if == "prefit":``.
    if self.cv == "prefit":
        estimator.base_estimator = self.base_estimator

    return estimator
Example #22
Source File: From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_sag_pobj_matches_logistic_regression():
    """tests if the sag pobj matches log reg"""
    # Local import: ``sp`` is only needed for the reconstructed sparse fit.
    import scipy.sparse as sp

    n_samples = 100
    alpha = 1.0
    max_iter = 20
    X, y = make_blobs(n_samples=n_samples, centers=2, random_state=0,
                      cluster_std=0.1)

    clf1 = LogisticRegression(solver='sag', fit_intercept=False, tol=.0000001,
                              C=1. / alpha / n_samples, max_iter=max_iter,
                              random_state=10, multi_class='ovr')
    clf2 = clone(clf1)
    clf3 = LogisticRegression(fit_intercept=False, tol=.0000001,
                              C=1. / alpha / n_samples, max_iter=max_iter,
                              random_state=10, multi_class='ovr',
                              solver='lbfgs')

    # The three fit calls had been dropped, leaving ``, y), y), y)``.
    clf1.fit(X, y)
    # NOTE(review): the first argument of the clone's fit is not recoverable
    # from the listing; upstream scikit-learn fits it on a CSR copy of X to
    # cover sparse input - confirm.
    clf2.fit(sp.csr_matrix(X), y)
    clf3.fit(X, y)

    pobj1 = get_pobj(clf1.coef_, alpha, X, y, log_loss)
    pobj2 = get_pobj(clf2.coef_, alpha, X, y, log_loss)
    pobj3 = get_pobj(clf3.coef_, alpha, X, y, log_loss)

    assert_array_almost_equal(pobj1, pobj2, decimal=4)
    assert_array_almost_equal(pobj2, pobj3, decimal=4)
    assert_array_almost_equal(pobj3, pobj1, decimal=4)
Example #23
Source File: From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_clone(klass):
    """Test whether clone works ok.

    A cloned estimator whose penalty is switched to 'l2' must learn the
    same coefficients as one constructed with 'l2' directly. Uses the
    module-level ``X`` and ``Y``.
    """
    clf = klass(alpha=0.01, penalty='l1')
    clf = clone(clf)
    clf.set_params(penalty='l2')
    # Both ``.fit(X`` calls had been dropped from the listing, leaving only
    # dangling ``, Y)`` fragments.
    clf.fit(X, Y)

    clf2 = klass(alpha=0.01, penalty='l2')
    clf2.fit(X, Y)

    assert_array_equal(clf.coef_, clf2.coef_)
Example #24
Source File: From DataShapley with MIT License | 5 votes |
def one_iteration(clf, X, y, X_test, y_test, mean_score, tol=0.0, c=None,
                  metric='accuracy'):
    """Runs one iteration of TMC-Shapley.

    Groups of training points are added in a random order; after each
    addition the model is refitted on the accumulated batch and scored on
    the test data. The change in score, divided by the group size, is
    recorded as the marginal contribution of every point in the group.

    Args:
        clf: Model exposing ``fit`` and ``score`` (and ``predict_proba``
            when ``metric == 'auc'``).
        X, y: Training data and labels.
        X_test, y_test: Data used to score each refit.
        mean_score: Reference score; the loop stops once the relative
            distance ``|new_score - mean_score| / mean_score`` is below
            ``tol``.
        tol: Truncation tolerance.
        c: Mapping from group index to an array of row indices; defaults to
            one group per training point.
        metric: ``'accuracy'`` or ``'auc'``.

    Returns:
        ``(marginal_contribs, idxs)``: per-point contributions and the
        random order in which the groups were visited.

    Raises:
        ValueError: If ``metric`` is neither ``'auc'`` nor ``'accuracy'``.
    """
    if metric == 'auc':
        def score_func(clf, a, b):
            return roc_auc_score(b, clf.predict_proba(a)[:, 1])
    elif metric == 'accuracy':
        def score_func(clf, a, b):
            return clf.score(a, b)
    else:
        raise ValueError("Wrong metric!")

    if c is None:
        c = {i: np.array([i]) for i in range(len(X))}

    idxs = np.random.permutation(len(c.keys()))
    marginal_contribs = np.zeros(len(X))

    # Starting score: majority-class frequency when every label is
    # integer-valued, otherwise 0.
    if np.mean(y // 1 == y / 1) == 1:
        new_score = np.max(np.bincount(y)) * 1. / len(y)
    else:
        new_score = 0.

    # The original carried a hard-coded ``start = 0`` with an unreachable
    # ``if start:`` branch; the batch therefore always starts empty.
    X_batch = np.zeros((0,) + tuple(X.shape[1:]))
    y_batch = np.zeros(0).astype(int)

    for idx in idxs:
        try:
            clf = clone(clf)
        except Exception:
            # Fallback when cloning fails: fit on an empty array instead.
            # (The ``clf.fit(np.zeros((0`` prefix had been dropped from the
            # listing.) NOTE(review): presumably meant to reset a model
            # that cannot be cloned - confirm.
            clf.fit(np.zeros((0,) + X.shape[1:]), y)

        old_score = new_score
        X_batch = np.concatenate([X_batch, X[c[idx]]])
        y_batch = np.concatenate([y_batch, y[c[idx]]])

        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            try:
                # The ``clf.fit(X_batch`` prefix had been dropped as well.
                clf.fit(X_batch, y_batch)
                temp_score = score_func(clf, X_test, y_test)
                # Removing meaningless r2 scores
                if temp_score > -1 and temp_score < 1.:
                    new_score = temp_score
            except Exception:
                # Best effort: a batch the model cannot be fitted or scored
                # on contributes nothing for this group.
                continue

        marginal_contribs[c[idx]] = (new_score - old_score) / len(c[idx])
        if np.abs(new_score - mean_score) / mean_score < tol:
            break

    return marginal_contribs, idxs
Example #25
Source File: From DataShapley with MIT License | 5 votes |
def restart_model(self):
    """Replace ``self.model`` with a clone, or refit it on empty data.

    When ``clone`` raises, the existing model is instead fitted on a
    zero-row array shaped like ``self.X`` together with ``self.y``.
    NOTE(review): presumably this resets models that cannot be cloned -
    confirm.
    """
    try:
        self.model = clone(self.model)
    except Exception:
        # The ``self.model.fit(np.zeros((0`` prefix had been dropped from
        # the listing, leaving ``,) + self.X.shape[1:]), self.y)``.
        self.model.fit(np.zeros((0,) + self.X.shape[1:]), self.y)
Example #26
Source File: From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_multi_output_classification():
    """Check ``MultiOutputClassifier`` against per-column forests.

    test if multi_target initializes correctly with base estimator and fit
    assert predictions work as expected for predict, predict_proba and score.
    Uses the module-level ``X``, ``y``, ``n_samples``, ``n_outputs`` and
    ``n_classes``.
    """
    forest = RandomForestClassifier(n_estimators=10, random_state=1)
    multi_target_forest = MultiOutputClassifier(forest)

    # train the multi_target_forest and also get the predictions.
    # (the ``multi_target_forest.fit(X`` call had been dropped from the
    # listing)
    multi_target_forest.fit(X, y)

    predictions = multi_target_forest.predict(X)
    assert_equal((n_samples, n_outputs), predictions.shape)

    predict_proba = multi_target_forest.predict_proba(X)

    assert len(predict_proba) == n_outputs
    for class_probabilities in predict_proba:
        assert_equal((n_samples, n_classes), class_probabilities.shape)

    assert_array_equal(np.argmax(np.dstack(predict_proba), axis=1),
                       predictions)

    # train the forest with each column and assert that predictions are equal
    for i in range(3):
        forest_ = clone(forest)  # create a clone with the same state
        # (the ``forest_.fit(X`` call had been dropped from the listing)
        forest_.fit(X, y[:, i])
        assert_equal(list(forest_.predict(X)), list(predictions[:, i]))
        assert_array_equal(list(forest_.predict_proba(X)),
                           list(predict_proba[i]))
Example #27
Source File: From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_multi_output_classification_partial_fit():
    """Check ``MultiOutputClassifier.partial_fit`` against per-column SGD.

    The wrapper is trained in two halves; after each half its predictions
    must equal those of a plain ``SGDClassifier`` trained the same way on
    each output column. Uses the module-level ``X``, ``y``, ``classes``,
    ``n_samples`` and ``n_outputs``.
    """
    sgd_linear_clf = SGDClassifier(loss='log', random_state=1, max_iter=5)
    multi_target_linear = MultiOutputClassifier(sgd_linear_clf)

    # train the multi_target_linear and also get the predictions.
    half_index = X.shape[0] // 2
    X_first, y_first = X[:half_index], y[:half_index]
    multi_target_linear.partial_fit(X_first, y_first, classes=classes)

    first_predictions = multi_target_linear.predict(X)
    assert_equal((n_samples, n_outputs), first_predictions.shape)

    multi_target_linear.partial_fit(X[half_index:], y[half_index:])
    second_predictions = multi_target_linear.predict(X)
    assert_equal((n_samples, n_outputs), second_predictions.shape)

    # train the linear classification with each column and assert that
    # predictions are equal after first partial_fit and second partial_fit
    for i in range(3):
        # create a clone with the same state
        sgd_linear_clf = clone(sgd_linear_clf)

        sgd_linear_clf.partial_fit(
            X[:half_index], y[:half_index, i], classes=classes[i])
        after_first = sgd_linear_clf.predict(X)
        assert_array_equal(after_first, first_predictions[:, i])

        sgd_linear_clf.partial_fit(X[half_index:], y[half_index:, i])
        after_second = sgd_linear_clf.predict(X)
        assert_array_equal(after_second, second_predictions[:, i])


# 0.23. warning about tol not having its correct default value.
Example #28
Source File: From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def _check_behavior_2d_for_constant(clf):
    """Check prediction shape for a 2-D target (2d case only).

    A clone of ``clf`` is fitted on a 4x5 target and must predict an array
    with the same shape as that target.
    """
    X = np.array([[0], [0], [0], [0]])  # ignored
    y = np.array([[1, 0, 5, 4, 3],
                  [2, 0, 1, 2, 5],
                  [1, 0, 4, 5, 2],
                  [1, 3, 3, 2, 0]])
    est = clone(clf)
    # The ``est.fit(X`` call had been dropped from the listing, leaving only
    # a dangling ``, y)``.
    est.fit(X, y)
    y_pred = est.predict(X)
    assert_equal(y.shape, y_pred.shape)
Example #29
Source File: From autoimpute with MIT License | 5 votes |
def fit(self, X, **kwargs):
    """Fit an individual classifier for each column in the DataFrame.

    For each feature in the DataFrame, a classifier (default: xgboost) is
    fit with the feature as the response (y) and all other features as
    covariates (X). The resulting classifiers are stored in the class
    instance statistics. One `fit` for each column in the dataset. Column
    specification will be supported as well.

    Args:
        X (pd.DataFrame): DataFrame on which to fit classifiers
        **kwargs: keyword arguments used by classifiers

    Returns:
        self: instance of MissingnessClassifier
    """
    # start with fit checks
    self._fit_strategy_validator(X)
    self.statistics_ = {}

    # iterate missingness fit using classifier and all remaining columns
    for column in self.data_mi:
        # only fit non time-based columns...
        # NOTE(review): this passes the loop variable itself to
        # ``np.issubdtype``; if iterating ``self.data_mi`` yields column
        # names rather than dtypes, the check may not do what is intended -
        # confirm.
        if not np.issubdtype(column, np.datetime64):
            y = self.data_mi[column]
            preds = self._preds[column]
            if preds == "all":
                x = X.drop(column, axis=1)
            else:
                x = X[preds]
            clf = clone(self.classifier)
            # The ``clf.fit(x.values`` prefix had been dropped from the
            # listing, leaving ``cls_fit =, y.values, **kwargs)``.
            cls_fit = clf.fit(x.values, y.values, **kwargs)
            self.statistics_[column] = cls_fit
    return self
Example #30
Source File: From carl with BSD 3-Clause "New" or "Revised" License | 5 votes |
def _clone(estimator, safe=True, original=False): # XXX: This is a monkey patch to allow cloning of # CalibratedClassifierCV(cv="prefit"), while keeping the original # base_estimator. Do not reproduce at home! if hasattr(estimator, "_clone") and not original: return estimator._clone() else: return sk_clone(estimator, safe=safe)