Python sklearn.base.clone() Examples
The following are 30 code examples of sklearn.base.clone(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module sklearn.base, or try the search function.
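As a quick orientation before the project examples, here is a minimal sketch (not taken from any of the projects below) of what clone() does: it constructs a new, unfitted estimator with the same constructor parameters as the original, without copying any fitted state. The estimator class and parameter values are only illustrative.

from sklearn.base import clone
from sklearn.linear_model import LogisticRegression

# Original estimator with custom constructor parameters.
original = LogisticRegression(C=0.5, max_iter=200)

# clone() returns a fresh, unfitted estimator with the same parameters;
# fitted attributes of the original (e.g. coef_) are NOT carried over.
copy = clone(original)

assert copy is not original
assert copy.get_params() == original.get_params()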
Example #1
Source File: test_dummy.py From Mastering-Elasticsearch-7.0 with MIT License | 7 votes |
def _check_behavior_2d(clf):
    # 1d case
    X = np.array([[0], [0], [0], [0]])  # ignored
    y = np.array([1, 2, 1, 1])
    est = clone(clf)
    est.fit(X, y)
    y_pred = est.predict(X)
    assert_equal(y.shape, y_pred.shape)

    # 2d case
    y = np.array([[1, 0],
                  [2, 0],
                  [1, 0],
                  [1, 3]])
    est = clone(clf)
    est.fit(X, y)
    y_pred = est.predict(X)
    assert_equal(y.shape, y_pred.shape)
Example #2
Source File: fixes.py From skutil with BSD 3-Clause "New" or "Revised" License | 7 votes |
def _do_fit(n_jobs, verbose, pre_dispatch, base_estimator,
            X, y, scorer, parameter_iterable, fit_params,
            error_score, cv, **kwargs):
    groups = kwargs.pop('groups')

    # test_score, n_samples, parameters
    out = Parallel(n_jobs=n_jobs, verbose=verbose, pre_dispatch=pre_dispatch)(
        delayed(_fit_and_score)(
            clone(base_estimator), X, y, scorer,
            train, test, verbose, parameters,
            fit_params=fit_params,
            return_train_score=False,
            return_n_test_samples=True,
            return_times=False,
            return_parameters=True,
            error_score=error_score)
        for parameters in parameter_iterable
        for train, test in cv.split(X, y, groups))

    # test_score, n_samples, _, parameters
    return [(mod[0], mod[1], None, mod[2]) for mod in out]
Example #3
Source File: cluster.py From scikit-plot with MIT License | 6 votes |
def _clone_and_score_clusterer(clf, X, n_clusters):
    """Clones and scores clusterer instance.

    Args:
        clf: Clusterer instance that implements ``fit``, ``fit_predict``, and
            ``score`` methods, and an ``n_clusters`` hyperparameter.
            e.g. :class:`sklearn.cluster.KMeans` instance

        X (array-like, shape (n_samples, n_features)):
            Data to cluster, where n_samples is the number of samples
            and n_features is the number of features.

        n_clusters (int): Number of clusters

    Returns:
        score: Score of clusters

        time: Number of seconds it took to fit cluster
    """
    start = time.time()
    clf = clone(clf)
    setattr(clf, 'n_clusters', n_clusters)
    return clf.fit(X).score(X), time.time() - start
Example #4
Source File: test_net.py From skorch with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_changing_model_reinitializes_optimizer(self, net, data):
    # The idea is that we change the model using `set_params` to
    # add parameters. Since the optimizer depends on the model
    # parameters it needs to be reinitialized.
    X, y = data

    net.set_params(module__nonlin=nn.ReLU())
    net.fit(X, y)

    net.set_params(module__nonlin=nn.PReLU())
    assert isinstance(net.module_.nonlin, nn.PReLU)
    d1 = net.module_.nonlin.weight.data.clone().cpu().numpy()

    # make sure that we do not initialize again by making sure that
    # the network is initialized and by using partial_fit.
    assert net.initialized_
    net.partial_fit(X, y)
    d2 = net.module_.nonlin.weight.data.clone().cpu().numpy()

    # all newly introduced parameters should have been trained (changed)
    # by the optimizer after 10 epochs.
    assert (abs(d2 - d1) > 1e-05).all()
Example #5
Source File: test_net.py From skorch with BSD 3-Clause "New" or "Revised" License | 6 votes |
def net_fit(self, net_cls, module_cls, dummy_callback, data):
    # Careful, don't call additional fits or set_params on this,
    # since that would have side effects on other tests.
    X, y = data

    # We need a new instance of the net and cannot reuse the net
    # fixture, because otherwise fixture net and net_fit refer to
    # the same object; also, we cannot clone(net) because this
    # will result in the dummy_callback not being the mock anymore
    net = net_cls(
        module_cls,
        callbacks=[('dummy', dummy_callback)],
        max_epochs=10,
        lr=0.1,
    )
    return net.fit(X, y)
Example #6
Source File: parameterize.py From carl with BSD 3-Clause "New" or "Revised" License | 6 votes |
def fit(self, X, y):
    """Fit estimator on parameterized data.

    Parameters
    ----------
    * `X` [array-like, shape=(n_samples, n_features+len(params))]:
        The samples, concatenated with the corresponding parameter values.

    * `y` [array-like, shape=(n_samples,)]:
        The output values.

    Returns
    -------
    * `self` [object]:
        `self`.
    """
    self.stacker_ = ParameterStacker(self.params)

    # XXX: this assumes that X is extended with parameters
    self.n_features_ = X.shape[1] - len(self.params)

    self.estimator_ = clone(self.base_estimator).fit(X, y)

    return self
Example #7
Source File: ml_tune.py From ml-parameter-optimization with MIT License | 6 votes |
def apply_gridsearch(self, model):
    """
    apply grid search on ml algorithm to specified parameters
    returns updated best score and parameters
    """
    # check if custom evaluation function is specified
    if callable(self.params_cv['scoring']):
        scoring = make_scorer(self.params_cv['scoring'],
                              greater_is_better=self._greater_is_better)
    else:
        scoring = self.params_cv['scoring']

    gsearch = GridSearchCV(estimator=model, param_grid=self.get_params_tune(),
                           scoring=scoring, iid=self.params_cv['iid'],
                           cv=self.params_cv['cv_folds'],
                           n_jobs=self.params_cv['n_jobs'])
    gsearch.fit(self.X, self.y)

    # update best model if best_score is improved
    if (gsearch.best_score_ * self._score_mult) > (self.best_score * self._score_mult):
        self.best_model = clone(gsearch.best_estimator_)
        self.best_score = gsearch.best_score_

    # update tuned parameters with optimal values
    for key, value in gsearch.best_params_.items():
        self._params[key] = value

    self._temp_score = gsearch.best_score_
    return self
Example #8
Source File: feature_bagging.py From pyod with BSD 2-Clause "Simplified" License | 6 votes |
def _make_estimator(self, append=True, random_state=None):
    """Make and configure a copy of the `base_estimator_` attribute.
    sklearn/base.py

    Warning: This method should be used to properly instantiate new
    sub-estimators.
    """
    # TODO: add a check for estimator_param
    estimator = clone(self.base_estimator_)
    estimator.set_params(**self.estimator_params)

    if random_state is not None:
        _set_random_states(estimator, random_state)

    if append:
        self.estimators_.append(estimator)

    return estimator
Example #9
Source File: groupby_model.py From xam with MIT License | 6 votes |
def fit(self, X, y=None, **fit_params):

    if not isinstance(X, pd.DataFrame):
        raise ValueError('X is not a pandas.DataFrame')

    self.models_ = {}
    columns = self._get_fit_columns(X)

    for key in X[self.by].unique():

        # Copy the model
        model = clone(self.base_model)

        # Select the rows that will be fitted
        mask = (X[self.by] == key).tolist()
        rows = X.index[mask]

        # Fit the model
        model.fit(X.loc[rows, columns], y[mask], **fit_params)

        # Save the model
        self.models_[key] = model

    return self
Example #10
Source File: test_nmf.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_nmf_sparse_input():
    # Test that sparse matrices are accepted as input
    from scipy.sparse import csc_matrix

    rng = np.random.mtrand.RandomState(42)
    A = np.abs(rng.randn(10, 10))
    A[:, 2 * np.arange(5)] = 0
    A_sparse = csc_matrix(A)

    for solver in ('cd', 'mu'):
        est1 = NMF(solver=solver, n_components=5, init='random',
                   random_state=0, tol=1e-2)
        est2 = clone(est1)

        W1 = est1.fit_transform(A)
        W2 = est2.fit_transform(A_sparse)
        H1 = est1.components_
        H2 = est2.components_

        assert_array_almost_equal(W1, W2)
        assert_array_almost_equal(H1, H2)
Example #11
Source File: test_k_means.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_weighted_vs_repeated():
    # a sample weight of N should yield the same result as an N-fold
    # repetition of the sample
    rng = np.random.RandomState(0)
    sample_weight = rng.randint(1, 5, size=n_samples)
    X_repeat = np.repeat(X, sample_weight, axis=0)
    estimators = [KMeans(init="k-means++", n_clusters=n_clusters,
                         random_state=42),
                  KMeans(init="random", n_clusters=n_clusters,
                         random_state=42),
                  KMeans(init=centers.copy(), n_clusters=n_clusters,
                         random_state=42),
                  MiniBatchKMeans(n_clusters=n_clusters, batch_size=10,
                                  random_state=42)]
    for estimator in estimators:
        est_weighted = clone(estimator).fit(X, sample_weight=sample_weight)
        est_repeated = clone(estimator).fit(X_repeat)
        repeated_labels = np.repeat(est_weighted.labels_, sample_weight)
        assert_almost_equal(v_measure_score(est_repeated.labels_,
                                            repeated_labels), 1.0)
        if not isinstance(estimator, MiniBatchKMeans):
            assert_almost_equal(_sort_centers(est_weighted.cluster_centers_),
                                _sort_centers(est_repeated.cluster_centers_))
Example #12
Source File: test_sag.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_classifier_results():
    """tests if classifier results match target"""
    alpha = .1
    n_features = 20
    n_samples = 10
    tol = .01
    max_iter = 200
    rng = np.random.RandomState(0)
    X = rng.normal(size=(n_samples, n_features))
    w = rng.normal(size=n_features)
    y = np.dot(X, w)
    y = np.sign(y)
    clf1 = LogisticRegression(solver='sag', C=1. / alpha / n_samples,
                              max_iter=max_iter, tol=tol, random_state=77)
    clf2 = clone(clf1)
    clf1.fit(X, y)
    clf2.fit(sp.csr_matrix(X), y)
    pred1 = clf1.predict(X)
    pred2 = clf2.predict(X)
    assert_almost_equal(pred1, y, decimal=12)
    assert_almost_equal(pred2, y, decimal=12)
Example #13
Source File: test_validation.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def check_cross_val_predict_multiclass(est, X, y, method):
    """Helper for tests of cross_val_predict with multiclass classification"""
    cv = KFold(n_splits=3, shuffle=False)

    # Generate expected outputs
    float_min = np.finfo(np.float64).min
    default_values = {'decision_function': float_min,
                      'predict_log_proba': float_min,
                      'predict_proba': 0}
    expected_predictions = np.full((len(X), len(set(y))),
                                   default_values[method],
                                   dtype=np.float64)
    _, y_enc = np.unique(y, return_inverse=True)
    for train, test in cv.split(X, y_enc):
        est = clone(est).fit(X[train], y_enc[train])
        fold_preds = getattr(est, method)(X[test])
        i_cols_fit = np.unique(y_enc[train])
        expected_predictions[np.ix_(test, i_cols_fit)] = fold_preds

    # Check actual outputs for several representations of y
    for tg in [y, y + 1, y - 2, y.astype('str')]:
        assert_allclose(cross_val_predict(est, X, tg, method=method, cv=cv),
                        expected_predictions)
Example #14
Source File: test_target.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_transform_target_regressor_2d_transformer_multioutput():
    # Check consistency with transformer accepting only 2D array and a 2D y
    # array.
    X = friedman[0]
    y = np.vstack((friedman[1], friedman[1] ** 2 + 1)).T
    transformer = StandardScaler()
    regr = TransformedTargetRegressor(regressor=LinearRegression(),
                                      transformer=transformer)
    y_pred = regr.fit(X, y).predict(X)
    assert y.shape == y_pred.shape
    # consistency forward transform
    y_tran = regr.transformer_.transform(y)
    _check_standard_scaled(y, y_tran)
    assert y.shape == y_pred.shape
    # consistency inverse transform
    assert_allclose(y, regr.transformer_.inverse_transform(
        y_tran).squeeze())
    # consistency of the regressor
    lr = LinearRegression()
    transformer2 = clone(transformer)
    lr.fit(X, transformer2.fit_transform(y))
    y_lr_pred = lr.predict(X)
    assert_allclose(y_pred, transformer2.inverse_transform(y_lr_pred))
    assert_allclose(regr.regressor_.coef_, lr.coef_)
Example #15
Source File: test_pipeline.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_fit_predict_on_pipeline():
    # test that the fit_predict method is implemented on a pipeline
    # test that the fit_predict on pipeline yields same results as applying
    # transform and clustering steps separately
    iris = load_iris()
    scaler = StandardScaler()
    km = KMeans(random_state=0)
    # As pipeline doesn't clone estimators on construction,
    # it must have its own estimators
    scaler_for_pipeline = StandardScaler()
    km_for_pipeline = KMeans(random_state=0)

    # first compute the transform and clustering step separately
    scaled = scaler.fit_transform(iris.data)
    separate_pred = km.fit_predict(scaled)

    # use a pipeline to do the transform and clustering in one step
    pipe = Pipeline([
        ('scaler', scaler_for_pipeline),
        ('Kmeans', km_for_pipeline)
    ])
    pipeline_pred = pipe.fit_predict(iris.data)

    assert_array_almost_equal(pipeline_pred, separate_pred)
Example #16
Source File: test_multioutput.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_base_chain_crossval_fit_and_predict():
    # Fit chain with cross_val_predict and verify predict
    # performance
    X, Y = generate_multilabel_dataset_with_correlations()

    for chain in [ClassifierChain(LogisticRegression()),
                  RegressorChain(Ridge())]:
        chain.fit(X, Y)
        chain_cv = clone(chain).set_params(cv=3)
        chain_cv.fit(X, Y)
        Y_pred_cv = chain_cv.predict(X)
        Y_pred = chain.predict(X)

        assert Y_pred_cv.shape == Y_pred.shape
        assert not np.all(Y_pred == Y_pred_cv)
        if isinstance(chain, ClassifierChain):
            assert jaccard_score(Y, Y_pred_cv, average='samples') > .4
        else:
            assert mean_squared_error(Y, Y_pred_cv) < .25
Example #17
Source File: test_multioutput.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_base_chain_random_order():
    # Fit base chain with random order
    X, Y = generate_multilabel_dataset_with_correlations()
    for chain in [ClassifierChain(LogisticRegression()),
                  RegressorChain(Ridge())]:
        chain_random = clone(chain).set_params(order='random',
                                               random_state=42)
        chain_random.fit(X, Y)
        chain_fixed = clone(chain).set_params(order=chain_random.order_)
        chain_fixed.fit(X, Y)
        assert_array_equal(chain_fixed.order_, chain_random.order_)
        assert_not_equal(list(chain_random.order), list(range(4)))
        assert_equal(len(chain_random.order_), 4)
        assert_equal(len(set(chain_random.order_)), 4)
        # Randomly ordered chain should behave identically to a fixed order
        # chain with the same order.
        for est1, est2 in zip(chain_random.estimators_,
                              chain_fixed.estimators_):
            assert_array_almost_equal(est1.coef_, est2.coef_)
Example #18
Source File: test_base.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_clone_pandas_dataframe():

    class DummyEstimator(BaseEstimator, TransformerMixin):
        """This is a dummy class for generating numerical features

        This feature extractor extracts numerical features from pandas data
        frame.

        Parameters
        ----------

        df: pandas data frame
            The pandas data frame parameter.

        Notes
        -----
        """
        def __init__(self, df=None, scalar_param=1):
            self.df = df
            self.scalar_param = scalar_param

        def fit(self, X, y=None):
            pass

        def transform(self, X):
            pass

    # build and clone estimator
    d = np.arange(10)
    df = MockDataFrame(d)
    e = DummyEstimator(df, scalar_param=1)
    cloned_e = clone(e)

    # the test
    assert (e.df == cloned_e.df).values.all()
    assert_equal(e.scalar_param, cloned_e.scalar_param)
Example #19
Source File: test_sag.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_sag_regressor():
    """tests if the sag regressor performs well"""
    xmin, xmax = -5, 5
    n_samples = 20
    tol = .001
    max_iter = 50
    alpha = 0.1
    rng = np.random.RandomState(0)
    X = np.linspace(xmin, xmax, n_samples).reshape(n_samples, 1)

    # simple linear function without noise
    y = 0.5 * X.ravel()

    clf1 = Ridge(tol=tol, solver='sag', max_iter=max_iter,
                 alpha=alpha * n_samples, random_state=rng)
    clf2 = clone(clf1)
    clf1.fit(X, y)
    clf2.fit(sp.csr_matrix(X), y)
    score1 = clf1.score(X, y)
    score2 = clf2.score(X, y)
    assert_greater(score1, 0.99)
    assert_greater(score2, 0.99)

    # simple linear function with noise
    y = 0.5 * X.ravel() + rng.randn(n_samples, 1).ravel()

    clf1 = Ridge(tol=tol, solver='sag', max_iter=max_iter,
                 alpha=alpha * n_samples)
    clf2 = clone(clf1)
    clf1.fit(X, y)
    clf2.fit(sp.csr_matrix(X), y)
    score1 = clf1.score(X, y)
    score2 = clf2.score(X, y)
    assert_greater(score1, 0.5)
    assert_greater(score2, 0.5)
Example #20
Source File: test_sag.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_sag_pobj_matches_ridge_regression():
    """tests if the sag pobj matches ridge reg"""
    n_samples = 100
    n_features = 10
    alpha = 1.0
    n_iter = 100
    fit_intercept = False
    rng = np.random.RandomState(10)
    X = rng.normal(size=(n_samples, n_features))
    true_w = rng.normal(size=n_features)
    y = X.dot(true_w)

    clf1 = Ridge(fit_intercept=fit_intercept, tol=.00000000001, solver='sag',
                 alpha=alpha, max_iter=n_iter, random_state=42)
    clf2 = clone(clf1)
    clf3 = Ridge(fit_intercept=fit_intercept, tol=.00001, solver='lsqr',
                 alpha=alpha, max_iter=n_iter, random_state=42)

    clf1.fit(X, y)
    clf2.fit(sp.csr_matrix(X), y)
    clf3.fit(X, y)

    pobj1 = get_pobj(clf1.coef_, alpha, X, y, squared_loss)
    pobj2 = get_pobj(clf2.coef_, alpha, X, y, squared_loss)
    pobj3 = get_pobj(clf3.coef_, alpha, X, y, squared_loss)

    assert_array_almost_equal(pobj1, pobj2, decimal=4)
    assert_array_almost_equal(pobj1, pobj3, decimal=4)
    assert_array_almost_equal(pobj3, pobj2, decimal=4)
Example #21
Source File: calibration.py From carl with BSD 3-Clause "New" or "Revised" License | 5 votes |
def _clone(self):
    estimator = clone(self, original=True)

    if self.cv == "prefit":
        estimator.base_estimator = self.base_estimator

    return estimator
Example #22
Source File: test_sag.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_sag_pobj_matches_logistic_regression():
    """tests if the sag pobj matches log reg"""
    n_samples = 100
    alpha = 1.0
    max_iter = 20
    X, y = make_blobs(n_samples=n_samples, centers=2, random_state=0,
                      cluster_std=0.1)

    clf1 = LogisticRegression(solver='sag', fit_intercept=False, tol=.0000001,
                              C=1. / alpha / n_samples, max_iter=max_iter,
                              random_state=10, multi_class='ovr')
    clf2 = clone(clf1)
    clf3 = LogisticRegression(fit_intercept=False, tol=.0000001,
                              C=1. / alpha / n_samples, max_iter=max_iter,
                              random_state=10, multi_class='ovr',
                              solver='lbfgs')
    clf1.fit(X, y)
    clf2.fit(sp.csr_matrix(X), y)
    clf3.fit(X, y)

    pobj1 = get_pobj(clf1.coef_, alpha, X, y, log_loss)
    pobj2 = get_pobj(clf2.coef_, alpha, X, y, log_loss)
    pobj3 = get_pobj(clf3.coef_, alpha, X, y, log_loss)

    assert_array_almost_equal(pobj1, pobj2, decimal=4)
    assert_array_almost_equal(pobj2, pobj3, decimal=4)
    assert_array_almost_equal(pobj3, pobj1, decimal=4)
Example #23
Source File: test_sgd.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_clone(klass):
    # Test whether clone works ok.
    clf = klass(alpha=0.01, penalty='l1')
    clf = clone(clf)
    clf.set_params(penalty='l2')
    clf.fit(X, Y)

    clf2 = klass(alpha=0.01, penalty='l2')
    clf2.fit(X, Y)

    assert_array_equal(clf.coef_, clf2.coef_)
Example #24
Source File: shap_utils.py From DataShapley with MIT License | 5 votes |
def one_iteration(clf, X, y, X_test, y_test, mean_score,
                  tol=0.0, c=None, metric='accuracy'):
    """Runs one iteration of TMC-Shapley."""
    if metric == 'auc':
        def score_func(clf, a, b):
            return roc_auc_score(b, clf.predict_proba(a)[:, 1])
    elif metric == 'accuracy':
        def score_func(clf, a, b):
            return clf.score(a, b)
    else:
        raise ValueError("Wrong metric!")
    if c is None:
        c = {i: np.array([i]) for i in range(len(X))}
    idxs, marginal_contribs = np.random.permutation(len(c.keys())), np.zeros(len(X))
    new_score = np.max(np.bincount(y)) * 1. / len(y) if np.mean(y // 1 == y / 1) == 1 else 0.
    start = 0
    if start:
        X_batch, y_batch = \
            np.concatenate([X[c[idx]] for idx in idxs[:start]]), \
            np.concatenate([y[c[idx]] for idx in idxs[:start]])
    else:
        X_batch, y_batch = np.zeros((0,) + tuple(X.shape[1:])), np.zeros(0).astype(int)
    for n, idx in enumerate(idxs[start:]):
        try:
            clf = clone(clf)
        except:
            clf.fit(np.zeros((0,) + X.shape[1:]), y)
        old_score = new_score
        X_batch, y_batch = np.concatenate([X_batch, X[c[idx]]]), np.concatenate([y_batch, y[c[idx]]])
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            try:
                clf.fit(X_batch, y_batch)
                temp_score = score_func(clf, X_test, y_test)
                if temp_score > -1 and temp_score < 1.:  # removing meaningless r2 scores
                    new_score = temp_score
            except:
                continue
        marginal_contribs[c[idx]] = (new_score - old_score) / len(c[idx])
        if np.abs(new_score - mean_score) / mean_score < tol:
            break
    return marginal_contribs, idxs
Example #25
Source File: DShap.py From DataShapley with MIT License | 5 votes |
def restart_model(self):
    try:
        self.model = clone(self.model)
    except:
        self.model.fit(np.zeros((0,) + self.X.shape[1:]), self.y)
Example #26
Source File: test_multioutput.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_multi_output_classification():
    # test if multi_target initializes correctly with base estimator and fit
    # assert predictions work as expected for predict, predict_proba and score

    forest = RandomForestClassifier(n_estimators=10, random_state=1)
    multi_target_forest = MultiOutputClassifier(forest)

    # train the multi_target_forest and also get the predictions.
    multi_target_forest.fit(X, y)

    predictions = multi_target_forest.predict(X)
    assert_equal((n_samples, n_outputs), predictions.shape)

    predict_proba = multi_target_forest.predict_proba(X)

    assert len(predict_proba) == n_outputs
    for class_probabilities in predict_proba:
        assert_equal((n_samples, n_classes), class_probabilities.shape)

    assert_array_equal(np.argmax(np.dstack(predict_proba), axis=1),
                       predictions)

    # train the forest with each column and assert that predictions are equal
    for i in range(3):
        forest_ = clone(forest)  # create a clone with the same state
        forest_.fit(X, y[:, i])
        assert_equal(list(forest_.predict(X)), list(predictions[:, i]))
        assert_array_equal(list(forest_.predict_proba(X)),
                           list(predict_proba[i]))
Example #27
Source File: test_multioutput.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_multi_output_classification_partial_fit():
    # test if multi_target initializes correctly with base estimator and fit
    # assert predictions work as expected for predict

    sgd_linear_clf = SGDClassifier(loss='log', random_state=1, max_iter=5)
    multi_target_linear = MultiOutputClassifier(sgd_linear_clf)

    # train the multi_target_linear and also get the predictions.
    half_index = X.shape[0] // 2
    multi_target_linear.partial_fit(
        X[:half_index], y[:half_index], classes=classes)

    first_predictions = multi_target_linear.predict(X)
    assert_equal((n_samples, n_outputs), first_predictions.shape)

    multi_target_linear.partial_fit(X[half_index:], y[half_index:])
    second_predictions = multi_target_linear.predict(X)
    assert_equal((n_samples, n_outputs), second_predictions.shape)

    # train the linear classification with each column and assert that
    # predictions are equal after first partial_fit and second partial_fit
    for i in range(3):
        # create a clone with the same state
        sgd_linear_clf = clone(sgd_linear_clf)
        sgd_linear_clf.partial_fit(
            X[:half_index], y[:half_index, i], classes=classes[i])
        assert_array_equal(sgd_linear_clf.predict(X), first_predictions[:, i])
        sgd_linear_clf.partial_fit(X[half_index:], y[half_index:, i])
        assert_array_equal(sgd_linear_clf.predict(X), second_predictions[:, i])
Example #28
Source File: test_dummy.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def _check_behavior_2d_for_constant(clf):
    # 2d case only
    X = np.array([[0], [0], [0], [0]])  # ignored
    y = np.array([[1, 0, 5, 4, 3],
                  [2, 0, 1, 2, 5],
                  [1, 0, 4, 5, 2],
                  [1, 3, 3, 2, 0]])
    est = clone(clf)
    est.fit(X, y)
    y_pred = est.predict(X)
    assert_equal(y.shape, y_pred.shape)
Example #29
Source File: mis_classifier.py From autoimpute with MIT License | 5 votes |
def fit(self, X, **kwargs):
    """Fit an individual classifier for each column in the DataFrame.

    For each feature in the DataFrame, a classifier (default: xgboost) is
    fit with the feature as the response (y) and all other features as
    covariates (X). The resulting classifiers are stored in the class
    instance statistics. One `fit` for each column in the dataset. Column
    specification will be supported as well.

    Args:
        X (pd.DataFrame): DataFrame on which to fit classifiers
        **kwargs: keyword arguments used by classifiers

    Returns:
        self: instance of MissingnessClassifier
    """
    # start with fit checks
    self._fit_strategy_validator(X)
    self.statistics_ = {}

    # iterate missingness fit using classifier and all remaining columns
    for column in self.data_mi:
        # only fit non time-based columns...
        if not np.issubdtype(column, np.datetime64):
            y = self.data_mi[column]
            preds = self._preds[column]
            if preds == "all":
                x = X.drop(column, axis=1)
            else:
                x = X[preds]
            clf = clone(self.classifier)
            cls_fit = clf.fit(x.values, y.values, **kwargs)
            self.statistics_[column] = cls_fit
    return self
Example #30
Source File: __init__.py From carl with BSD 3-Clause "New" or "Revised" License | 5 votes |
def _clone(estimator, safe=True, original=False):
    # XXX: This is a monkey patch to allow cloning of
    # CalibratedClassifierCV(cv="prefit"), while keeping the original
    # base_estimator. Do not reproduce at home!
    if hasattr(estimator, "_clone") and not original:
        return estimator._clone()
    else:
        return sk_clone(estimator, safe=safe)