Python sklearn.clone() Examples
The following are 18 code examples of sklearn.clone(). You can go to the original project or source file by following the links above each example, or check out all available functions and classes of the sklearn module.
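Before the examples, a quick reminder of what clone() does: it builds a new, unfitted estimator with the same hyperparameters as the original, leaving any fitted state behind. A minimal sketch of that behavior (the LogisticRegression settings here are arbitrary):

from sklearn.base import clone
from sklearn.linear_model import LogisticRegression

original = LogisticRegression(C=0.5).fit([[0.0], [1.0]], [0, 1])
copy = clone(original)  # same hyperparameters, but no fitted coefficients
assert copy is not original
assert copy.get_params()["C"] == 0.5
assert not hasattr(copy, "coef_")  # fitted state is not copied

This is why the meta-estimators below clone their inner estimator before fitting: the user's original object is never mutated.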

Example #1
Source File: yatsm.py From yatsm with MIT License
def __init__(self, test_indices=None,
             estimator={'object': sklearn.linear_model.Lasso(alpha=20),
                        'fit': {}},
             **kwargs):
    self.test_indices = np.asarray(test_indices)
    self.estimator = sklearn.clone(estimator['object'])
    self.estimator_fit = estimator.get('fit', {})
    self.models = []  # leave empty, fill in during `fit`
    self.n_record = 0
    self.record = []
    self.n_series, self.n_features = 0, 0
    self.px = kwargs.get('px', 0)
    self.py = kwargs.get('py', 0)
Example #2
Source File: _interpolated_thresholder.py From fairlearn with MIT License
def fit(self, X, y, **kwargs):
    """Fit the estimator.

    If `prefit` is set to `True` then the base estimator is kept as is.
    Otherwise it is fitted from the provided arguments.
    """
    if self.estimator is None:
        raise ValueError(BASE_ESTIMATOR_NONE_ERROR_MESSAGE)

    if not self.prefit:
        self.estimator_ = clone(self.estimator).fit(X, y, **kwargs)
    else:
        try:
            check_is_fitted(self.estimator)
        except NotFittedError:
            warn(BASE_ESTIMATOR_NOT_FITTED_WARNING.format(type(self).__name__))
        self.estimator_ = self.estimator
    return self
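The prefit branch above is the interesting part: clone() protects the caller's estimator from being mutated during fit, while prefit=True deliberately skips cloning so an already-trained model is reused as-is. A stripped-down sketch of the same pattern, with a hypothetical PrefitWrapper that is not part of fairlearn:

from sklearn.base import BaseEstimator, clone
from sklearn.utils.validation import check_is_fitted

class PrefitWrapper(BaseEstimator):
    """Hypothetical wrapper illustrating the clone-or-reuse pattern."""

    def __init__(self, estimator, prefit=False):
        self.estimator = estimator
        self.prefit = prefit

    def fit(self, X, y):
        if self.prefit:
            check_is_fitted(self.estimator)  # raises NotFittedError if unfitted
            self.estimator_ = self.estimator  # reuse the trained model
        else:
            self.estimator_ = clone(self.estimator).fit(X, y)  # fresh copy
        return self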
Example #3
Source File: decay_estimator.py From scikit-lego with MIT License
def fit(self, X, y):
    """
    Fit the data after adapting the sample weight.

    :param X: array-like, shape=(n_samples, n_columns) training data.
    :param y: array-like, shape=(n_samples,) training data.
    :return: Returns an instance of self.
    """
    X, y = check_X_y(X, y, estimator=self, dtype=FLOAT_DTYPES)
    self.weights_ = np.cumprod(np.ones(X.shape[0]) * self.decay)[::-1]
    self.estimator_ = clone(self.model)
    try:
        self.estimator_.fit(X, y, sample_weight=self.weights_)
    except TypeError as e:
        if "sample_weight" in str(e):
            raise TypeError(
                f"Model {type(self.model).__name__}.fit() does not have 'sample_weight'"
            )
    if self._is_classifier():
        self.classes_ = self.estimator_.classes_
    return self
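The weights_ line is dense; run in isolation it produces a geometric decay where the oldest row gets decay ** n_samples and the newest gets decay ** 1. A small standalone check, assuming decay=0.5 and four samples:

import numpy as np

decay, n_samples = 0.5, 4
weights = np.cumprod(np.ones(n_samples) * decay)[::-1]
print(weights)  # [0.0625 0.125  0.25   0.5   ] -- newer samples weigh more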
Example #4
Source File: thresholder.py From scikit-lego with MIT License
def fit(self, X, y):
    """
    Fit the data.

    :param X: array-like, shape=(n_samples, n_columns) training data.
    :param y: array-like, shape=(n_samples,) training data.
    :return: Returns an instance of self.
    """
    X, y = check_X_y(X, y, estimator=self, dtype=FLOAT_DTYPES)
    self.estimator_ = clone(self.model)
    if not isinstance(self.estimator_, ProbabilisticClassifier):
        raise ValueError(
            "The Thresholder meta model only works on classification models with .predict_proba."
        )
    self.estimator_.fit(X, y)
    self.classes_ = self.estimator_.classes_
    if len(self.classes_) != 2:
        raise ValueError(
            "The Thresholder meta model only works on models with two classes."
        )
    return self
Example #5
Source File: test_nca.py From Mastering-Elasticsearch-7.0 with MIT License
def test_auto_init(n_samples, n_features, n_classes, n_components):
    # Test that auto choose the init as expected with every configuration
    # of order of n_samples, n_features, n_classes and n_components.
    rng = np.random.RandomState(42)
    nca_base = NeighborhoodComponentsAnalysis(init='auto',
                                              n_components=n_components,
                                              max_iter=1,
                                              random_state=rng)
    if n_classes >= n_samples:
        pass
        # n_classes > n_samples is impossible, and n_classes == n_samples
        # throws an error from lda but is an absurd case
    else:
        X = rng.randn(n_samples, n_features)
        y = np.tile(range(n_classes), n_samples // n_classes + 1)[:n_samples]
        if n_components > n_features:
            # this would return a ValueError, which is already tested in
            # test_params_validation
            pass
        else:
            nca = clone(nca_base)
            nca.fit(X, y)
            if n_components <= min(n_classes - 1, n_features):
                nca_other = clone(nca_base).set_params(init='lda')
            elif n_components < min(n_features, n_samples):
                nca_other = clone(nca_base).set_params(init='pca')
            else:
                nca_other = clone(nca_base).set_params(init='identity')
            nca_other.fit(X, y)
            assert_array_almost_equal(nca.components_, nca_other.components_)
Example #6
Source File: plot_binary_classification_COMPAS.py From fairlearn with MIT License
def fit(self, X, y):
    try:
        check_is_fitted(self.logistic_regression_estimator)
        self.logistic_regression_estimator_ = self.logistic_regression_estimator
    except NotFittedError:
        self.logistic_regression_estimator_ = clone(
            self.logistic_regression_estimator
        ).fit(X, y)
    return self
Example #7
Source File: grouped_estimator.py From scikit-lego with MIT License
def __fit_grouped_estimator(self, X, y, value_columns, group_columns):
    # Reset indices such that they are the same in X and y
    X, y = X.reset_index(drop=True), y.reset_index(drop=True)

    group_indices = X.groupby(group_columns).indices

    grouped_estimations = {
        group: clone(self.estimator).fit(
            X.loc[indices, value_columns], y.loc[indices]
        )
        for group, indices in group_indices.items()
    }

    return grouped_estimations
Example #8
Source File: estimator_transformer.py From scikit-lego with MIT License
def fit(self, X, y):
    """Fits the estimator"""
    X, y = check_X_y(X, y, estimator=self, dtype=FLOAT_DTYPES)
    self.estimator_ = clone(self.estimator)
    self.estimator_.fit(X, y)
    return self
Example #9
Source File: outlier_remover.py From scikit-lego with MIT License
def fit(self, X, y=None):
    self.estimator_ = clone(self.outlier_detector)
    if self.refit:
        super().fit(X, y)
        self.estimator_.fit(X, y)
    return self
Example #10
Source File: test_estimatortransformer.py From scikit-lego with MIT License
def test_values_uniform(random_xy_dataset_clf):
    X, y = random_xy_dataset_clf
    X, y = check_X_y(X, y)
    clf = DummyClassifier(strategy="most_frequent")
    transformer = EstimatorTransformer(clone(clf))
    transformed = transformer.fit(X, y).transform(X)
    assert transformed.shape == (y.shape[0], 1)
    assert np.all(transformed == clf.fit(X, y).predict(X))
Example #11
Source File: test_klusterfold.py From scikit-lego with MIT License
def test_splits_not_fitted(cluster_method, random_xy_dataset_regr):
    cluster_method = clone(cluster_method)
    X, y = random_xy_dataset_regr
    kf = KlusterFoldValidation(cluster_method=cluster_method)
    for train_index, test_index in kf.split(X):
        assert len(train_index) > 0
        assert len(test_index) > 0
Example #12
Source File: test_klusterfold.py From scikit-lego with MIT License
def test_splits_fitted(cluster_method, random_xy_dataset_regr):
    cluster_method = clone(cluster_method)
    X, y = random_xy_dataset_regr
    cluster_method = cluster_method.fit(X)
    kf = KlusterFoldValidation(cluster_method=cluster_method)
    for train_index, test_index in kf.split(X):
        assert len(train_index) > 0
        assert len(test_index) > 0
Example #13
Source File: estimator_checks.py From sktime with BSD 3-Clause "New" or "Revised" License
def check_clone(Estimator):
    # Check we can call clone from scikit-learn
    estimator = _construct_instance(Estimator)
    clone(estimator)
Example #14
Source File: test_gridsearch.py From dislib with Apache License 2.0
def test_estimators_compatibility(self):
    """Tests that dislib estimators are compatible with GridSearchCV.

    GridSearchCV uses sklearn.clone(estimator), which requires estimators
    to have methods get_params() and set_params() working properly. This
    is what this test checks, and it can be easily achieved by making the
    estimators inherit from sklearn BaseEstimator."""
    estimators = (CascadeSVM, RandomForestClassifier, DBSCAN, KMeans,
                  GaussianMixture, PCA, NearestNeighbors, ALS,
                  LinearRegression)
    for estimator_class in estimators:
        self.assertIsInstance(estimator_class, type)
        est = estimator_class()

        # test __repr__
        repr(est)

        # test cloning
        cloned = clone(est)

        # test that set_params returns self
        self.assertIs(cloned.set_params(), cloned)

        # Checks if get_params(deep=False) is a subset of
        # get_params(deep=True)
        shallow_params = est.get_params(deep=False)
        deep_params = est.get_params(deep=True)
        self.assertTrue(all(item in deep_params.items()
                            for item in shallow_params.items()))
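The docstring above spells out the contract clone() depends on: working get_params() and set_params(), which an estimator gets for free by subclassing BaseEstimator and exposing its hyperparameters as explicit __init__ keyword arguments. A minimal sketch of a clone-compatible estimator (MyEstimator is illustrative, not part of dislib):

from sklearn.base import BaseEstimator, clone

class MyEstimator(BaseEstimator):
    # BaseEstimator derives get_params()/set_params() from the __init__ signature
    def __init__(self, alpha=1.0, max_iter=100):
        self.alpha = alpha
        self.max_iter = max_iter

est = MyEstimator(alpha=0.1)
cloned = clone(est)
assert cloned is not est
assert cloned.get_params() == est.get_params()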
Example #15
Source File: test_nca.py From scikit-hubness with BSD 3-Clause "New" or "Revised" License
def test_auto_init(n_samples, n_features, n_classes, n_components):
    # Test that auto choose the init as expected with every configuration
    # of order of n_samples, n_features, n_classes and n_components.
    rng = np.random.RandomState(42)
    nca_base = NeighborhoodComponentsAnalysis(init='auto',
                                              n_components=n_components,
                                              max_iter=1,
                                              random_state=rng)
    if n_classes >= n_samples:
        pass
        # n_classes > n_samples is impossible, and n_classes == n_samples
        # throws an error from lda but is an absurd case
    else:
        X = rng.randn(n_samples, n_features)
        y = np.tile(range(n_classes), n_samples // n_classes + 1)[:n_samples]
        if n_components > n_features:
            # this would return a ValueError, which is already tested in
            # test_params_validation
            pass
        else:
            nca = clone(nca_base)
            nca.fit(X, y)
            if n_components <= min(n_classes - 1, n_features):
                nca_other = clone(nca_base).set_params(init='lda')
            elif n_components < min(n_features, n_samples):
                nca_other = clone(nca_base).set_params(init='pca')
            else:
                nca_other = clone(nca_base).set_params(init='identity')
            nca_other.fit(X, y)
            assert_array_almost_equal(nca.components_, nca_other.components_)
Example #16
Source File: conftest.py From yatsm with MIT License
def setup_dummy_YATSM(X, Y, dates, i_breaks):
    """ Setup a dummy YATSM model

    Args:
        X (np.ndarray): n x p features
        Y (np.ndarray): n_series x n independent data
        dates (np.ndarray): n dates
        i_breaks (iterable): indices of ``dates`` representing break dates
            (can be zero or nonzero, but len(i_breaks) is len(yatsm.record))

    Returns:
        YATSM model
    """
    n = dates.size
    yatsm = YATSM()
    yatsm.X, yatsm.Y, yatsm.dates = X, Y, dates
    yatsm.n_coef, yatsm.n_series = X.shape[1], Y.shape[0]
    yatsm.models = np.array([sklearn.clone(yatsm.estimator)
                             for i in range(yatsm.n_series)])
    yatsm.test_indices = np.arange(yatsm.n_series)

    n_models = len(i_breaks)
    yatsm.record = np.hstack([yatsm.record_template] * n_models)

    def populate_record(yatsm, i_rec, i_start, i_end, i_break):
        yatsm.record[i_rec]['start'] = yatsm.dates[i_start]
        yatsm.record[i_rec]['end'] = yatsm.dates[i_end]
        yatsm.record[i_rec]['break'] = (yatsm.dates[i_break] if i_break
                                        else i_break)
        yatsm.fit_models(X[i_start:i_end, :], Y[:, i_start:i_end])
        for i, m in enumerate(yatsm.models):
            yatsm.record[i_rec]['coef'][:, i] = m.coef
            yatsm.record[i_rec]['rmse'][i] = m.rmse
        return yatsm

    i_start = 0
    i_end = i_breaks[0] - 1 if i_breaks[0] else n - 1
    i_break = i_breaks[0]
    yatsm = populate_record(yatsm, 0, i_start, i_end, i_break)

    for idx, i_break in enumerate(i_breaks[1:]):
        i_start = i_breaks[idx] + 1
        i_end = i_break - 1 if i_break else n - 1
        yatsm = populate_record(yatsm, idx + 1, i_start, i_end, i_break)

    return yatsm
Example #17
Source File: grouped_estimator.py From scikit-lego with MIT License
def fit(self, X, y=None):
    """
    Fit the model using X, y as training data. Will also learn the groups
    that exist within the dataset.

    :param X: array-like, shape=(n_samples, n_columns) training data.
    :param y: array-like, shape=(n_samples,) training data.
    :return: Returns an instance of self.
    """
    X, y = self.__prepare_input_data(X, y)

    if self.shrinkage is not None:
        self.__set_shrinkage_function()

    self.group_colnames_ = [str(_) for _ in as_list(self.groups)]

    if self.value_columns is not None:
        self.value_colnames_ = [str(_) for _ in as_list(self.value_columns)]
    else:
        self.value_colnames_ = [
            _ for _ in X.columns if _ not in self.group_colnames_
        ]
    self.__validate(X, y)

    # List of all hierarchical subsets of columns
    self.group_colnames_hierarchical_ = expanding_list(self.group_colnames_, list)

    self.fallback_ = None

    if self.shrinkage is None and self.use_global_model:
        subset_x = X[self.value_colnames_]
        self.fallback_ = clone(self.estimator).fit(subset_x, y)

    if self.shrinkage is not None:
        self.estimators_ = {}

        for level_colnames in self.group_colnames_hierarchical_:
            self.estimators_.update(
                self.__fit_grouped_estimator(
                    X, y, self.value_colnames_, level_colnames
                )
            )
    else:
        self.estimators_ = self.__fit_grouped_estimator(
            X, y, self.value_colnames_, self.group_colnames_
        )

    self.groups_ = as_list(self.estimators_.keys())

    if self.shrinkage is not None:
        self.shrinkage_factors_ = self.__get_shrinkage_factor(X)

    return self
Example #18
Source File: deepSuperLearnerLib.py From DeepSuperLearner with MIT License
def fit(self, X, y, max_iterations=20, sample_weight=None):
    """
    Fit DeepSuperLearner on training data (X, y).

    Parameters
    ----------
    X : numpy array of shape [n, l]
        Training samples with their l features per sample.
    y : numpy array of shape [n]
        Classification ground-truth.

    Attributes
    ----------
    max_iterations : maximum number of iterations until convergence.
    sample_weight : numpy array of shape [n,]

    Returns
    -------
    self : returns an instance of self.
    """
    n, j = len(y), len(np.unique(y))
    self.__classes_n = j
    latest_loss = np.finfo(np.double).max
    weights_per_iteration = []
    fitted_learners_per_iteration = []
    for iteration in range(max_iterations):
        fitted_learners_per_fold = np.empty(
            shape=(self.Kfolds, self.n_baselearners), dtype=object)
        y_pred_fold = np.empty(shape=(n, self.n_baselearners, j))
        folds = StratifiedKFold(n_splits=self.Kfolds, shuffle=False)
        for fold_i, fold_indexes in enumerate(folds.split(X, y)):
            train_index, test_index = fold_indexes
            X_train, X_test = X[train_index], X[test_index]
            y_train, y_test = y[train_index], y[test_index]
            for i, baselrn in enumerate(self.BL.items()):
                name, bl = baselrn
                baselearner = clone(bl)
                try:
                    baselearner.fit(X_train, y_train,
                                    sample_weight=sample_weight)
                except TypeError:
                    # base learner does not accept sample_weight
                    baselearner.fit(X_train, y_train)
                fitted_learners_per_fold[fold_i, i] = baselearner
                y_pred_fold[test_index, i, :] = self._get_prediction(
                    baselearner, X_test)
        fitted_learners_per_iteration.append(fitted_learners_per_fold)
        tmp_weights = self._get_weights(y, y_pred_fold)
        avg_probs = self._get_weighted_prediction(y_pred_fold, tmp_weights)
        loss = self._get_logloss(y, avg_probs)
        weights_per_iteration.append(tmp_weights)
        print("Iteration: {} Loss: {}".format(iteration, loss))
        print("Weights: ", tmp_weights)
        if loss < latest_loss:
            latest_loss = loss
            X = np.hstack((X, avg_probs))
        else:
            weights_per_iteration = weights_per_iteration[:-1]
            fitted_learners_per_iteration = fitted_learners_per_iteration[:-1]
            break
    self.weights_per_iteration = weights_per_iteration
    self.fitted_learners_per_iteration = fitted_learners_per_iteration
    return self