Python sklearn.clone() Examples

The following are 18 code examples of sklearn.clone(), drawn from open-source projects. The source file, project, and license are noted above each example. You may also want to check out the other available functions and classes of the sklearn module.
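Before the project examples, a quick sketch of what clone() does may help: it returns a new, unfitted estimator carrying the same constructor parameters as the original. This is a minimal illustration (the Lasso estimator is an arbitrary choice):

from sklearn import clone
from sklearn.linear_model import Lasso

original = Lasso(alpha=20)
copy = clone(original)

print(copy is original)                            # False: clone returns a fresh instance
print(copy.get_params() == original.get_params())  # True: with identical parameters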
Example #1
Source File: yatsm.py    From yatsm with MIT License
def __init__(self,
                 test_indices=None,
                 estimator={'object': sklearn.linear_model.Lasso(alpha=20),
                            'fit': {}},
                 **kwargs):
        self.test_indices = np.asarray(test_indices)
        self.estimator = sklearn.clone(estimator['object'])
        self.estimator_fit = estimator.get('fit', {})
        self.models = []  # leave empty, fill in during `fit`

        self.n_record = 0
        self.record = []

        self.n_series, self.n_features = 0, 0
        self.px = kwargs.get('px', 0)
        self.py = kwargs.get('py', 0) 
Example #2
Source File: thresholder.py    From scikit-lego with MIT License
def fit(self, X, y):
        """
        Fit the data.

        :param X: array-like, shape=(n_samples, n_columns), training data.
        :param y: array-like, shape=(n_samples,), target values.
        :return: Returns an instance of self.
        """
        X, y = check_X_y(X, y, estimator=self, dtype=FLOAT_DTYPES)
        self.estimator_ = clone(self.model)
        if not isinstance(self.estimator_, ProbabilisticClassifier):
            raise ValueError(
                "The Thresholder meta model only works on classifcation models with .predict_proba."
            )
        self.estimator_.fit(X, y)
        self.classes_ = self.estimator_.classes_
        if len(self.classes_) != 2:
            raise ValueError(
                "The Thresholder meta model only works on models with two classes."
            )
        return self 
Example #3
Source File: decay_estimator.py    From scikit-lego with MIT License
def fit(self, X, y):
        """
        Fit the model, weighting the training samples with exponentially decaying sample weights.

        :param X: array-like, shape=(n_samples, n_columns), training data.
        :param y: array-like, shape=(n_samples,), target values.
        :return: Returns an instance of self.
        """
        X, y = check_X_y(X, y, estimator=self, dtype=FLOAT_DTYPES)
        self.weights_ = np.cumprod(np.ones(X.shape[0]) * self.decay)[::-1]
        self.estimator_ = clone(self.model)
        try:
            self.estimator_.fit(X, y, sample_weight=self.weights_)
        except TypeError as e:
            if "sample_weight" in str(e):
                raise TypeError(
                    f"Model {type(self.model).__name__}.fit() does not support 'sample_weight'"
                ) from e
            raise  # re-raise unrelated TypeErrors instead of swallowing them
        if self._is_classifier():
            self.classes_ = self.estimator_.classes_
        return self 
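Rather than catching the TypeError as above, support for sample_weight can also be checked up front. A small sketch using sklearn's has_fit_parameter helper (LinearRegression here is an arbitrary example):

from sklearn.linear_model import LinearRegression
from sklearn.utils.validation import has_fit_parameter

# True if the estimator's fit() signature accepts a sample_weight argument
print(has_fit_parameter(LinearRegression(), "sample_weight"))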
Example #4
Source File: _interpolated_thresholder.py    From fairlearn with MIT License
def fit(self, X, y, **kwargs):
        """Fit the estimator.

        If `prefit` is set to `True` then the base estimator is kept as is.
        Otherwise it is fitted from the provided arguments.
        """
        if self.estimator is None:
            raise ValueError(BASE_ESTIMATOR_NONE_ERROR_MESSAGE)

        if not self.prefit:
            self.estimator_ = clone(self.estimator).fit(X, y, **kwargs)
        else:
            try:
                check_is_fitted(self.estimator)
            except NotFittedError:
                warn(BASE_ESTIMATOR_NOT_FITTED_WARNING.format(type(self).__name__))
            self.estimator_ = self.estimator
        return self 
Example #5
Source File: plot_binary_classification_COMPAS.py    From fairlearn with MIT License
def fit(self, X, y):
        try:
            check_is_fitted(self.logistic_regression_estimator)
            self.logistic_regression_estimator_ = self.logistic_regression_estimator
        except NotFittedError:
            self.logistic_regression_estimator_ = clone(
                self.logistic_regression_estimator
            ).fit(X, y)
        return self 
Example #6
Source File: grouped_estimator.py    From scikit-lego with MIT License
def __fit_grouped_estimator(self, X, y, value_columns, group_columns):
        # Reset indices such that they are the same in X and y
        X, y = X.reset_index(drop=True), y.reset_index(drop=True)

        group_indices = X.groupby(group_columns).indices

        grouped_estimations = {
            group: clone(self.estimator).fit(
                X.loc[indices, value_columns], y.loc[indices]
            )
            for group, indices in group_indices.items()
        }

        return grouped_estimations 
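The helper above illustrates a common pattern: fit one independently cloned estimator per group so that no fitted state is shared across groups. A self-contained sketch of the same idea, using made-up toy data:

import pandas as pd
from sklearn.base import clone
from sklearn.linear_model import LinearRegression

df = pd.DataFrame({"g": ["a", "a", "b", "b"],
                   "x": [1.0, 2.0, 3.0, 4.0],
                   "y": [2.0, 4.0, 7.0, 9.0]})

# One freshly cloned, independently fitted model per group
models = {group: clone(LinearRegression()).fit(sub[["x"]], sub["y"])
          for group, sub in df.groupby("g")}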
Example #7
Source File: test_nca.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_auto_init(n_samples, n_features, n_classes, n_components):
    # Test that init='auto' chooses the initialization as expected for every
    # configuration of n_samples, n_features, n_classes and n_components.
    rng = np.random.RandomState(42)
    nca_base = NeighborhoodComponentsAnalysis(init='auto',
                                              n_components=n_components,
                                              max_iter=1,
                                              random_state=rng)
    if n_classes >= n_samples:
        pass
        # n_classes > n_samples is impossible, and n_classes == n_samples
        # throws an error from lda but is an absurd case
    else:
        X = rng.randn(n_samples, n_features)
        y = np.tile(range(n_classes), n_samples // n_classes + 1)[:n_samples]
        if n_components > n_features:
            # this would return a ValueError, which is already tested in
            # test_params_validation
            pass
        else:
            nca = clone(nca_base)
            nca.fit(X, y)
            if n_components <= min(n_classes - 1, n_features):
                nca_other = clone(nca_base).set_params(init='lda')
            elif n_components < min(n_features, n_samples):
                nca_other = clone(nca_base).set_params(init='pca')
            else:
                nca_other = clone(nca_base).set_params(init='identity')
            nca_other.fit(X, y)
            assert_array_almost_equal(nca.components_, nca_other.components_) 
Example #8
Source File: estimator_transformer.py    From scikit-lego with MIT License
def fit(self, X, y):
        """Fits the estimator"""
        X, y = check_X_y(X, y, estimator=self, dtype=FLOAT_DTYPES)

        self.estimator_ = clone(self.estimator)
        self.estimator_.fit(X, y)
        return self 
Example #9
Source File: outlier_remover.py    From scikit-lego with MIT License
def fit(self, X, y=None):
        self.estimator_ = clone(self.outlier_detector)
        if self.refit:
            super().fit(X, y)
            self.estimator_.fit(X, y)
        return self 
Example #10
Source File: test_estimatortransformer.py    From scikit-lego with MIT License
def test_values_uniform(random_xy_dataset_clf):
    X, y = random_xy_dataset_clf
    X, y = check_X_y(X, y)
    clf = DummyClassifier(strategy="most_frequent")
    transformer = EstimatorTransformer(clone(clf))
    transformed = transformer.fit(X, y).transform(X)

    assert transformed.shape == (y.shape[0], 1)
    assert np.all(transformed == clf.fit(X, y).predict(X)) 
Example #11
Source File: test_klusterfold.py    From scikit-lego with MIT License
def test_splits_not_fitted(cluster_method, random_xy_dataset_regr):
    cluster_method = clone(cluster_method)
    X, y = random_xy_dataset_regr
    kf = KlusterFoldValidation(cluster_method=cluster_method)
    for train_index, test_index in kf.split(X):
        assert len(train_index) > 0
        assert len(test_index) > 0 
Example #12
Source File: test_klusterfold.py    From scikit-lego with MIT License
def test_splits_fitted(cluster_method, random_xy_dataset_regr):
    cluster_method = clone(cluster_method)
    X, y = random_xy_dataset_regr
    cluster_method = cluster_method.fit(X)
    kf = KlusterFoldValidation(cluster_method=cluster_method)
    for train_index, test_index in kf.split(X):
        assert len(train_index) > 0
        assert len(test_index) > 0 
Example #13
Source File: estimator_checks.py    From sktime with BSD 3-Clause "New" or "Revised" License
def check_clone(Estimator):
    # Check we can call clone from scikit-learn
    estimator = _construct_instance(Estimator)
    clone(estimator) 
Example #14
Source File: test_gridsearch.py    From dislib with Apache License 2.0
def test_estimators_compatibility(self):
        """Tests that dislib estimators are compatible with GridSearchCV.

        GridSearchCV uses sklearn.clone(estimator), that requires estimators to
        have methods get_params() and set_params() working properly. This is
        what this test checks, and it can be easily achieved by making the
        estimators inherit from sklearn BaseEstimator"""
        estimators = (CascadeSVM, RandomForestClassifier,
                      DBSCAN, KMeans, GaussianMixture,
                      PCA, NearestNeighbors, ALS, LinearRegression)

        for estimator_class in estimators:
            self.assertIsInstance(estimator_class, type)
            est = estimator_class()
            # test __repr__
            repr(est)
            # test cloning
            cloned = clone(est)
            # test that set_params returns self
            self.assertIs(cloned.set_params(), cloned)
            # Checks if get_params(deep=False) is a subset of
            # get_params(deep=True)
            shallow_params = est.get_params(deep=False)
            deep_params = est.get_params(deep=True)
            self.assertTrue(all(item in deep_params.items()
                                for item in shallow_params.items())) 
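As the docstring above notes, being clonable essentially comes down to inheriting from BaseEstimator and storing every constructor argument as an attribute of the same name, since get_params() is derived from the __init__ signature. A minimal sketch of a clonable estimator (the class and its parameter are made up for illustration):

from sklearn.base import BaseEstimator, clone

class MyEstimator(BaseEstimator):
    def __init__(self, alpha=1.0):
        # Store each argument unmodified under its own name so that
        # get_params()/set_params(), and therefore clone(), work.
        self.alpha = alpha

cloned = clone(MyEstimator(alpha=0.5))
print(cloned.alpha)  # 0.5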
Example #15
Source File: test_nca.py    From scikit-hubness with BSD 3-Clause "New" or "Revised" License
(The code is identical to Example #7 above.)
Example #16
Source File: grouped_estimator.py    From scikit-lego with MIT License
def fit(self, X, y=None):
        """
        Fit the model using X, y as training data. Will also learn the groups that exist within the dataset.

        :param X: array-like, shape=(n_samples, n_columns), training data.
        :param y: array-like, shape=(n_samples,), target values.
        :return: Returns an instance of self.
        """
        X, y = self.__prepare_input_data(X, y)

        if self.shrinkage is not None:
            self.__set_shrinkage_function()

        self.group_colnames_ = [str(_) for _ in as_list(self.groups)]

        if self.value_columns is not None:
            self.value_colnames_ = [str(_) for _ in as_list(self.value_columns)]
        else:
            self.value_colnames_ = [
                _ for _ in X.columns if _ not in self.group_colnames_
            ]
        self.__validate(X, y)

        # List of all hierarchical subsets of columns
        self.group_colnames_hierarchical_ = expanding_list(self.group_colnames_, list)

        self.fallback_ = None

        if self.shrinkage is None and self.use_global_model:
            subset_x = X[self.value_colnames_]
            self.fallback_ = clone(self.estimator).fit(subset_x, y)

        if self.shrinkage is not None:
            self.estimators_ = {}

            for level_colnames in self.group_colnames_hierarchical_:
                self.estimators_.update(
                    self.__fit_grouped_estimator(
                        X, y, self.value_colnames_, level_colnames
                    )
                )
        else:
            self.estimators_ = self.__fit_grouped_estimator(
                X, y, self.value_colnames_, self.group_colnames_
            )

        self.groups_ = as_list(self.estimators_.keys())

        if self.shrinkage is not None:
            self.shrinkage_factors_ = self.__get_shrinkage_factor(X)

        return self 
Example #17
Source File: conftest.py    From yatsm with MIT License
def setup_dummy_YATSM(X, Y, dates, i_breaks):
    """ Setup a dummy YATSM model

    Args:
        X (np.ndarray): n x p features
        Y (np.ndarray): n_series x n independent data
        dates (np.ndarray): n dates
        i_breaks (iterable): indices of ``dates`` representing break dates
            (each can be zero or nonzero, but len(i_breaks) must equal len(yatsm.record))

    Returns:
        YATSM model
    """
    n = dates.size
    yatsm = YATSM()
    yatsm.X, yatsm.Y, yatsm.dates = X, Y, dates
    yatsm.n_coef, yatsm.n_series = X.shape[1], Y.shape[0]
    yatsm.models = np.array([sklearn.clone(yatsm.estimator)
                             for i in range(yatsm.n_series)])
    yatsm.test_indices = np.arange(yatsm.n_series)
    n_models = len(i_breaks)
    yatsm.record = np.hstack([yatsm.record_template] * n_models)

    def populate_record(yatsm, i_rec, i_start, i_end, i_break):
        yatsm.record[i_rec]['start'] = yatsm.dates[i_start]
        yatsm.record[i_rec]['end'] = yatsm.dates[i_end]
        yatsm.record[i_rec]['break'] = (yatsm.dates[i_break] if i_break
                                        else i_break)
        yatsm.fit_models(X[i_start:i_end, :], Y[:, i_start:i_end])
        for i, m in enumerate(yatsm.models):
            yatsm.record[i_rec]['coef'][:, i] = m.coef
            yatsm.record[i_rec]['rmse'][i] = m.rmse
        return yatsm

    i_start = 0
    i_end = i_breaks[0] - 1 if i_breaks[0] else n - 1
    i_break = i_breaks[0]
    yatsm = populate_record(yatsm, 0, i_start, i_end, i_break)

    for idx, i_break in enumerate(i_breaks[1:]):
        i_start = i_breaks[idx] + 1
        i_end = i_break - 1 if i_break else n - 1
        yatsm = populate_record(yatsm, idx + 1, i_start, i_end, i_break)

    return yatsm 
Example #18
Source File: deepSuperLearnerLib.py    From DeepSuperLearner with MIT License
def fit(self, X, y, max_iterations=20, sample_weight=None):
        """
        Fit DeepSuperLearner on training data (X,y).

        Parameters
        ----------
        X : numpy array of shape [n,l] (Training samples with their l-features per sample) 
        y : numpy array of shape [n] (Classification Ground-truth)
        
        Attributes
        ----------
        max_iterations: maximum number of iterations until convergance.
        sample_weight: numpy array of shape [n,]
        
        Returns
        -------
        self : returns an instance of self.
        """
        n, j = len(y), len(np.unique(y))
        self.__classes_n = j
        latest_loss = np.finfo(np.double).max
        weights_per_iteration = []
        fitted_learners_per_iteration = []
        for iteration in range(max_iterations):
            fitted_learners_per_fold = np.empty(shape=(self.Kfolds, self.n_baselearners),
                                                dtype=object)
            y_pred_fold = np.empty(shape=(n, self.n_baselearners, j))
            folds = StratifiedKFold(n_splits=self.Kfolds, shuffle=False)
            for fold_i, fold_indexes in enumerate(folds.split(X, y)):
                train_index, test_index = fold_indexes
                X_train, X_test = X[train_index], X[test_index]
                y_train, y_test = y[train_index], y[test_index]
                for i, baselrn in enumerate(self.BL.items()):
                    name, bl = baselrn
                    baselearner = clone(bl)
                    try:
                        baselearner.fit(X_train, y_train, sample_weight=sample_weight)
                    except TypeError:
                        # Base learner does not accept sample_weight; fit without it.
                        baselearner.fit(X_train, y_train)
                    fitted_learners_per_fold[fold_i, i] = baselearner
                    y_pred_fold[test_index, i, :] = self._get_prediction(baselearner, X_test)
            
            fitted_learners_per_iteration.append(fitted_learners_per_fold)
            tmp_weights = self._get_weights(y, y_pred_fold)
            avg_probs = self._get_weighted_prediction(y_pred_fold, tmp_weights)
            loss = self._get_logloss(y, avg_probs)
            weights_per_iteration.append(tmp_weights)
            print("Iteration: {} Loss: {}".format(iteration, loss))
            print("Weights: ", tmp_weights)
            if loss < latest_loss:
                latest_loss = loss
                X = np.hstack((X, avg_probs))
            else:
                weights_per_iteration = weights_per_iteration[:-1]
                fitted_learners_per_iteration = fitted_learners_per_iteration[:-1]
                break
        
        self.weights_per_iteration = weights_per_iteration
        self.fitted_learners_per_iteration = fitted_learners_per_iteration

        return self