Python sklearn.base.clone() Examples

The following are 30 code examples of sklearn.base.clone(), drawn from real open-source projects; the source file, project, and license are noted above each example. clone() constructs a new, unfitted estimator with the same constructor parameters as the original, which is why it appears throughout cross-validation, grid-search, and test code. You may also want to check out all available functions/classes of the module sklearn.base.
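As a quick orientation before the examples, a minimal sketch (not taken from any project below) of what clone() guarantees:

from sklearn.base import clone
from sklearn.linear_model import LogisticRegression

original = LogisticRegression(C=0.5, max_iter=200)
copy = clone(original)

# The clone shares constructor parameters but carries no fitted state.
assert copy is not original
assert copy.get_params() == original.get_params()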
Example #1
Source File: test_dummy.py    From Mastering-Elasticsearch-7.0 with MIT License
def _check_behavior_2d(clf):
    # 1d case
    X = np.array([[0], [0], [0], [0]])  # ignored
    y = np.array([1, 2, 1, 1])
    est = clone(clf)
    est.fit(X, y)
    y_pred = est.predict(X)
    assert_equal(y.shape, y_pred.shape)

    # 2d case
    y = np.array([[1, 0],
                  [2, 0],
                  [1, 0],
                  [1, 3]])
    est = clone(clf)
    est.fit(X, y)
    y_pred = est.predict(X)
    assert_equal(y.shape, y_pred.shape) 
Example #2
Source File: fixes.py    From skutil with BSD 3-Clause "New" or "Revised" License
def _do_fit(n_jobs, verbose, pre_dispatch, base_estimator,
                X, y, scorer, parameter_iterable, fit_params,
                error_score, cv, **kwargs):
        groups = kwargs.pop('groups')

        # test_score, n_samples, parameters
        out = Parallel(n_jobs=n_jobs, verbose=verbose, pre_dispatch=pre_dispatch)(
            delayed(_fit_and_score)(
                clone(base_estimator), X, y, scorer,
                train, test, verbose, parameters,
                fit_params=fit_params,
                return_train_score=False,
                return_n_test_samples=True,
                return_times=False,
                return_parameters=True,
                error_score=error_score)
            for parameters in parameter_iterable
            for train, test in cv.split(X, y, groups))

        # test_score, n_samples, _, parameters
        return [(mod[0], mod[1], None, mod[2]) for mod in out] 
Example #3
Source File: cluster.py    From scikit-plot with MIT License
def _clone_and_score_clusterer(clf, X, n_clusters):
    """Clones and scores clusterer instance.

    Args:
        clf: Clusterer instance that implements ``fit``, ``fit_predict``, and
            ``score`` methods, and an ``n_clusters`` hyperparameter.
            e.g. :class:`sklearn.cluster.KMeans` instance

        X (array-like, shape (n_samples, n_features)):
            Data to cluster, where n_samples is the number of samples and
            n_features is the number of features.

        n_clusters (int): Number of clusters

    Returns:
        score: Score of clusters

        time: Number of seconds it took to fit the clusterer
    """
    start = time.time()
    clf = clone(clf)
    setattr(clf, 'n_clusters', n_clusters)
    return clf.fit(X).score(X), time.time() - start 
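A minimal usage sketch for the helper above, assuming scikit-learn's KMeans (whose score() returns the negative inertia) and a toy dataset:

import numpy as np
from sklearn.cluster import KMeans

X = np.random.RandomState(0).rand(100, 4)
score, elapsed = _clone_and_score_clusterer(KMeans(random_state=0), X, n_clusters=3)
print(score, elapsed)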
Example #4
Source File: test_net.py    From skorch with BSD 3-Clause "New" or "Revised" License
def test_changing_model_reinitializes_optimizer(self, net, data):
        # The idea is that we change the model using `set_params` to
        # add parameters. Since the optimizer depends on the model
        # parameters it needs to be reinitialized.
        X, y = data

        net.set_params(module__nonlin=nn.ReLU())
        net.fit(X, y)

        net.set_params(module__nonlin=nn.PReLU())
        assert isinstance(net.module_.nonlin, nn.PReLU)
        d1 = net.module_.nonlin.weight.data.clone().cpu().numpy()

        # make sure that we do not initialize again by making sure that
        # the network is initialized and by using partial_fit.
        assert net.initialized_
        net.partial_fit(X, y)
        d2 = net.module_.nonlin.weight.data.clone().cpu().numpy()

        # all newly introduced parameters should have been trained (changed)
        # by the optimizer after 10 epochs.
        assert (abs(d2 - d1) > 1e-05).all() 
Example #5
Source File: test_net.py    From skorch with BSD 3-Clause "New" or "Revised" License
def net_fit(self, net_cls, module_cls, dummy_callback, data):
        # Careful, don't call additional fits or set_params on this,
        # since that would have side effects on other tests.
        X, y = data

        # We need a new instance of the net and cannot reuse the net
        # fixture, because otherwise fixture net and net_fit refer to
        # the same object; also, we cannot clone(net) because this
        # will result in the dummy_callback not being the mock anymore
        net = net_cls(
            module_cls,
            callbacks=[('dummy', dummy_callback)],
            max_epochs=10,
            lr=0.1,
        )
        return net.fit(X, y) 
Example #6
Source File: parameterize.py    From carl with BSD 3-Clause "New" or "Revised" License
def fit(self, X, y):
        """Fit estimator on parameterized data.

        Parameters
        ----------
        * `X` [array-like, shape=(n_samples, n_features+len(params))]:
            The samples, concatenated with the corresponding parameter values.

        * `y` [array-like, shape=(n_samples,)]:
            The output values.

        Returns
        -------
        * `self` [object]:
            `self`.
        """
        self.stacker_ = ParameterStacker(self.params)

        # XXX: this assumes that X is extended with parameters
        self.n_features_ = X.shape[1] - len(self.params)
        self.estimator_ = clone(self.base_estimator).fit(X, y)

        return self 
Example #7
Source File: ml_tune.py    From ml-parameter-optimization with MIT License
def apply_gridsearch(self,model):
        """
        apply grid search on ml algorithm to specified parameters
        returns updated best score and parameters
        """
        # check if custom evaluation function is specified
        if callable(self.params_cv['scoring']):
            scoring = make_scorer(self.params_cv['scoring'],greater_is_better=self._greater_is_better)
        else:
            scoring = self.params_cv['scoring']
        
        gsearch = GridSearchCV(estimator=model,param_grid=self.get_params_tune(),scoring=scoring,
                               iid=self.params_cv['iid'],cv=self.params_cv['cv_folds'],n_jobs=self.params_cv['n_jobs'])
        gsearch.fit(self.X,self.y)
        
        # update best model if best_score is improved
        if (gsearch.best_score_ * self._score_mult) > (self.best_score * self._score_mult):
            self.best_model = clone(gsearch.best_estimator_)
            self.best_score = gsearch.best_score_
        
        # update tuned parameters with optimal values
        for key,value in gsearch.best_params_.items():
            self._params[key] = value
        self._temp_score = gsearch.best_score_
        return self 
Example #8
Source File: feature_bagging.py    From pyod with BSD 2-Clause "Simplified" License
def _make_estimator(self, append=True, random_state=None):
        """Make and configure a copy of the `base_estimator_` attribute.

        sklearn/base.py

        Warning: This method should be used to properly instantiate new
        sub-estimators.
        """

        # TODO: add a check for estimator_param
        estimator = clone(self.base_estimator_)
        estimator.set_params(**self.estimator_params)

        if random_state is not None:
            _set_random_states(estimator, random_state)

        if append:
            self.estimators_.append(estimator)

        return estimator 
Example #9
Source File: groupby_model.py    From xam with MIT License
def fit(self, X, y=None, **fit_params):

        if not isinstance(X, pd.DataFrame):
            raise ValueError('X is not a pandas.DataFrame')

        self.models_ = {}

        columns = self._get_fit_columns(X)

        for key in X[self.by].unique():

            # Copy the model
            model = clone(self.base_model)

            # Select the rows that will be fitted
            mask = (X[self.by] == key).tolist()
            rows = X.index[mask]

            # Fit the model
            model.fit(X.loc[rows, columns], y[mask], **fit_params)

            # Save the model
            self.models_[key] = model

        return self 
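A hedged usage sketch for the group-by fit above; GroupbyModel and its constructor arguments are assumptions inferred from the method's use of base_model and by, not the project's confirmed API:

import pandas as pd
from sklearn.linear_model import LinearRegression

X = pd.DataFrame({'shop': ['a', 'a', 'b', 'b'], 'x': [1., 2., 3., 4.]})
y = pd.Series([1., 2., 6., 8.])

# Hypothetical wrapper exposing the fit() above: one clone of
# base_model is fitted per unique value of the `by` column and
# stored in models_.
model = GroupbyModel(base_model=LinearRegression(), by='shop')
model.fit(X, y)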
Example #10
Source File: test_nmf.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_nmf_sparse_input():
    # Test that sparse matrices are accepted as input
    from scipy.sparse import csc_matrix

    rng = np.random.mtrand.RandomState(42)
    A = np.abs(rng.randn(10, 10))
    A[:, 2 * np.arange(5)] = 0
    A_sparse = csc_matrix(A)

    for solver in ('cd', 'mu'):
        est1 = NMF(solver=solver, n_components=5, init='random',
                   random_state=0, tol=1e-2)
        est2 = clone(est1)

        W1 = est1.fit_transform(A)
        W2 = est2.fit_transform(A_sparse)
        H1 = est1.components_
        H2 = est2.components_

        assert_array_almost_equal(W1, W2)
        assert_array_almost_equal(H1, H2) 
Example #11
Source File: test_k_means.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_weighted_vs_repeated():
    # a sample weight of N should yield the same result as an N-fold
    # repetition of the sample
    rng = np.random.RandomState(0)
    sample_weight = rng.randint(1, 5, size=n_samples)
    X_repeat = np.repeat(X, sample_weight, axis=0)
    estimators = [KMeans(init="k-means++", n_clusters=n_clusters,
                         random_state=42),
                  KMeans(init="random", n_clusters=n_clusters,
                         random_state=42),
                  KMeans(init=centers.copy(), n_clusters=n_clusters,
                         random_state=42),
                  MiniBatchKMeans(n_clusters=n_clusters, batch_size=10,
                                  random_state=42)]
    for estimator in estimators:
        est_weighted = clone(estimator).fit(X, sample_weight=sample_weight)
        est_repeated = clone(estimator).fit(X_repeat)
        repeated_labels = np.repeat(est_weighted.labels_, sample_weight)
        assert_almost_equal(v_measure_score(est_repeated.labels_,
                                            repeated_labels), 1.0)
        if not isinstance(estimator, MiniBatchKMeans):
            assert_almost_equal(_sort_centers(est_weighted.cluster_centers_),
                                _sort_centers(est_repeated.cluster_centers_)) 
Example #12
Source File: test_sag.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_classifier_results():
    """tests if classifier results match target"""
    alpha = .1
    n_features = 20
    n_samples = 10
    tol = .01
    max_iter = 200
    rng = np.random.RandomState(0)
    X = rng.normal(size=(n_samples, n_features))
    w = rng.normal(size=n_features)
    y = np.dot(X, w)
    y = np.sign(y)
    clf1 = LogisticRegression(solver='sag', C=1. / alpha / n_samples,
                              max_iter=max_iter, tol=tol, random_state=77)
    clf2 = clone(clf1)

    clf1.fit(X, y)
    clf2.fit(sp.csr_matrix(X), y)
    pred1 = clf1.predict(X)
    pred2 = clf2.predict(X)
    assert_almost_equal(pred1, y, decimal=12)
    assert_almost_equal(pred2, y, decimal=12) 
Example #13
Source File: test_validation.py    From Mastering-Elasticsearch-7.0 with MIT License
def check_cross_val_predict_multiclass(est, X, y, method):
    """Helper for tests of cross_val_predict with multiclass classification"""
    cv = KFold(n_splits=3, shuffle=False)

    # Generate expected outputs
    float_min = np.finfo(np.float64).min
    default_values = {'decision_function': float_min,
                      'predict_log_proba': float_min,
                      'predict_proba': 0}
    expected_predictions = np.full((len(X), len(set(y))),
                                   default_values[method],
                                   dtype=np.float64)
    _, y_enc = np.unique(y, return_inverse=True)
    for train, test in cv.split(X, y_enc):
        est = clone(est).fit(X[train], y_enc[train])
        fold_preds = getattr(est, method)(X[test])
        i_cols_fit = np.unique(y_enc[train])
        expected_predictions[np.ix_(test, i_cols_fit)] = fold_preds

    # Check actual outputs for several representations of y
    for tg in [y, y + 1, y - 2, y.astype('str')]:
        assert_allclose(cross_val_predict(est, X, tg, method=method, cv=cv),
                        expected_predictions) 
Example #14
Source File: test_target.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_transform_target_regressor_2d_transformer_multioutput():
    # Check consistency with transformer accepting only 2D array and a 2D y
    # array.
    X = friedman[0]
    y = np.vstack((friedman[1], friedman[1] ** 2 + 1)).T
    transformer = StandardScaler()
    regr = TransformedTargetRegressor(regressor=LinearRegression(),
                                      transformer=transformer)
    y_pred = regr.fit(X, y).predict(X)
    assert y.shape == y_pred.shape
    # consistency forward transform
    y_tran = regr.transformer_.transform(y)
    _check_standard_scaled(y, y_tran)
    assert y.shape == y_pred.shape
    # consistency inverse transform
    assert_allclose(y, regr.transformer_.inverse_transform(
        y_tran).squeeze())
    # consistency of the regressor
    lr = LinearRegression()
    transformer2 = clone(transformer)
    lr.fit(X, transformer2.fit_transform(y))
    y_lr_pred = lr.predict(X)
    assert_allclose(y_pred, transformer2.inverse_transform(y_lr_pred))
    assert_allclose(regr.regressor_.coef_, lr.coef_) 
Example #15
Source File: test_pipeline.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_fit_predict_on_pipeline():
    # test that the fit_predict method is implemented on a pipeline
    # test that the fit_predict on pipeline yields same results as applying
    # transform and clustering steps separately
    iris = load_iris()
    scaler = StandardScaler()
    km = KMeans(random_state=0)
    # As pipeline doesn't clone estimators on construction,
    # it must have its own estimators
    scaler_for_pipeline = StandardScaler()
    km_for_pipeline = KMeans(random_state=0)

    # first compute the transform and clustering step separately
    scaled = scaler.fit_transform(iris.data)
    separate_pred = km.fit_predict(scaled)

    # use a pipeline to do the transform and clustering in one step
    pipe = Pipeline([
        ('scaler', scaler_for_pipeline),
        ('Kmeans', km_for_pipeline)
    ])
    pipeline_pred = pipe.fit_predict(iris.data)

    assert_array_almost_equal(pipeline_pred, separate_pred) 
Example #16
Source File: test_multioutput.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_base_chain_crossval_fit_and_predict():
    # Fit chain with cross_val_predict and verify predict
    # performance
    X, Y = generate_multilabel_dataset_with_correlations()

    for chain in [ClassifierChain(LogisticRegression()),
                  RegressorChain(Ridge())]:
        chain.fit(X, Y)
        chain_cv = clone(chain).set_params(cv=3)
        chain_cv.fit(X, Y)
        Y_pred_cv = chain_cv.predict(X)
        Y_pred = chain.predict(X)

        assert Y_pred_cv.shape == Y_pred.shape
        assert not np.all(Y_pred == Y_pred_cv)
        if isinstance(chain, ClassifierChain):
            assert jaccard_score(Y, Y_pred_cv, average='samples') > .4
        else:
            assert mean_squared_error(Y, Y_pred_cv) < .25 
Example #17
Source File: test_multioutput.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_base_chain_random_order():
    # Fit base chain with random order
    X, Y = generate_multilabel_dataset_with_correlations()
    for chain in [ClassifierChain(LogisticRegression()),
                  RegressorChain(Ridge())]:
        chain_random = clone(chain).set_params(order='random', random_state=42)
        chain_random.fit(X, Y)
        chain_fixed = clone(chain).set_params(order=chain_random.order_)
        chain_fixed.fit(X, Y)
        assert_array_equal(chain_fixed.order_, chain_random.order_)
        assert_not_equal(list(chain_random.order_), list(range(4)))
        assert_equal(len(chain_random.order_), 4)
        assert_equal(len(set(chain_random.order_)), 4)
        # Randomly ordered chain should behave identically to a fixed order
        # chain with the same order.
        for est1, est2 in zip(chain_random.estimators_,
                              chain_fixed.estimators_):
            assert_array_almost_equal(est1.coef_, est2.coef_) 
Example #18
Source File: test_base.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_clone_pandas_dataframe():

    class DummyEstimator(BaseEstimator, TransformerMixin):
        """This is a dummy class for generating numerical features

        This feature extractor extracts numerical features from pandas data
        frame.

        Parameters
        ----------

        df: pandas data frame
            The pandas data frame parameter.

        Notes
        -----
        """
        def __init__(self, df=None, scalar_param=1):
            self.df = df
            self.scalar_param = scalar_param

        def fit(self, X, y=None):
            pass

        def transform(self, X):
            pass

    # build and clone estimator
    d = np.arange(10)
    df = MockDataFrame(d)
    e = DummyEstimator(df, scalar_param=1)
    cloned_e = clone(e)

    # the test
    assert (e.df == cloned_e.df).values.all()
    assert_equal(e.scalar_param, cloned_e.scalar_param) 
Example #19
Source File: test_sag.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_sag_regressor():
    """tests if the sag regressor performs well"""
    xmin, xmax = -5, 5
    n_samples = 20
    tol = .001
    max_iter = 50
    alpha = 0.1
    rng = np.random.RandomState(0)
    X = np.linspace(xmin, xmax, n_samples).reshape(n_samples, 1)

    # simple linear function without noise
    y = 0.5 * X.ravel()

    clf1 = Ridge(tol=tol, solver='sag', max_iter=max_iter,
                 alpha=alpha * n_samples, random_state=rng)
    clf2 = clone(clf1)
    clf1.fit(X, y)
    clf2.fit(sp.csr_matrix(X), y)
    score1 = clf1.score(X, y)
    score2 = clf2.score(X, y)
    assert_greater(score1, 0.99)
    assert_greater(score2, 0.99)

    # simple linear function with noise
    y = 0.5 * X.ravel() + rng.randn(n_samples, 1).ravel()

    clf1 = Ridge(tol=tol, solver='sag', max_iter=max_iter,
                 alpha=alpha * n_samples)
    clf2 = clone(clf1)
    clf1.fit(X, y)
    clf2.fit(sp.csr_matrix(X), y)
    score1 = clf1.score(X, y)
    score2 = clf2.score(X, y)
    assert_greater(score1, 0.5)
    assert_greater(score2, 0.5) 
Example #20
Source File: test_sag.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_sag_pobj_matches_ridge_regression():
    """tests if the sag pobj matches ridge reg"""
    n_samples = 100
    n_features = 10
    alpha = 1.0
    n_iter = 100
    fit_intercept = False
    rng = np.random.RandomState(10)
    X = rng.normal(size=(n_samples, n_features))
    true_w = rng.normal(size=n_features)
    y = X.dot(true_w)

    clf1 = Ridge(fit_intercept=fit_intercept, tol=.00000000001, solver='sag',
                 alpha=alpha, max_iter=n_iter, random_state=42)
    clf2 = clone(clf1)
    clf3 = Ridge(fit_intercept=fit_intercept, tol=.00001, solver='lsqr',
                 alpha=alpha, max_iter=n_iter, random_state=42)

    clf1.fit(X, y)
    clf2.fit(sp.csr_matrix(X), y)
    clf3.fit(X, y)

    pobj1 = get_pobj(clf1.coef_, alpha, X, y, squared_loss)
    pobj2 = get_pobj(clf2.coef_, alpha, X, y, squared_loss)
    pobj3 = get_pobj(clf3.coef_, alpha, X, y, squared_loss)

    assert_array_almost_equal(pobj1, pobj2, decimal=4)
    assert_array_almost_equal(pobj1, pobj3, decimal=4)
    assert_array_almost_equal(pobj3, pobj2, decimal=4) 
Example #21
Source File: calibration.py    From carl with BSD 3-Clause "New" or "Revised" License
def _clone(self):
        estimator = clone(self, original=True)
        if self.cv == "prefit":
            estimator.base_estimator = self.base_estimator

        return estimator 
Example #22
Source File: test_sag.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_sag_pobj_matches_logistic_regression():
    """tests if the sag pobj matches log reg"""
    n_samples = 100
    alpha = 1.0
    max_iter = 20
    X, y = make_blobs(n_samples=n_samples, centers=2, random_state=0,
                      cluster_std=0.1)

    clf1 = LogisticRegression(solver='sag', fit_intercept=False, tol=.0000001,
                              C=1. / alpha / n_samples, max_iter=max_iter,
                              random_state=10, multi_class='ovr')
    clf2 = clone(clf1)
    clf3 = LogisticRegression(fit_intercept=False, tol=.0000001,
                              C=1. / alpha / n_samples, max_iter=max_iter,
                              random_state=10, multi_class='ovr',
                              solver='lbfgs')

    clf1.fit(X, y)
    clf2.fit(sp.csr_matrix(X), y)
    clf3.fit(X, y)

    pobj1 = get_pobj(clf1.coef_, alpha, X, y, log_loss)
    pobj2 = get_pobj(clf2.coef_, alpha, X, y, log_loss)
    pobj3 = get_pobj(clf3.coef_, alpha, X, y, log_loss)

    assert_array_almost_equal(pobj1, pobj2, decimal=4)
    assert_array_almost_equal(pobj2, pobj3, decimal=4)
    assert_array_almost_equal(pobj3, pobj1, decimal=4) 
Example #23
Source File: test_sgd.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_clone(klass):
    # Test whether clone works ok.
    clf = klass(alpha=0.01, penalty='l1')
    clf = clone(clf)
    clf.set_params(penalty='l2')
    clf.fit(X, Y)

    clf2 = klass(alpha=0.01, penalty='l2')
    clf2.fit(X, Y)

    assert_array_equal(clf.coef_, clf2.coef_) 
Example #24
Source File: shap_utils.py    From DataShapley with MIT License
def one_iteration(clf, X, y, X_test, y_test, mean_score, tol=0.0, c=None, metric='accuracy'):
    """Runs one iteration of TMC-Shapley."""
    
    if metric == 'auc':
        def score_func(clf, a, b):
            return roc_auc_score(b, clf.predict_proba(a)[:,1])
    elif metric == 'accuracy':
        def score_func(clf, a, b):
            return clf.score(a, b)
    else:
        raise ValueError("Wrong metric!")  
    if c is None:
        c = {i:np.array([i]) for i in range(len(X))}
    idxs, marginal_contribs = np.random.permutation(len(c.keys())), np.zeros(len(X))
    # Baseline score: majority-class accuracy if y is integer-valued
    # (classification), otherwise 0 (regression).
    new_score = np.max(np.bincount(y)) * 1. / len(y) if np.mean(y // 1 == y / 1) == 1 else 0.
    start = 0
    if start:
        X_batch = np.concatenate([X[c[idx]] for idx in idxs[:start]])
        y_batch = np.concatenate([y[c[idx]] for idx in idxs[:start]])
    else:
        X_batch, y_batch = np.zeros((0,) + tuple(X.shape[1:])), np.zeros(0).astype(int)
    for n, idx in enumerate(idxs[start:]):
        try:
            # Reset clf to an unfitted copy with the same hyperparameters.
            clf = clone(clf)
        except Exception:
            # Models that cannot be cloned are reset by refitting on an
            # empty dataset instead.
            clf.fit(np.zeros((0,) + X.shape[1:]), y)
        old_score = new_score
        X_batch, y_batch = np.concatenate([X_batch, X[c[idx]]]), np.concatenate([y_batch, y[c[idx]]])
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            try:
                clf.fit(X_batch, y_batch)
                temp_score = score_func(clf, X_test, y_test)
                if temp_score > -1 and temp_score < 1.:  # discard meaningless r2 scores
                    new_score = temp_score
            except Exception:
                continue
        marginal_contribs[c[idx]] = (new_score - old_score)/len(c[idx])
        if np.abs(new_score - mean_score)/mean_score < tol:
            break
    return marginal_contribs, idxs 
Example #25
Source File: DShap.py    From DataShapley with MIT License
def restart_model(self):
        
        try:
            # Reset to an unfitted copy with the same hyperparameters.
            self.model = clone(self.model)
        except Exception:
            # Models that cannot be cloned are reset by refitting on an
            # empty dataset instead.
            self.model.fit(np.zeros((0,) + self.X.shape[1:]), self.y) 
Example #26
Source File: test_multioutput.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_multi_output_classification():
    # test if multi_target initializes correctly with base estimator and fit
    # assert predictions work as expected for predict, predict_proba and score

    forest = RandomForestClassifier(n_estimators=10, random_state=1)
    multi_target_forest = MultiOutputClassifier(forest)

    # train the multi_target_forest and also get the predictions.
    multi_target_forest.fit(X, y)

    predictions = multi_target_forest.predict(X)
    assert_equal((n_samples, n_outputs), predictions.shape)

    predict_proba = multi_target_forest.predict_proba(X)

    assert len(predict_proba) == n_outputs
    for class_probabilities in predict_proba:
        assert_equal((n_samples, n_classes), class_probabilities.shape)

    assert_array_equal(np.argmax(np.dstack(predict_proba), axis=1),
                       predictions)

    # train the forest with each column and assert that predictions are equal
    for i in range(3):
        forest_ = clone(forest)  # create a clone with the same state
        forest_.fit(X, y[:, i])
        assert_equal(list(forest_.predict(X)), list(predictions[:, i]))
        assert_array_equal(list(forest_.predict_proba(X)),
                           list(predict_proba[i])) 
Example #27
Source File: test_multioutput.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_multi_output_classification_partial_fit():
    # test if multi_target initializes correctly with base estimator and fit
    # assert predictions work as expected for predict

    sgd_linear_clf = SGDClassifier(loss='log', random_state=1, max_iter=5)
    multi_target_linear = MultiOutputClassifier(sgd_linear_clf)

    # train the multi_target_linear and also get the predictions.
    half_index = X.shape[0] // 2
    multi_target_linear.partial_fit(
        X[:half_index], y[:half_index], classes=classes)

    first_predictions = multi_target_linear.predict(X)
    assert_equal((n_samples, n_outputs), first_predictions.shape)

    multi_target_linear.partial_fit(X[half_index:], y[half_index:])
    second_predictions = multi_target_linear.predict(X)
    assert_equal((n_samples, n_outputs), second_predictions.shape)

    # train the linear classification with each column and assert that
    # predictions are equal after first partial_fit and second partial_fit
    for i in range(3):
        # create a clone with the same state
        sgd_linear_clf = clone(sgd_linear_clf)
        sgd_linear_clf.partial_fit(
            X[:half_index], y[:half_index, i], classes=classes[i])
        assert_array_equal(sgd_linear_clf.predict(X), first_predictions[:, i])
        sgd_linear_clf.partial_fit(X[half_index:], y[half_index:, i])
        assert_array_equal(sgd_linear_clf.predict(X), second_predictions[:, i])


Example #28
Source File: test_dummy.py    From Mastering-Elasticsearch-7.0 with MIT License
def _check_behavior_2d_for_constant(clf):
    # 2d case only
    X = np.array([[0], [0], [0], [0]])  # ignored
    y = np.array([[1, 0, 5, 4, 3],
                  [2, 0, 1, 2, 5],
                  [1, 0, 4, 5, 2],
                  [1, 3, 3, 2, 0]])
    est = clone(clf)
    est.fit(X, y)
    y_pred = est.predict(X)
    assert_equal(y.shape, y_pred.shape) 
Example #29
Source File: mis_classifier.py    From autoimpute with MIT License
def fit(self, X, **kwargs):
        """Fit an individual classifier for each column in the DataFrame.

        For each feature in the DataFrame, a classifier (default: xgboost) is
        fit with the feature as the response (y) and all other features as
        covariates (X). The resulting classifiers are stored in the class
        instance's ``statistics_`` attribute, one ``fit`` per column in the
        dataset. Explicit column specification will be supported as well.

        Args:
            X (pd.DataFrame): DataFrame on which to fit classifiers
            **kwargs: keyword arguments used by classifiers

        Returns:
            self: instance of MissingnessClassifier
        """

        # start with fit checks
        self._fit_strategy_validator(X)
        self.statistics_ = {}

        # iterate missingness fit using classifier and all remaining columns
        for column in self.data_mi:
            # only fit non time-based columns...
            if not np.issubdtype(column, np.datetime64):
                y = self.data_mi[column]
                preds = self._preds[column]
                if preds == "all":
                    x = X.drop(column, axis=1)
                else:
                    x = X[preds]
                clf = clone(self.classifier)
                cls_fit = clf.fit(x.values, y.values, **kwargs)
                self.statistics_[column] = cls_fit
        return self 
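A hedged usage sketch; the MissingnessClassifier constructor shown here is an assumption (the docstring above only confirms the class name and the per-column ``statistics_`` mapping):

import pandas as pd

df = pd.DataFrame({'a': [1.0, None, 3.0], 'b': [2.0, 5.0, None]})
mc = MissingnessClassifier()  # hypothetical default constructor
mc.fit(df)
print(list(mc.statistics_))  # one fitted classifier per column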
Example #30
Source File: __init__.py    From carl with BSD 3-Clause "New" or "Revised" License
def _clone(estimator, safe=True, original=False):
    # XXX: This is a monkey patch to allow cloning of
    #      CalibratedClassifierCV(cv="prefit"), while keeping the original
    #      base_estimator. Do not reproduce at home!
    if hasattr(estimator, "_clone") and not original:
        return estimator._clone()
    else:
        return sk_clone(estimator, safe=safe)
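For context, the patch above only takes effect once it replaces scikit-learn's clone; a hedged sketch of how it might be applied, assuming the module imported the original as sk_clone:

import sklearn.base

# Hypothetical application of the monkey patch: estimators that define
# a _clone() method (such as the calibration wrapper in Example #21)
# now control their own copying; everything else falls back to sk_clone.
sklearn.base.clone = _clone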