Python sklearn.base.BaseEstimator() Examples

The following are 30 code examples of sklearn.base.BaseEstimator(). You can go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module sklearn.base, or try the search function.
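Before diving into the examples, here is a minimal sketch (class name and data are illustrative) of what subclassing BaseEstimator buys you: get_params() and set_params() come for free, provided __init__ stores each argument under an attribute of the same name.

import numpy as np
from sklearn.base import BaseEstimator

class MeanPredictor(BaseEstimator):
    # Toy regressor: predicts the training mean plus a fixed offset.
    def __init__(self, offset=0.0):
        self.offset = offset              # must match the parameter name

    def fit(self, X, y):
        self.mean_ = np.mean(y)           # fitted attributes end with "_"
        return self

    def predict(self, X):
        return np.full(len(X), self.mean_ + self.offset)

est = MeanPredictor(offset=0.5)
print(est.get_params())                   # {'offset': 0.5}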
Example #1
Source File: test_builder.py    From gordo with GNU Affero General Public License v3.0
def test_get_metadata_helper(model: BaseEstimator, expect_empty_dict: bool):
    """
    Ensure the builder works with various model configs and that each has
    expected/valid metadata results.
    """

    X, y = np.random.random((1000, 4)), np.random.random((1000,))

    model.fit(X, y)

    metadata = ModelBuilder._extract_metadata_from_model(model)

    # All the metadata we've implemented so far is 'history', so we'll check that
    if not expect_empty_dict:
        assert "history" in metadata
        assert all(
            name in metadata["history"] for name in ("params", "loss", "accuracy")
        )
    else:
        assert dict() == metadata 
Example #2
Source File: utils.py    From gordo with GNU Affero General Public License v3.0
def load_model(directory: str, name: str) -> BaseEstimator:
    """
    Load a given model from the directory by name.

    Parameters
    ----------
    directory: str
        Directory to look for the model
    name: str
        Name of the model to load, this would be the sub directory within the
        directory parameter.

    Returns
    -------
    BaseEstimator
    """
    start_time = timeit.default_timer()
    model = serializer.load(os.path.join(directory, name))
    logger.debug(f"Time to load model: {timeit.default_timer() - start_time}s")
    return model 
Example #3
Source File: build_model.py    From gordo with GNU Affero General Public License v3.0
def _determine_offset(
        model: BaseEstimator, X: Union[np.ndarray, pd.DataFrame]
    ) -> int:
        """
        Determine the model's offset: by how many samples is the model's output
        shorter than its input?

        Parameters
        ----------
        model: sklearn.base.BaseEstimator
            Trained model with either ``predict`` or ``transform`` method, preference
            given to ``predict``.
        X: Union[np.ndarray, pd.DataFrame]
            Data to pass to the model's ``predict`` or ``transform`` method.

        Returns
        -------
        int
            The difference between X and the model's output lengths.
        """
        out = model.predict(X) if hasattr(model, "predict") else model.transform(X)
        return len(X) - len(out) 
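A minimal sketch of the offset idea with a made-up model whose output is shorter than its input, e.g. because it consumes a rolling window:

import numpy as np

class WindowedModel:
    # Fake model: drops the first window-1 rows, like a lookback model.
    window = 3

    def predict(self, X):
        return X[self.window - 1:]

X = np.random.random((10, 2))
model = WindowedModel()
out = model.predict(X) if hasattr(model, "predict") else model.transform(X)
print(len(X) - len(out))                  # 2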
Example #4
Source File: run.py    From nyaggle with MIT License
def _dispatch_models(algorithm_type: Union[str, Type[BaseEstimator]],
                     target_type: str, custom_eval: Optional[Callable] = None):
    if not isinstance(algorithm_type, str):
        assert issubclass(algorithm_type, BaseEstimator), "algorithm_type should be str or subclass of BaseEstimator"
        return algorithm_type, _dispatch_eval_func(target_type, custom_eval), None

    cat_features = {
        'lgbm': 'categorical_feature',
        'cat': 'cat_features',
        'xgb': None
    }

    gbdt_class = _dispatch_gbdt_class(algorithm_type, target_type)
    eval_func = _dispatch_eval_func(target_type, custom_eval)

    return gbdt_class, eval_func, cat_features[algorithm_type] 
Example #5
Source File: test_weight_boosting.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_sample_weight_adaboost_regressor():
    """
    AdaBoostRegressor should work without sample_weight support in the base
    estimator; the random weighted sampling is done internally in
    AdaBoostRegressor's _boost method.
    """
    class DummyEstimator(BaseEstimator):

        def fit(self, X, y):
            pass

        def predict(self, X):
            return np.zeros(X.shape[0])

    boost = AdaBoostRegressor(DummyEstimator(), n_estimators=3)
    boost.fit(X, y_regr)
    assert_equal(len(boost.estimator_weights_), len(boost.estimator_errors_)) 
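A self-contained variant of the same idea (data made up; assumes a recent scikit-learn): because AdaBoostRegressor resamples internally, the base estimator's fit() never needs a sample_weight parameter.

import numpy as np
from sklearn.base import BaseEstimator
from sklearn.ensemble import AdaBoostRegressor

class DummyEstimator(BaseEstimator):
    def fit(self, X, y):                  # note: no sample_weight argument
        return self

    def predict(self, X):
        return np.zeros(X.shape[0])

X, y = np.random.random((20, 3)), np.random.random(20)
boost = AdaBoostRegressor(DummyEstimator(), n_estimators=3).fit(X, y)
assert len(boost.estimator_weights_) == len(boost.estimator_errors_)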
Example #6
Source File: test_calibration.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_calibration_accepts_ndarray(X):
    """Test that calibration accepts n-dimensional arrays as input"""
    y = [1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0]

    class MockTensorClassifier(BaseEstimator):
        """A toy estimator that accepts tensor inputs"""

        def fit(self, X, y):
            self.classes_ = np.unique(y)
            return self

        def decision_function(self, X):
            # toy decision function that just needs to have the right shape:
            return X.reshape(X.shape[0], -1).sum(axis=1)

    calibrated_clf = CalibratedClassifierCV(MockTensorClassifier())
    # we should be able to fit this classifier with no error
    calibrated_clf.fit(X, y) 
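What the toy decision function does with a tensor input (shapes made up): each sample is flattened and summed, yielding one score per sample regardless of dimensionality.

import numpy as np

X = np.arange(24).reshape(4, 3, 2)        # 4 samples, each of shape (3, 2)
scores = X.reshape(X.shape[0], -1).sum(axis=1)
print(scores.shape)                       # (4,)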
Example #7
Source File: model.py    From gobbli with Apache License 2.0
def persist_estimator(estimator: BaseEstimator) -> Path:
    """
    Saves the given estimator to a gobbli-managed filepath, where it can be loaded from
    disk by the SKLearnClassifier.  This is useful if you want to use an estimator but
    don't want to bother with saving it to disk on your own.

    Args:
      estimator: The estimator to save.

    Returns:
      The path where the estimator was saved.
    """
    estimator_dir = (
        SKLearnClassifier.model_class_dir() / "user_estimators" / generate_uuid()
    )
    estimator_dir.mkdir(exist_ok=True, parents=True)

    estimator_path = estimator_dir / SKLearnClassifier._TRAIN_OUTPUT_CHECKPOINT
    SKLearnClassifier._dump_estimator(estimator, estimator_path)

    return estimator_path 
Example #8
Source File: model.py    From gobbli with Apache License 2.0
def _validate_estimator(estimator: BaseEstimator):
        """
        Run some checks on the given object to determine if it's an estimator which is
        valid for our purposes.
        """
        # sklearn has a function that does a lot more intensive checking regarding
        # the interface of a candidate Estimator
        # (sklearn.utils.estimator_checks.check_estimator), but the function
        # doesn't work well for our use case as of version 0.22.  It doesn't properly
        # detect Pipeline X_types based on the first pipeline component and won't
        # test anything that doesn't accept a 2-D numpy array as input.  We'll settle
        # for lax checks here until sklearn has something that works better for us.
        if not is_classifier(estimator):
            raise ValueError(
                "Estimator must be a classifier according to sklearn.base.is_classifier()"
            )

        if not hasattr(estimator, "predict_proba"):
            raise ValueError(
                "Estimator must support the predict_proba() method to fulfill gobbli's "
                "interface requirements for a prediction model."
            ) 
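The two lax checks above can be reproduced in isolation; note that hasattr is enough here because estimators such as SVC only expose predict_proba conditionally (when probability=True):

from sklearn.base import is_classifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC

print(is_classifier(LogisticRegression()))              # True
print(hasattr(LogisticRegression(), "predict_proba"))   # True
print(hasattr(SVC(), "predict_proba"))                  # False (probability=False)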
Example #9
Source File: cli.py    From skorch with BSD 3-Clause "New" or "Revised" License
def print_help(model, defaults=None):
    """Print help for the command line arguments of the given model.

    Parameters
    ----------
    model : sklearn.base.BaseEstimator
      The basic model, e.g. a ``NeuralNet`` or sklearn ``Pipeline``.

    defaults : dict or None (default=None)
      Optionally, change the default values to use custom
      defaults. Commandline arguments have precedence over defaults.

    """
    defaults = defaults or {}

    print("This is the help for the model-specific parameters.")
    print("To invoke help for the remaining options, run:")
    print("python {} -- --help".format(sys.argv[0]))
    print()

    lines = (_get_help_for_estimator(prefix, estimator, defaults=defaults) for
             prefix, estimator in _yield_estimators(model))
    print('\n'.join(chain(*lines))) 
Example #10
Source File: test_sklearn_model_io.py    From kryptoflow with GNU General Public License v3.0
def test_loader(sklearn_model, project_manager):
    skl = SklearnModel(artifact=sklearn_model)
    skl.store(name='clf')
    reloaded = skl.load(name='clf')
    assert isinstance(reloaded, BaseEstimator)

    skl2 = SklearnModel(artifact=sklearn_model)
    skl2.store(name='clf')
    reload_first = skl.load(run_number=1, name='clf')
    assert isinstance(reload_first, BaseEstimator)

    for root, dirs, files in os.walk(project_manager.CONFIG['saved-models']):
        for f in files:
            os.unlink(os.path.join(root, f))
        for d in dirs:
            shutil.rmtree(os.path.join(root, d))

    with open(os.path.join(project_manager.CONFIG['saved-models'], '.gitkeep'), 'w') as gitkeep:
        gitkeep.write('empty') 
Example #11
Source File: test_sklearn_model_io.py    From kryptoflow with GNU General Public License v3.0
def test_trainable_model_from_file(sklearn_model, project_manager):
    skl = SklearnModel(artifact=sklearn_model)

    skl.store(name='clf')
    trainable = TrainableModel.from_file(run_number=1, name='clf', model_type='sklearn')
    assert isinstance(trainable.model, BaseEstimator)
    for root, dirs, files in os.walk(project_manager.CONFIG['saved-models']):
        for f in files:
            os.unlink(os.path.join(root, f))
        for d in dirs:
            shutil.rmtree(os.path.join(root, d))

    with open(os.path.join(project_manager.CONFIG['saved-models'], '.gitkeep'), 'w') as gitkeep:
        gitkeep.write('empty') 
Example #12
Source File: mis_classifier.py    From autoimpute with MIT License
def __init__(self, classifier=None, predictors="all"):
        """Create an instance of the MissingnessClassifier.

        The MissingnessClassifier inherits from sklearn BaseEstimator and
        ClassifierMixin. This inheritance and this class's implementation
        ensure that the MissingnessClassifier is a valid classifier that will
        work in an sklearn pipeline.

        Args:
            classifier (classifier, optional): valid classifier from sklearn.
                If None, default is xgboost. Note that classifier must
                conform to sklearn style. This means it must implement the
                `predict_proba` method and act as a proper classifier.
            predictors (str, iter, dict, optional): defaults to all, i.e.
                use all predictors. If all, every column will be used for
                every class prediction. If a list, subset of columns used for
                all predictions. If a dict, specify which columns to use as
                predictors for each imputation. Columns not specified in dict
                will receive `all` by default.
        """
        self.classifier = classifier
        self.predictors = predictors 
Example #13
Source File: combination.py    From modAL with MIT License
def make_query_strategy(utility_measure: Callable, selector: Callable) -> Callable:
    """
    Takes the given utility measure and selector functions and makes a query strategy by combining them.

    Args:
        utility_measure: Utility measure, for instance :func:`~modAL.disagreement.vote_entropy`, but it can be a custom
            function as well. Should take a classifier and the unlabelled data and should return an array containing the
            utility scores.
        selector: Function selecting instances for query. Should take an array of utility scores and should return an
            array containing the queried items.

    Returns:
        A function which returns queried instances given a classifier and an unlabelled pool.
    """
    def query_strategy(classifier: BaseEstimator, X: modALinput) -> Tuple:
        utility = utility_measure(classifier, X)
        query_idx = selector(utility)
        return query_idx, X[query_idx]

    return query_strategy 
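For instance, modAL's documented helpers can be combined into the standard uncertainty-sampling strategy (assuming modAL's public API as of recent versions):

from modAL.uncertainty import classifier_uncertainty
from modAL.utils.selection import multi_argmax

custom_query_strategy = make_query_strategy(
    utility_measure=classifier_uncertainty,
    selector=multi_argmax,
)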
Example #14
Source File: validation.py    From modAL with MIT License
def check_class_labels(*args: BaseEstimator) -> bool:
    """
    Checks the known class labels for each classifier.

    Args:
        *args: Classifier objects to check the known class labels.

    Returns:
        True, if class labels match for all classifiers, False otherwise.
    """
    try:
        classes_ = [estimator.classes_ for estimator in args]
    except AttributeError:
        raise NotFittedError('Not all estimators are fitted. Fit all estimators before using this method.')

    for classifier_idx in range(len(args) - 1):
        if not np.array_equal(classes_[classifier_idx], classes_[classifier_idx+1]):
            return False

    return True 
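A small demonstration of the comparison this performs (toy data made up): two classifiers fitted on different label sets fail the pairwise classes_ check.

import numpy as np
from sklearn.linear_model import LogisticRegression

a = LogisticRegression().fit([[0], [1]], [0, 1])
b = LogisticRegression().fit([[0], [1], [2]], [0, 1, 2])
print(np.array_equal(a.classes_, b.classes_))   # False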
Example #15
Source File: base.py    From modAL with MIT License
def __init__(self,
                 estimator: BaseEstimator,
                 query_strategy: Callable,
                 X_training: Optional[modALinput] = None,
                 y_training: Optional[modALinput] = None,
                 bootstrap_init: bool = False,
                 force_all_finite: bool = True,
                 **fit_kwargs
                 ) -> None:
        assert callable(query_strategy), 'query_strategy must be callable'

        self.estimator = estimator
        self.query_strategy = query_strategy

        self.X_training = X_training
        self.y_training = y_training
        if X_training is not None:
            self._fit_to_known(bootstrap=bootstrap_init, **fit_kwargs)

        assert isinstance(force_all_finite, bool), 'force_all_finite must be a bool'
        self.force_all_finite = force_all_finite 
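Typical usage of this constructor, following modAL's documented top-level API (dataset and model choice are illustrative):

from modAL.models import ActiveLearner
from modAL.uncertainty import uncertainty_sampling
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier

X, y = load_iris(return_X_y=True)
learner = ActiveLearner(
    estimator=RandomForestClassifier(),
    query_strategy=uncertainty_sampling,
    X_training=X[::15], y_training=y[::15],   # small labelled seed set
)
query_idx, query_inst = learner.query(X)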
Example #16
Source File: uncertainty.py    From modAL with MIT License
def classifier_uncertainty(classifier: BaseEstimator, X: modALinput, **predict_proba_kwargs) -> np.ndarray:
    """
    Classification uncertainty of the classifier for the provided samples.

    Args:
        classifier: The classifier for which the uncertainty is to be measured.
        X: The samples for which the uncertainty of classification is to be measured.
        **predict_proba_kwargs: Keyword arguments to be passed for the :meth:`predict_proba` of the classifier.

    Returns:
        Classifier uncertainty, which is 1 - P(prediction is correct).
    """
    # calculate uncertainty for each point provided
    try:
        classwise_uncertainty = classifier.predict_proba(X, **predict_proba_kwargs)
    except NotFittedError:
        return np.ones(shape=(X.shape[0], ))

    # for each point, select the maximum uncertainty
    uncertainty = 1 - np.max(classwise_uncertainty, axis=1)
    return uncertainty 
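A worked example with made-up probabilities: uncertainty is one minus the top class probability, so the second row below is the better query candidate.

import numpy as np

proba = np.array([[0.1, 0.9],
                  [0.6, 0.4]])
print(1 - np.max(proba, axis=1))          # [0.1 0.4]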
Example #17
Source File: uncertainty.py    From modAL with MIT License
def classifier_margin(classifier: BaseEstimator, X: modALinput, **predict_proba_kwargs) -> np.ndarray:
    """
    Classification margin uncertainty of the classifier for the provided samples. This uncertainty measure is the
    difference between the probabilities of the first and second most likely predictions.

    Args:
        classifier: The classifier for which the prediction margin is to be measured.
        X: The samples for which the prediction margin of classification is to be measured.
        **predict_proba_kwargs: Keyword arguments to be passed for the :meth:`predict_proba` of the classifier.

    Returns:
        Margin uncertainty, which is the difference of the probabilities of first and second most likely predictions.
    """
    try:
        classwise_uncertainty = classifier.predict_proba(X, **predict_proba_kwargs)
    except NotFittedError:
        return np.zeros(shape=(X.shape[0], ))

    if classwise_uncertainty.shape[1] == 1:
        return np.zeros(shape=(classwise_uncertainty.shape[0],))

    part = np.partition(-classwise_uncertainty, 1, axis=1)
    margin = - part[:, 0] + part[:, 1]

    return margin 
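The np.partition trick, traced with made-up probabilities: negating the array and partitioning at index 1 puts the two largest probabilities (negated) in the first two slots, so their difference is the margin.

import numpy as np

proba = np.array([[0.1, 0.2, 0.7],
                  [0.4, 0.35, 0.25]])
part = np.partition(-proba, 1, axis=1)
print(-part[:, 0] + part[:, 1])           # [0.5  0.05]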
Example #18
Source File: uncertainty.py    From modAL with MIT License
def classifier_entropy(classifier: BaseEstimator, X: modALinput, **predict_proba_kwargs) -> np.ndarray:
    """
    Entropy of the classifier's predictions for the provided samples.

    Args:
        classifier: The classifier for which the prediction entropy is to be measured.
        X: The samples for which the prediction entropy is to be measured.
        **predict_proba_kwargs: Keyword arguments to be passed for the :meth:`predict_proba` of the classifier.

    Returns:
        Entropy of the class probabilities.
    """
    try:
        classwise_uncertainty = classifier.predict_proba(X, **predict_proba_kwargs)
    except NotFittedError:
        return np.zeros(shape=(X.shape[0], ))

    return np.transpose(entropy(np.transpose(classwise_uncertainty))) 
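The double transpose is needed because scipy.stats.entropy works column-wise: each row of class probabilities becomes a column, and the result comes back as one entropy value per sample (probabilities made up).

import numpy as np
from scipy.stats import entropy

proba = np.array([[0.5, 0.5],
                  [1.0, 0.0]])
print(np.transpose(entropy(np.transpose(proba))))   # [0.6931... 0.]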
Example #19
Source File: base.py    From Neuraxle with Apache License 2.0
def tosklearn(self):
        class NeuraxleToSKLearnPipelineWrapper(BaseEstimator):
            def __init__(self, neuraxle_step):
                self.p: Union[BaseStep, TruncableSteps] = neuraxle_step

            def set_params(self, **params) -> BaseEstimator:
                self.p.set_hyperparams(HyperparameterSpace(params))
                return self

            def get_params(self, deep=True):
                neuraxle_params = HyperparameterSamples(self.p.get_hyperparams()).to_flat_as_dict_primitive()
                return neuraxle_params

            def get_params_space(self, deep=True):
                neuraxle_params = HyperparameterSpace(self.p.get_hyperparams_space()).to_flat_as_dict_primitive()
                return neuraxle_params

            def fit(self, **args) -> BaseEstimator:
                self.p = self.p.fit(**args)
                return self

            def transform(self, **args):
                return self.p.transform(**args)

        return NeuraxleToSKLearnPipelineWrapper(self) 
Example #20
Source File: __init__.py    From sklearn2pmml with GNU Affero General Public License v3.0
def make_pmml_pipeline(obj, active_fields = None, target_fields = None):
	"""Translates a regular Scikit-Learn estimator or pipeline to a PMML pipeline.

	Parameters
	----------
	obj: BaseEstimator
		The object.

	active_fields: list of strings, optional
		Feature names. If missing, "x1", "x2", .., "xn" are assumed.

	target_fields: list of strings, optional
		Label name(s). If missing, "y" is assumed.

	"""
	steps = _filter_steps(_get_steps(obj))
	pipeline = PMMLPipeline(steps)
	if active_fields is not None:
		pipeline.active_fields = numpy.asarray(active_fields)
	if target_fields is not None:
		pipeline.target_fields = numpy.asarray(target_fields)
	return pipeline 
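Hypothetical usage of the function above (field names made up): wrap a plain estimator into a PMMLPipeline with explicit field names.

from sklearn.linear_model import LogisticRegression

pipeline = make_pmml_pipeline(
    LogisticRegression(),
    active_fields=["x1", "x2"],
    target_fields=["y"],
)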
Example #21
Source File: _normalize.py    From dask-ml with BSD 3-Clause "New" or "Revised" License
def normalize_estimator(est):
    """Normalize an estimator.

    Note: Since scikit-learn requires duck-typing, but not sub-typing from
    ``BaseEstimator``, we sometimes need to call this function directly."""
    base = [type(est).__name__, normalize_token(est.get_params())]
    # fitted attributes: https://github.com/dask/dask-ml/issues/658
    attrs = [x for x in dir(est) if x.endswith("_") and not x.startswith("_")]
    exclude = {"cv_results_", "model_history_", "history_", "refit_time_"}

    with warnings.catch_warnings():
        warnings.simplefilter("ignore", FutureWarning)
        for attr in attrs:
            if attr in exclude:
                continue
            try:
                val = getattr(est, attr)
            except (sklearn.exceptions.NotFittedError, AttributeError):
                continue
            base.append(val)
    return tuple(base) 
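The trailing-underscore convention this attribute scan relies on is easy to see on any fitted estimator (model choice is illustrative):

from sklearn.linear_model import LinearRegression

est = LinearRegression().fit([[0.0], [1.0]], [0.0, 2.0])
fitted = [a for a in dir(est) if a.endswith("_") and not a.startswith("_")]
print("coef_" in fitted, "intercept_" in fitted)    # True True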
Example #22
Source File: test_StandardScaler.py    From differential-privacy-library with MIT License
def test_class(self):
        from sklearn.base import BaseEstimator
        self.assertTrue(issubclass(StandardScaler, BaseEstimator)) 
Example #23
Source File: decisionboundaryplot.py    From highdimensional-decision-boundary-plot with MIT License
def setclassifier(self, estimator=KNeighborsClassifier(n_neighbors=10)):
        """Assign classifier for which decision boundary should be plotted.

        Parameters
        ----------
        estimator : BaseEstimator instance, optional (default=KNeighborsClassifier(n_neighbors=10)).
            Classifier for which the decision boundary should be plotted. Must have
            probability estimates enabled (i.e. estimator.predict_proba must work).
            Make sure it is possible for probability estimates to get close to 0.5
            (more specifically, as close as specified by acceptance_threshold).
        """
        self.classifier = estimator 
Example #24
Source File: predict.py    From healthcareai-py with MIT License
def predict_regression(x_test, trained_estimator):
    """
    Given feature data and a trained estimator, return a regression prediction

    Args:
        x_test: feature data to generate a prediction for
        trained_estimator (sklearn.base.BaseEstimator): a trained scikit-learn estimator

    Returns:
        a prediction
    """
    validate_estimator(trained_estimator)
    prediction = trained_estimator.predict(x_test)
    return prediction 
Example #25
Source File: predict.py    From healthcareai-py with MIT License
def predict_classification(x_test, trained_estimator):
    """
    Given feature data and a trained estimator, return a classification prediction

    Args:
        x_test: feature data to generate predictions for
        trained_estimator (sklearn.base.BaseEstimator): a trained scikit-learn estimator

    Returns:
        a prediction
    """
    validate_estimator(trained_estimator)
    prediction = np.squeeze(trained_estimator.predict_proba(x_test)[:, 1])
    return prediction 
Example #26
Source File: predict.py    From healthcareai-py with MIT License
def validate_estimator(possible_estimator):
    """
    Given an object, raise an error if it is not a scikit-learn BaseEstimator

    Args:
        possible_estimator (object): Object of any type.

    Returns:
        True, or raises an error; the True return value is used only for testing
    """
    if not issubclass(type(possible_estimator), BaseEstimator):
        raise HealthcareAIError(
            'Predictions require an estimator. You passed in {}, which is of type: {}'.format(possible_estimator,
                                                                                              type(possible_estimator)))
    return True 
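A quick run of the helpers above with a toy model (data made up); anything that is not a BaseEstimator subclass raises HealthcareAIError instead:

from sklearn.linear_model import LogisticRegression

model = LogisticRegression().fit([[0], [1], [2], [3]], [0, 0, 1, 1])
print(validate_estimator(model))                    # True
print(predict_classification([[2.5]], model))       # P(class 1) for one row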
Example #27
Source File: test_client.py    From gordo with GNU Affero General Public License v3.0
def test_client_download_model(gordo_project, gordo_single_target, ml_server):
    """
    Test client's ability to download the model
    """
    client = Client(project=gordo_project)

    models = client.download_model()
    assert isinstance(models, dict)
    assert isinstance(models[gordo_single_target], BaseEstimator)

    # Can't download model for non-existent target
    with pytest.raises(NotFound):
        client = Client(project=gordo_project)
        client.download_model(targets=["non-existent-target"]) 
Example #28
Source File: test_client.py    From gordo with GNU Affero General Public License v3.0
def test_client_cli_download_model(
    gordo_project, gordo_single_target, ml_server, tmpdir
):
    """
    Test proper execution of the client download-model sub-command
    """
    runner = CliRunner()

    # Empty output directory before downloading
    assert len(os.listdir(tmpdir)) == 0

    out = runner.invoke(
        cli.gordo,
        args=[
            "client",
            "--project",
            gordo_project,
            "download-model",
            str(tmpdir),
            "--target",
            gordo_single_target,
        ],
    )
    assert (
        out.exit_code == 0
    ), f"Expected output code 0 got '{out.exit_code}', {out.output}"

    # Output directory should not be empty any longer
    assert len(os.listdir(tmpdir)) > 0

    model_output_dir = os.path.join(tmpdir, gordo_single_target)
    assert os.path.isdir(model_output_dir)

    model = serializer.load(model_output_dir)
    assert isinstance(model, BaseEstimator) 
Example #29
Source File: test_builder.py    From gordo with GNU Affero General Public License v3.0
def test_determine_offset(model: BaseEstimator, expected_offset: int):
    """
    Determine the correct output difference from the model
    """
    X, y = np.random.random((100, 10)), np.random.random((100, 10))
    model.fit(X, y)
    offset = ModelBuilder._determine_offset(model, X)
    assert offset == expected_offset 
Example #30
Source File: diff.py    From gordo with GNU Affero General Public License v3.0
def __init__(
        self,
        base_estimator: BaseEstimator = KerasAutoEncoder(kind="feedforward_hourglass"),
        scaler: TransformerMixin = RobustScaler(),
        require_thresholds: bool = True,
        window=None,
    ):
        """
        Classifier which wraps a ``base_estimator`` and provides a diff error
        based approach to anomaly detection.

        It fits a ``scaler`` on the target **after** training, purely for
        error calculations. The underlying ``base_estimator`` is trained
        with the original, unscaled ``y``.

        Parameters
        ----------
        base_estimator: sklearn.base.BaseEstimator
            The model on which the normal ``.fit`` and ``.predict`` methods will be called.
            Defaults to :py:class:`gordo.machine.model.models.KerasAutoEncoder` with
            ``kind='feedforward_hourglass'``.
        scaler: sklearn.base.TransformerMixin
            Defaults to ``sklearn.preprocessing.RobustScaler``
            Used for transforming model output and the original ``y`` to calculate
            the difference/error in model output vs expected.
        require_thresholds: bool
            Requires calculating ``thresholds_`` via a call to :func:`~DiffBasedAnomalyDetector.cross_validate`.
            If this is set (default True), but :func:`~DiffBasedAnomalyDetector.cross_validate`
            was not called before calling :func:`~DiffBasedAnomalyDetector.anomaly` an ``AttributeError``
            will be raised.
        window: int
            Window size for smoothed thresholds
        """
        self.base_estimator = base_estimator
        self.scaler = scaler
        self.require_thresholds = require_thresholds
        self.window = window
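A back-of-envelope sketch of the diff-error idea described above (all numbers made up, not gordo's exact implementation): scale y and the model output with the same fitted scaler, then score each row by the error between the two.

import numpy as np
from sklearn.preprocessing import RobustScaler

y = np.random.random((100, 4))
y_pred = y + np.random.normal(scale=0.1, size=y.shape)  # stand-in model output

scaler = RobustScaler().fit(y)
error = np.linalg.norm(scaler.transform(y) - scaler.transform(y_pred), axis=1)
print(error.shape)                                  # (100,): one score per row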