Python sklearn.base Examples

Example #1
Source File:    From gordo with GNU Affero General Public License v3.0 6 votes vote down vote up
def _determine_offset(
    model: BaseEstimator, X: Union[np.ndarray, pd.DataFrame]
) -> int:
    """
    Determine the model's offset. How much does the output of the model differ
    from its input?

    Parameters
    ----------
    model: sklearn.base.BaseEstimator
        Trained model with either ``predict`` or ``transform`` method, preference
        given to ``predict``.
    X: Union[np.ndarray, pd.DataFrame]
        Data to pass to the model's ``predict`` or ``transform`` method.

    Returns
    -------
    int
        The difference between X and the model's output lengths.
    """
    # ``predict`` wins whenever the model exposes it; ``transform`` is only
    # the fallback for models without a ``predict`` method.
    out = model.predict(X) if hasattr(model, "predict") else model.transform(X)
    return len(X) - len(out)
Example #2
Source File:    From Mastering-Elasticsearch-7.0 with MIT License 5 votes vote down vote up
def test_repr():
    """Smoke test the repr of the base estimator.

    Checks that ``repr`` works on a plain estimator, that nested estimators
    are rendered recursively, and that a very long parameter list is
    truncated to a fixed-length representation.
    """
    my_estimator = MyEstimator()
    # NOTE(review): this call was missing from the listing; restored because
    # ``my_estimator`` is otherwise unused and the test is a repr smoke test.
    repr(my_estimator)

    test = T(K(), K())
    assert_equal(
        repr(test),
        "T(a=K(c=None, d=None), b=K(c=None, d=None))"
    )

    some_est = T(a=["long_params"] * 1000)
    assert_equal(len(repr(some_est)), 495)
Example #3
Source File:    From Mastering-Elasticsearch-7.0 with MIT License 5 votes vote down vote up
def test_str():
    """Smoke test the str of the base estimator."""
    my_estimator = MyEstimator()
    # The call itself is the test: building the string must not raise.
    # NOTE(review): this call was missing from the listing; without it the
    # test only constructs the estimator and never exercises ``str``.
    str(my_estimator)
Example #4
Source File:    From Mastering-Elasticsearch-7.0 with MIT License 5 votes vote down vote up
def test_all_estimator_no_base_class():
    """Check that ``all_estimators`` does not report abstract base classes.

    Every name yielded by ``all_estimators()`` is lower-cased and must not
    start with ``base``.
    """
    template = ("Base estimators such as {0} should not be included"
                " in all_estimators")
    for estimator_name, _estimator_cls in all_estimators():
        looks_like_base = estimator_name.lower().startswith('base')
        assert not looks_like_base, template.format(estimator_name)
Example #5
Source File:    From Mastering-Elasticsearch-7.0 with MIT License 5 votes vote down vote up
def test_root_import_all_completeness():
    """Check that every public top-level sklearn module is in ``__all__``.

    Walks the packages under ``sklearn.__path__`` and asserts that each
    module name is listed in ``sklearn.__all__``, except for sub-modules
    (dotted names), private modules (leading underscore) and the names in
    ``EXCEPTIONS``.
    """
    EXCEPTIONS = ('utils', 'tests', 'base', 'setup', 'conftest')
    # ``onerror`` swallows import errors so one broken sub-package does not
    # abort the walk.
    for _, modname, _ in pkgutil.walk_packages(path=sklearn.__path__,
                                               onerror=lambda _: None):
        if '.' in modname or modname.startswith('_') or modname in EXCEPTIONS:
            # Not expected to be part of the public root namespace.
            continue
        assert_in(modname, sklearn.__all__)
Example #6
Source File:    From lale with Apache License 2.0 5 votes vote down vote up
def test_wrap_from_instance(self):
    """Wrap an ``UnknownOp`` instance and round-trip it through ``clone``.

    Neither the ``UnknownOp`` class nor a raw instance is a
    ``TrainableIndividualOp``; the result of ``make_operator`` is, and so is
    the operator obtained by cloning its sklearn-compatible form and
    converting back with ``to_lale()``. Hyperparameters must survive both
    steps.
    """
    from lale.operators import make_operator, TrainableIndividualOp
    from lale.sklearn_compat import make_sklearn_compat
    from sklearn.base import clone

    expected_hyperparams = {'n_neighbors': 3}

    # Before wrapping: neither the class nor an instance is a lale operator.
    self.assertFalse(isinstance(UnknownOp, TrainableIndividualOp))
    raw_op = UnknownOp(n_neighbors=3)
    self.assertFalse(isinstance(raw_op, TrainableIndividualOp))

    # After wrapping: a trainable operator carrying the same hyperparameters.
    lale_op = make_operator(raw_op)
    self.assertTrue(isinstance(lale_op, TrainableIndividualOp))
    self.assertEqual(lale_op.hyperparams(), expected_hyperparams)

    # Round trip through sklearn's clone and back to lale.
    sklearn_view = make_sklearn_compat(lale_op)
    round_tripped = clone(sklearn_view).to_lale()
    self.assertTrue(isinstance(round_tripped, TrainableIndividualOp))
    self.assertEqual(round_tripped.hyperparams(), expected_hyperparams)
Example #7
Source File:    From lale with Apache License 2.0 5 votes vote down vote up
def _clone_impl(self):
    """Return a fresh copy of the wrapped implementation object.

    Strategy, in order of preference:

    1. If the implementation exposes ``get_params`` it is cloned with
       ``sklearn.base.clone``.
    2. Otherwise it is deep-copied.
    3. If deep-copying fails, a new instance is built from
       ``self._impl_class()`` and ``self._get_params_all()``.

    Returns
    -------
    object
        The copied (or re-created) implementation instance.
    """
    impl_instance = self._impl_instance()
    if hasattr(impl_instance, 'get_params'):
        result = sklearn.base.clone(impl_instance)
    else:
        # NOTE(review): the ``else``/``try``/``except`` lines were missing
        # from the listing and are restored from the indentation of the
        # surviving statements. ``except Exception`` is used so that
        # KeyboardInterrupt/SystemExit are not swallowed - confirm against
        # the upstream lale source.
        try:
            result = copy.deepcopy(impl_instance)
        except Exception:
            # Not deep-copyable: rebuild from the class and its parameters.
            impl_class = self._impl_class()
            params_all = self._get_params_all()
            result = impl_class(**params_all)
    return result
Example #8
Source File:    From lale with Apache License 2.0 5 votes vote down vote up
def __constructor_for_cloning(self, steps:List[OpType]):
    """Rebuild the pipeline graph from an ordered list of steps.

    Consecutive steps are connected linearly: every sink of the previous
    step is linked to every source of the current step. The resulting
    edges are stored as predecessor lists in ``self._preds``.

    Parameters
    ----------
    steps : List[OpType]
        Steps of the pipeline. The very same list object is kept as
        ``self._steps`` (see the comment below).

    Raises
    ------
    ValueError
        If the same step instance appears more than once in ``steps``.
    """
    edges:List[Tuple[OpType, OpType]] = []
    prev_op:Optional[OpType] = None
    #This is due to scikit base's clone method that needs the same list object
    self._steps = steps

    # NOTE(review): both ``else:`` lines in this loop were missing from the
    # listing and are restored from the surviving branch bodies. The listing
    # may have dropped further statements in the ``BasePipeline`` branch -
    # confirm against the upstream lale source.
    for curr_op in self._steps:
        if isinstance(prev_op, BasePipeline):
            prev_leaves = prev_op._find_sink_nodes()
        else:
            prev_leaves = [] if prev_op is None else [prev_op]
        if isinstance(curr_op, BasePipeline):
            curr_roots = curr_op._find_source_nodes()
        else:
            curr_roots = [curr_op]
        edges.extend([(src, tgt) for src in prev_leaves for tgt in curr_roots])
        prev_op = curr_op

    seen_steps:List[OpType] = []
    for step in self._steps:
        if step in seen_steps:
            # NOTE(review): the ``format`` argument was missing from the
            # listing; ``step.name()`` is an assumption to be confirmed.
            raise ValueError('Same instance of {} already exists in the pipeline. '\
            'This is not allowed.'.format(step.name()))
        # NOTE(review): restored; without it the duplicate check never fires.
        seen_steps.append(step)
    self._preds = { step: [] for step in self._steps }
    for (src, dst) in edges:
        # NOTE(review): loop body restored; records ``src`` as a predecessor
        # of ``dst``.
        self._preds[dst].append(src)
    #Since this case is only allowed for linear pipelines, it is always
    #expected to be in topological order
    assert self.__is_in_topological_order()
Example #9
Source File:    From twitter-stock-recommendation with MIT License 5 votes vote down vote up
def test_repr():
    """Smoke test the repr of the base estimator.

    Checks that ``repr`` works on a plain estimator, that nested estimators
    are rendered recursively, and that a very long parameter list is
    truncated to a fixed-length representation.
    """
    my_estimator = MyEstimator()
    # NOTE(review): this call was missing from the listing; restored because
    # ``my_estimator`` is otherwise unused and the test is a repr smoke test.
    repr(my_estimator)

    test = T(K(), K())
    assert_equal(
        repr(test),
        "T(a=K(c=None, d=None), b=K(c=None, d=None))"
    )

    some_est = T(a=["long_params"] * 1000)
    assert_equal(len(repr(some_est)), 415)
Example #10
Source File:    From twitter-stock-recommendation with MIT License 5 votes vote down vote up
def test_str():
    """Smoke test the str of the base estimator."""
    my_estimator = MyEstimator()
    # The call itself is the test: building the string must not raise.
    # NOTE(review): this call was missing from the listing; without it the
    # test only constructs the estimator and never exercises ``str``.
    str(my_estimator)
Example #11
Source File:    From gordo with GNU Affero General Public License v3.0 4 votes vote down vote up
def _extract_metadata_from_model(
    model: BaseEstimator, metadata: dict = dict()
) -> dict:
    """
    Recursively check for :class:`gordo.machine.model.base.GordoBase` in a
    given ``model``. If such the model exists buried inside of a
    :class:`sklearn.pipeline.Pipeline` which is then part of another
    :class:`sklearn.base.BaseEstimator`, this function will return its metadata.

    Parameters
    ----------
    model: BaseEstimator
    metadata: dict
        Any initial starting metadata, but is mainly meant to be used during
        the recursive calls to accumulate any multiple
        :class:`gordo.machine.model.base.GordoBase` models found in this model

    Notes
    -----
    If there is a ``GordoBase`` model inside of a ``Pipeline`` which is not the final
    step, this function will not find it.

    Returns
    -------
    dict
        Dictionary representing accumulated calls to ``get_metadata`` of the
        ``GordoBase`` models found.
    """
    # Work on a copy: the caller's dict (and the shared default argument)
    # is never mutated.
    metadata = metadata.copy()

    # NOTE(review): every ``metadata.update(...)`` line below was missing from
    # the listing and is restored from the surrounding comments. The recursive
    # calls use the bare function name because the listing shows a top-level
    # ``def``; if this lives on a class as a static method, qualify the calls
    # with the class name.

    # If it's a Pipeline, only need to get the last step, which potentially has metadata
    if isinstance(model, Pipeline):
        final_step = model.steps[-1][1]
        metadata.update(_extract_metadata_from_model(final_step))
        return metadata

    # GordoBase is simple, having a .get_metadata()
    if isinstance(model, GordoBase):
        metadata.update(model.get_metadata())

    # Continue to look at object values in case, we decided to have a GordoBase
    # which also had a GordoBase as a parameter/attribute, but will satisfy BaseEstimators
    # which can take a GordoBase model as a parameter, which will then have metadata to get
    for val in model.__dict__.values():
        if isinstance(val, Pipeline):
            metadata.update(_extract_metadata_from_model(val.steps[-1][1]))
        elif isinstance(val, GordoBase) or isinstance(val, BaseEstimator):
            metadata.update(_extract_metadata_from_model(val))
    return metadata
Example #12
Source File:    From rasa_core with Apache License 2.0 4 votes vote down vote up
def __init__(
    self,
    featurizer: Optional[MaxHistoryTrackerFeaturizer] = None,
    priority: int = 1,
    model: Optional['sklearn.base.BaseEstimator'] = None,
    param_grid: Optional[Dict[Text, List] or List[Dict]] = None,
    cv: Optional[int] = None,
    scoring: Optional[Text or List or Dict or Callable] = 'accuracy',
    label_encoder: Optional[LabelEncoder] = None,
    shuffle: bool = True,
    **kwargs: Any
) -> None:
    """Create a new sklearn policy.

    Args:
        featurizer: Featurizer used to convert the training data into
            vector format.
        priority: Policy priority, forwarded to the parent constructor.
        model: The sklearn model or model pipeline.
        param_grid: If *param_grid* is not None and *cv* is given,
            a grid search on the given *param_grid* is performed
            (e.g. *param_grid={'n_estimators': [50, 100]}*).
        cv: If *cv* is not None, perform a cross validation on
            the training data. *cv* should then conform to the
            sklearn standard (e.g. *cv=5* for a 5-fold cross-validation).
        scoring: Scoring strategy, using the sklearn standard.
        label_encoder: Encoder for the labels. Must implement an
            *inverse_transform* method. A new ``LabelEncoder`` is created
            when omitted.
        shuffle: Whether to shuffle training data.
        **kwargs: Stored as ``self._train_params``.

    Raises:
        TypeError: If *featurizer* is given but is not a
            ``MaxHistoryTrackerFeaturizer``.
    """
    # NOTE(review): ``X or Y`` inside an annotation evaluates to ``X`` at
    # definition time; ``Union[...]`` is probably what was meant. Left
    # untouched because ``Union`` may not be imported in this module.

    if featurizer:
        if not isinstance(featurizer, MaxHistoryTrackerFeaturizer):
            # NOTE(review): the tail of this message was missing from the
            # listing; restored on the assumption that it names the
            # expected class and the received type.
            raise TypeError("Passed featurizer of type {}, should be "
                            "MaxHistoryTrackerFeaturizer."
                            "".format(type(featurizer).__name__))
    super(SklearnPolicy, self).__init__(featurizer, priority)

    self.model = model or self._default_model()
    self.cv = cv
    self.param_grid = param_grid
    self.scoring = scoring
    # A ``LabelEncoder()`` default in the signature is created once and
    # shared by every policy built without an explicit encoder, so their
    # fitted label state would leak into each other.
    self.label_encoder = (
        label_encoder if label_encoder is not None else LabelEncoder()
    )
    self.shuffle = shuffle

    # attributes that need to be restored after loading
    self._pickle_params = [
        'model', 'cv', 'param_grid', 'scoring', 'label_encoder']
    self._train_params = kwargs
Example #13
Source File:    From tslearn with BSD 2-Clause "Simplified" License 4 votes vote down vote up
def get_estimators(type_filter='all'):
    """Return a list of classes that inherit from `sklearn.BaseEstimator`.
    This code is based on `sklearn.utils.testing.all_estimators`.

    Parameters
    ----------
    type_filter : str (default: 'all')
        A value in ['all', 'classifier', 'transformer', 'cluster'] which
        defines which type of estimators to retrieve

    Returns
    -------
    list
        Collection of estimators of the type specified in `type_filter`,
        as ``(name, class)`` entries sorted by name.

    Raises
    ------
    ValueError
        If `type_filter` is not one of the accepted values.
    """
    if type_filter not in ('all', 'classifier', 'transformer', 'cluster'):
        # ValueError is still an ``Exception``, so existing handlers keep
        # working while callers can now catch the specific error.
        raise ValueError("type_filter should be element of "
                         "['all', 'classifier', 'transformer', 'cluster']")

    # NOTE(review): the closing ``}`` of this mapping, its lookup by
    # ``type_filter`` and the body of the final filtering step were missing
    # from the listing; restored from the surrounding comments.
    mixins_by_filter = {
        'all': [ClassifierMixin, RegressorMixin,
                TransformerMixin, ClusterMixin],
        'classifier': [ClassifierMixin],
        'transformer': [TransformerMixin],
        'cluster': [ClusterMixin],
    }
    wanted_mixins = tuple(mixins_by_filter[type_filter])

    # Entries are indexed as (name, class) pairs. Keep a class only if it
    #   * is a subclass of `sklearn.BaseEstimator`,
    #   * is not an abstract base class,
    #   * comes from tslearn rather than sklearn,
    #   * is of the requested estimator type.
    # Building a set removes duplicates.
    selected = {
        entry for entry in _get_all_classes()
        if issubclass(entry[1], BaseEstimator)
        and not is_abstract(entry[1])
        and not is_sklearn(entry[1])
        and issubclass(entry[1], wanted_mixins)
    }

    return sorted(selected, key=itemgetter(0))
Example #14
Source File:    From tslearn with BSD 2-Clause "Simplified" License 4 votes vote down vote up
def check_estimator(Estimator):
    """Check if estimator adheres to scikit-learn conventions.

    This estimator will run an extensive test-suite for input validation,
    shapes, etc.
    Additional tests for classifiers, regressors, clustering or transformers
    will be run if the Estimator class inherits from the corresponding mixin
    from sklearn.base.
    This test can be applied to classes or instances.
    Classes currently have some additional tests that related to construction,
    while passing instances allows the testing of multiple options.

    Parameters
    ----------
    estimator : estimator object or class
        Estimator to check. Estimator is a class object or instance.
    """
    if isinstance(Estimator, type):
        # got a class
        name = Estimator.__name__
        estimator = Estimator()

        check_parameters_default_constructible(name, Estimator)
        check_no_attributes_set_in_init(name, estimator)
    else:
        # got an instance
        estimator = Estimator
        name = type(estimator).__name__

    # NOTE(review): the bodies of the three ``hasattr`` blocks below were
    # missing from the listing. The ``set_params`` calls and their values
    # are restored from memory of the upstream tslearn source and MUST be
    # confirmed against it before relying on them.
    if hasattr(estimator, 'max_iter'):
        if (isinstance(estimator, LearningShapelets) or
                isinstance(estimator, SerializableShapeletModel)):
            estimator.set_params(max_iter=100)
        else:
            estimator.set_params(max_iter=10)
    if hasattr(estimator, 'total_lengths'):
        estimator.set_params(total_lengths=1)
    if hasattr(estimator, 'probability'):
        estimator.set_params(probability=True)

    for check in checks._yield_all_checks(name, estimator):
        try:
            check(name, estimator)
        except SkipTest as exception:
            # the only SkipTest thrown currently results from not
            # being able to import pandas.
            warnings.warn(str(exception), SkipTestWarning)
Example #15
Source File:    From rasa-for-botfront with Apache License 2.0 4 votes vote down vote up
def __init__(
    self,
    featurizer: Optional[MaxHistoryTrackerFeaturizer] = None,
    priority: int = DEFAULT_POLICY_PRIORITY,
    model: Optional["sklearn.base.BaseEstimator"] = None,
    param_grid: Optional[Dict[Text, List] or List[Dict]] = None,
    cv: Optional[int] = None,
    scoring: Optional[Text or List or Dict or Callable] = "accuracy",
    label_encoder: Optional[LabelEncoder] = None,
    shuffle: bool = True,
    **kwargs: Any,
) -> None:
    """Create a new sklearn policy.

    Args:
        featurizer: Featurizer used to convert the training data into
            vector format.
        priority: Policy priority, forwarded to the parent constructor.
        model: The sklearn model or model pipeline.
        param_grid: If *param_grid* is not None and *cv* is given,
            a grid search on the given *param_grid* is performed
            (e.g. *param_grid={'n_estimators': [50, 100]}*).
        cv: If *cv* is not None, perform a cross validation on
            the training data. *cv* should then conform to the
            sklearn standard (e.g. *cv=5* for a 5-fold cross-validation).
        scoring: Scoring strategy, using the sklearn standard.
        label_encoder: Encoder for the labels. Must implement an
            *inverse_transform* method. A new ``LabelEncoder`` is created
            when omitted.
        shuffle: Whether to shuffle training data.
        **kwargs: Stored as ``self._train_params``.

    Raises:
        TypeError: If *featurizer* is given but is not a
            ``MaxHistoryTrackerFeaturizer``.
    """
    # NOTE(review): ``X or Y`` inside an annotation evaluates to ``X`` at
    # definition time; ``Union[...]`` is probably what was meant. Left
    # untouched because ``Union`` may not be imported in this module.

    if featurizer:
        if not isinstance(featurizer, MaxHistoryTrackerFeaturizer):
            # NOTE(review): the tail of this message was missing from the
            # listing; restored on the assumption that it names the
            # expected class and the received type.
            raise TypeError(
                "Passed featurizer of type {}, should be "
                "MaxHistoryTrackerFeaturizer."
                "".format(type(featurizer).__name__)
            )
    super().__init__(featurizer, priority)

    self.model = model or self._default_model()
    self.cv = cv
    self.param_grid = param_grid
    self.scoring = scoring
    # A ``LabelEncoder()`` default in the signature is created once and
    # shared by every policy built without an explicit encoder, so their
    # fitted label state would leak into each other.
    self.label_encoder = (
        label_encoder if label_encoder is not None else LabelEncoder()
    )
    self.shuffle = shuffle

    # attributes that need to be restored after loading
    self._pickle_params = ["model", "cv", "param_grid", "scoring", "label_encoder"]
    self._train_params = kwargs
Example #16
Source File:    From spark-sklearn with Apache License 2.0 4 votes vote down vote up
def __init__(self, sklearnEstimator=None, keyCols=["key"], xCol="features",
                 outputCol="output", yCol=None, estimatorType=None):
        """For all instances, the ordered list of ``keyCols`` determine the set of groups which each
        ``sklearnEstimator`` is applied to.

        For every unique ``keyCols`` value, the remaining columns are aggregated and used to train
        the scikit-learn estimator.

        ``estimatorType`` inference is conducted as follows: if ``yCol`` is specified, then this is
        assumed to be of ``"predictor"`` type, else a ``"transformer"`` or a ``"clusterer"``,
        depending on the estimator having the ``transform()`` or ``fit_predict()`` attributes, with
        ``"clusterer"`` being chosen in case both attributes are present.

        :param sklearnEstimator: An instance of a scikit-learn estimator, with parameters configured
                                 as desired for each user.
        :param keyCols: Key column names list used to group data to which models are applied, where
                        order implies lexicographical importance.
        :param xCol: Name of column of input features used for training and
        :param yCol: Specifies name of label column for regression or classification pipelines.
                     Required for predictors, must be unspecified or ``None`` for transformers.
        :param estimatorType: Identifies the type of scikit-learn estimator being used, which
                              changes the interface the ``sklearnEstimator`` is expected to have.
                              This parameter's value is inferred using reflection by default,
                              but may be manually overriden.

        :raise ValueError: if ``sklearnEstimator`` is ``None``.
        :raise ValueError: if ``sklearnEstimator`` does not derive from
        :raise ValueError: if ``keyCols`` is empty.
        :raise ValueError: if any column has the name ``"estimator"``
        :raise AttributeError: if reflection checks indicate that parameter estimator is not equipped
                               with a ``fit()`` method.
        if sklearnEstimator is None:
            raise ValueError("sklearnEstimator should be specified")
        if not isinstance(sklearnEstimator, sklearn.base.BaseEstimator):
            raise ValueError("sklearnEstimator should be an sklearn.base.BaseEstimator")
        if len(keyCols) == 0:
            raise ValueError("keyCols should not be empty")
        if "estimator" in keyCols + [xCol, yCol]:
            raise ValueError("keyCols should not contain a column named \"estimator\"")

        # The superclass expects Param attributes to already be set, so we only init it after
        # doing so.
        for paramName, paramSpec in KeyedEstimator._paramSpecs.items():
            setattr(self, paramName, Param(Params._dummy(), paramName, paramSpec["doc"]))
        super(KeyedEstimator, self).__init__()
        self._setDefault(**{paramName: paramSpec["default"]
                            for paramName, paramSpec in KeyedEstimator._paramSpecs.items()
                            if "default" in paramSpec})
        kwargs = KeyedEstimator._inferredParams(sklearnEstimator, self._input_kwargs)
