Python sklearn.metrics.check_scoring() Examples

The following are 9 code examples of sklearn.metrics.check_scoring(), drawn from open-source projects. Each example lists its original project, source file, and license. You may also want to check out the other available functions and classes of the sklearn.metrics module.
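As background, here is a minimal sketch (not taken from any of the projects below) of what check_scoring returns: a scorer callable with the signature scorer(estimator, X, y).

from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import check_scoring

X, y = make_classification(random_state=0)
clf = LogisticRegression().fit(X, y)

# Resolve a scoring string into a scorer callable, then apply it.
scorer = check_scoring(clf, scoring="accuracy")
print(scorer(clf, X, y))  # mean accuracy of clf on (X, y)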
Example #1
Source File: scorer.py    From dask-ml with BSD 3-Clause "New" or "Revised" License
def check_scoring(estimator, scoring=None, **kwargs):
    res = sklearn_check_scoring(estimator, scoring=scoring, **kwargs)
    if callable(scoring):
        # Heuristic to ensure user has not passed a metric
        module = getattr(scoring, "__module__", None)
        if (
            hasattr(module, "startswith")
            and module.startswith("dask_ml.metrics.")
            and not module.startswith("dask_ml.metrics.scorer")
            and not module.startswith("dask_ml.metrics.tests.")
        ):
            raise ValueError(
                "scoring value %r looks like it is a metric "
                "function rather than a scorer. A scorer should "
                "require an estimator as its first parameter. "
                "Please use `make_scorer` to convert a metric "
                "to a scorer." % scoring
            )
    if scoring in SCORERS.keys():
        func, kwargs = SCORERS[scoring]
        return make_scorer(func, **kwargs)
    return res 
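The ValueError above guards the distinction between a metric and a scorer: a metric takes (y_true, y_pred), while a scorer takes (estimator, X, y). A short sketch, independent of dask-ml, of turning a metric into a scorer with make_scorer:

from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, make_scorer

X, y = make_classification(random_state=0)
clf = LogisticRegression().fit(X, y)

# accuracy_score is a metric: accuracy_score(y_true, y_pred).
# make_scorer wraps it into a scorer: scorer(estimator, X, y).
scorer = make_scorer(accuracy_score)
print(scorer(clf, X, y))
print(accuracy_score(y, clf.predict(X)))  # same value, metric signature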
Example #2
Source File: _incremental.py    From dask-ml with BSD 3-Clause "New" or "Revised" License
def _validate_parameters(self, X, y):
        if (self.max_iter is not None) and self.max_iter < 1:
            raise ValueError(
                "Received max_iter={}. max_iter < 1 is not supported".format(
                    self.max_iter
                )
            )

        # Make sure dask arrays are passed so error on unknown chunk size is raised
        if isinstance(X, dd.DataFrame):
            X = X.to_dask_array()
        if isinstance(y, (dd.DataFrame, dd.Series)):
            y = y.to_dask_array()
        kwargs = dict(accept_unknown_chunks=False, accept_dask_dataframe=False)
        X = self._check_array(X, **kwargs)
        y = self._check_array(y, ensure_2d=False, **kwargs)
        scorer = check_scoring(self.estimator, scoring=self.scoring)
        return X, y, scorer 
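When scoring is None, check_scoring returns a scorer that simply delegates to the estimator's own score method. A minimal illustration of that behaviour, outside of dask-ml:

from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import check_scoring

X, y = make_classification(random_state=0)
clf = LogisticRegression().fit(X, y)

# With scoring=None the returned scorer delegates to clf.score(X, y).
scorer = check_scoring(clf, scoring=None)
assert scorer(clf, X, y) == clf.score(X, y)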
Example #3
Source File: search.py    From sk-dist with Apache License 2.0
def _fit_one_fold(fit_set, models, X, y, scoring, fit_params):
    """
    Fits the given estimator on one fold of training data.
    Scores the fitted estimator against the test fold.
    """
    train = fit_set[0][0]
    test = fit_set[0][1]
    estimator_ = _clone(models[fit_set[1]["model_index"]][1])
    parameters = fit_set[1]["param_set"]
    X_train, y_train = _safe_split(estimator_, X, y, train)
    X_test, y_test = _safe_split(estimator_, X, y, test, train)
    if parameters is not None:
        estimator_.set_params(**parameters)
    estimator_.fit(X_train, y_train, **fit_params)
    scorer = check_scoring(estimator_, scoring=scoring)
    is_multimetric = not callable(scorer)
    out_dct = fit_set[1]
    out_dct["score"] = _score(
        estimator_, X_test, y_test, 
        scorer, is_multimetric
        )
    return out_dct 
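A hedged sketch of the per-fold pattern above, using plain scikit-learn utilities in place of sk-dist's helpers (_clone, _safe_split, _score): fit on the training indices, then evaluate the held-out fold with the scorer returned by check_scoring.

from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import check_scoring
from sklearn.model_selection import KFold

X, y = make_classification(random_state=0)
clf = LogisticRegression()
scorer = check_scoring(clf, scoring="roc_auc")

# Take one (train, test) split, fit on the training fold,
# then score the held-out fold.
train, test = next(iter(KFold(n_splits=3).split(X)))
clf.fit(X[train], y[train])
print(scorer(clf, X[test], y[test]))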
Example #4
Source File: scoring.py    From skorch with BSD 3-Clause "New" or "Revised" License
def _scoring(self, net, X_test, y_test):
        """Resolve scoring and apply it to data. Use cached prediction
        instead of running inference again, if available."""
        scorer = check_scoring(net, self.scoring_)
        return scorer(net, X_test, y_test) 
Example #5
Source File: flofo_importance.py    From lofo-importance with MIT License
def __init__(self, trained_model, validation_df, features, target,
                 scoring, n_jobs=None):
        self.trained_model = trained_model
        self.df = validation_df.copy()
        self.features = features
        self.target = target
        self.n_jobs = n_jobs
        self.scorer = check_scoring(estimator=self.trained_model, scoring=scoring)

        # FLOFO defaults
        self.num_bins = 10
        self.shuffle_func = np.random.permutation
        self.feature_group_len = 2
        self.num_sampling = 10

        min_data_needed = 10*(self.num_bins**self.feature_group_len)
        if self.df.shape[0] < min_data_needed:
            raise Exception("Small validation set (<{})".format(min_data_needed))
        if len(self.features) <= self.feature_group_len:
            raise Exception("FLOFO needs more than {} features".format(self.feature_group_len))

        if self.n_jobs is not None and self.n_jobs > 1:
            warning_str = ("Warning: If your model is multithreaded, please initialise the number"
                           "of jobs of LOFO to be equal to 1, otherwise you may experience performance issues.")
            warnings.warn(warning_str)

        self._bin_features() 
Example #6
Source File: single_best.py    From DESlib with BSD 3-Clause "New" or "Revised" License
def _estimate_performances(self, X, y):
        performances = np.zeros(self.n_classifiers_)
        for idx, clf in enumerate(self.pool_classifiers_):
            scorer = check_scoring(clf, self.scoring)
            performances[idx] = scorer(clf, X, y)
        return performances 
Example #7
Source File: utils.py    From verde with BSD 3-Clause "New" or "Revised" License
def score_estimator(scoring, estimator, coordinates, data, weights=None):
    """
    Score the given gridder against the given data using the given metric.

    If the data and predictions have more than 1 component, the scores of each
    component will be averaged.

    Parameters
    ----------
    scoring : str or callable
        A scoring specification known to scikit-learn. See
        :func:`sklearn.metrics.check_scoring`.
    estimator : a Verde gridder
        The gridder to score. Usually derived from
        :class:`verde.base.BaseGridder`.
    coordinates : tuple of arrays
        Arrays with the coordinates of each data point. Should be in the
        following order: (easting, northing, vertical, ...).
        For the specific definition of coordinate systems and what these
        names mean, see the class docstring.
    data : array or tuple of arrays
        The data values of each data point. If the data has more than one
        component, *data* must be a tuple of arrays (one for each
        component).
    weights : None or array or tuple of arrays
        If not None, then the weights assigned to each data point. If more
        than one data component is provided, you must provide a weights
        array for each data component (if not None).

    Returns
    -------
    score : float
        The score.

    """
    coordinates, data, weights = check_fit_input(
        coordinates, data, weights, unpack=False
    )
    predicted = check_data(estimator.predict(coordinates))
    scorer = check_scoring(DummyEstimator, scoring=scoring)
    result = np.mean(
        [
            scorer(
                DummyEstimator(pred.ravel()),
                coordinates,
                data[i].ravel(),
                sample_weight=weights[i],
            )
            for i, pred in enumerate(predicted)
        ]
    )
    return result 
Example #8
Source File: utils.py    From sk-dist with Apache License 2.0
def _check_multimetric_scoring(estimator, scoring=None):
    """ Check the scoring parameter in cases when multiple metrics are allowed """
    if callable(scoring) or scoring is None or isinstance(scoring,
                                                          str):
        scorers = {"score": check_scoring(estimator, scoring=scoring)}
        return scorers, False
    else:
        err_msg_generic = ("scoring should either be a single string or "
                           "callable for single metric evaluation or a "
                           "list/tuple of strings or a dict of scorer name "
                           "mapped to the callable for multiple metric "
                           "evaluation. Got %s of type %s"
                           % (repr(scoring), type(scoring)))

        if isinstance(scoring, (list, tuple, set)):
            err_msg = ("The list/tuple elements must be unique "
                       "strings of predefined scorers. ")
            invalid = False
            try:
                keys = set(scoring)
            except TypeError:
                invalid = True
            if invalid:
                raise ValueError(err_msg)

            if len(keys) != len(scoring):
                raise ValueError(err_msg + "Duplicate elements were found in"
                                 " the given list. %r" % repr(scoring))
            elif len(keys) > 0:
                if not all(isinstance(k, str) for k in keys):
                    if any(callable(k) for k in keys):
                        raise ValueError(err_msg +
                                         "One or more of the elements were "
                                         "callables. Use a dict of score name "
                                         "mapped to the scorer callable. "
                                         "Got %r" % repr(scoring))
                    else:
                        raise ValueError(err_msg +
                                         "Non-string types were found in "
                                         "the given list. Got %r"
                                         % repr(scoring))
                scorers = {scorer: check_scoring(estimator, scoring=scorer)
                           for scorer in scoring}
            else:
                raise ValueError(err_msg +
                                 "Empty list was given. %r" % repr(scoring))

        elif isinstance(scoring, dict):
            keys = set(scoring)
            if not all(isinstance(k, str) for k in keys):
                raise ValueError("Non-string types were found in the keys of "
                                 "the given dict. scoring=%r" % repr(scoring))
            if len(keys) == 0:
                raise ValueError("An empty dict was passed. %r"
                                 % repr(scoring))
            scorers = {key: check_scoring(estimator, scoring=scorer)
                       for key, scorer in scoring.items()}
        else:
            raise ValueError(err_msg_generic)
        return scorers, True 
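A brief sketch of the multi-metric case this helper resolves: each entry of a scoring list becomes a named scorer built with check_scoring (the names and data below are chosen only for illustration).

from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import check_scoring

X, y = make_classification(random_state=0)
clf = LogisticRegression().fit(X, y)

scoring = ["accuracy", "f1"]
# One scorer per requested metric, keyed by its name.
scorers = {name: check_scoring(clf, scoring=name) for name in scoring}
results = {name: scorer(clf, X, y) for name, scorer in scorers.items()}
print(results)  # e.g. {'accuracy': ..., 'f1': ...}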
Example #9
Source File: static_selection.py    From DESlib with BSD 3-Clause "New" or "Revised" License
def fit(self, X, y):
        """Fit the static selection model by select an ensemble of classifier
        containing the base classifiers with highest accuracy in the given
        dataset.

        Parameters
        ----------
        X : array of shape (n_samples, n_features)
            Data used to fit the model.

        y : array of shape (n_samples)
            Class labels of each example in X.

        Returns
        -------
        self : object
            Returns self.
        """
        self._validate_parameters()

        X, y = check_X_y(X, y)

        super(StaticSelection, self).fit(X, y)

        self.n_classifiers_ensemble_ = int(
            self.n_classifiers_ * self.pct_classifiers)

        performances = np.zeros(self.n_classifiers_)

        if not self.base_already_encoded_:
            y_encoded = y
        else:
            y_encoded = self.enc_.transform(y)

        for clf_idx, clf in enumerate(self.pool_classifiers_):
            scorer = check_scoring(clf, self.scoring)
            performances[clf_idx] = scorer(clf, X, y_encoded)

        self.clf_indices_ = np.argsort(performances)[::-1][
                            0:self.n_classifiers_ensemble_]
        self.ensemble_ = [self.pool_classifiers_[clf_idx] for clf_idx in
                          self.clf_indices_]

        return self
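A hedged sketch of the selection step above, outside of DESlib: score every classifier in a pool with the same scoring, then keep the indices of the top performers via argsort.

import numpy as np
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import check_scoring
from sklearn.tree import DecisionTreeClassifier

X, y = make_classification(random_state=0)
pool = [LogisticRegression().fit(X, y),
        DecisionTreeClassifier(max_depth=3, random_state=0).fit(X, y)]

performances = np.zeros(len(pool))
for idx, clf in enumerate(pool):
    scorer = check_scoring(clf, scoring="accuracy")
    performances[idx] = scorer(clf, X, y)

# Keep the best half of the pool, highest scores first.
n_keep = max(1, len(pool) // 2)
best_indices = np.argsort(performances)[::-1][:n_keep]
ensemble = [pool[i] for i in best_indices]
print([type(clf).__name__ for clf in ensemble])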