Python sklearn.metrics.scorer.check_scoring() Examples

The following are 8 code examples of sklearn.metrics.scorer.check_scoring(), drawn from open-source projects. Each example lists the source file it comes from, along with its project and license. You may also want to check out the other available functions and classes of the sklearn.metrics.scorer module.
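Before the examples, a minimal sketch of what check_scoring() does may help: it resolves a scoring specification (a string, a scorer object, or None, which falls back to the estimator's own score method) into a callable with the signature scorer(estimator, X, y). The sketch below assumes the legacy sklearn.metrics.scorer import path used by these projects; newer scikit-learn releases expose check_scoring from sklearn.metrics instead.

from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.metrics.scorer import check_scoring  # sklearn.metrics.check_scoring in newer releases

X, y = make_classification(n_samples=100, random_state=0)
clf = LogisticRegression(solver='liblinear').fit(X, y)

scorer = check_scoring(clf, scoring='accuracy')  # string -> scorer callable
print(scorer(clf, X, y))                         # scorer(estimator, X, y) -> float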
Example #1
Source File: br_kneighbor_classifier.py    From Quadflor with BSD 3-Clause "New" or "Revised" License
def _optimize_n_neighbors(self, X, y):
        print('Auto optimizing n_neighbors using ' + str(self.n_neighbor_candidates))
        X_train, X_validate, y_train, y_validate = self._get_split(X, y)
        # Work on a shallow copy so the original estimator is left untouched, and
        # disable auto-optimization on the copy (presumably to keep its fit() from
        # recursing back into this method).
        estimator = copy.copy(self)
        estimator.auto_optimize_k = False
        estimator.fit(X_train, y_train)
        # check_scoring resolves self.scoring into a scorer(estimator, X, y) callable.
        scorer = check_scoring(estimator, scoring=self.scoring)
        configs = []
        for n_neighbors in self.n_neighbor_candidates:
            estimator.n_neighbors = n_neighbors
            score = scorer(estimator, X_validate, y_validate)
            print('N_neighbors = ' + str(n_neighbors) + ' score: ' + str(self.scoring) + ' ' + str(score))
            configs.append((n_neighbors, score))

        configs = sorted(configs, key=lambda i: i[1], reverse=True)
        print('Configs in order of score: ')
        pprint.pprint(configs)
        self.n_neighbors = configs[0][0] 
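A detail worth noting in this example: the copied estimator is fitted once, and only n_neighbors is reassigned before each scoring call. For scikit-learn-style k-NN models this is sound because fit() merely indexes the training data and n_neighbors is consulted at prediction time; Quadflor's BRKNeighborsClassifier presumably relies on the same property. A sketch of the pattern with scikit-learn's own KNeighborsClassifier:

from sklearn.datasets import make_classification
from sklearn.metrics.scorer import check_scoring
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

X, y = make_classification(n_samples=200, random_state=0)
X_train, X_validate, y_train, y_validate = train_test_split(X, y, random_state=0)

knn = KNeighborsClassifier().fit(X_train, y_train)  # fit once
scorer = check_scoring(knn, scoring='f1')
for k in (1, 3, 5, 7):
    knn.n_neighbors = k                             # no refit needed
    print(k, scorer(knn, X_validate, y_validate))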
Example #2
Source File: test_validation.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_cross_validate():
    # Compute train and test mse/r2 scores
    cv = KFold(n_splits=5)

    # Regression
    X_reg, y_reg = make_regression(n_samples=30, random_state=0)
    reg = Ridge(random_state=0)

    # Classification
    X_clf, y_clf = make_classification(n_samples=30, random_state=0)
    clf = SVC(kernel="linear", random_state=0)

    for X, y, est in ((X_reg, y_reg, reg), (X_clf, y_clf, clf)):
        # It's okay to evaluate regression metrics on classification too
        mse_scorer = check_scoring(est, 'neg_mean_squared_error')
        r2_scorer = check_scoring(est, 'r2')
        train_mse_scores = []
        test_mse_scores = []
        train_r2_scores = []
        test_r2_scores = []
        fitted_estimators = []
        for train, test in cv.split(X, y):
            # Clone the estimator under test (not just the regressor) so each
            # fold is fitted independently.
            est = clone(est).fit(X[train], y[train])
            train_mse_scores.append(mse_scorer(est, X[train], y[train]))
            train_r2_scores.append(r2_scorer(est, X[train], y[train]))
            test_mse_scores.append(mse_scorer(est, X[test], y[test]))
            test_r2_scores.append(r2_scorer(est, X[test], y[test]))
            fitted_estimators.append(est)

        train_mse_scores = np.array(train_mse_scores)
        test_mse_scores = np.array(test_mse_scores)
        train_r2_scores = np.array(train_r2_scores)
        test_r2_scores = np.array(test_r2_scores)
        fitted_estimators = np.array(fitted_estimators)

        scores = (train_mse_scores, test_mse_scores, train_r2_scores,
                  test_r2_scores, fitted_estimators)

        check_cross_validate_single_metric(est, X, y, scores)
        check_cross_validate_multi_metric(est, X, y, scores) 
Example #3
Source File: _validation.py    From mriqc with BSD 3-Clause "New" or "Revised" License
def cross_val_score(
    estimator,
    X,
    y=None,
    groups=None,
    scoring=None,
    cv=None,
    n_jobs=1,
    verbose=0,
    fit_params=None,
    pre_dispatch="2*n_jobs",
):
    """
    Evaluate a score by cross-validation
    """
    if not isinstance(scoring, (list, tuple)):
        scoring = [scoring]

    X, y, groups = indexable(X, y, groups)

    cv = check_cv(cv, y, classifier=is_classifier(estimator))
    splits = list(cv.split(X, y, groups))
    scorer = [check_scoring(estimator, scoring=s) for s in scoring]
    # We clone the estimator to make sure that all the folds are
    # independent, and that it is pickle-able.
    parallel = Parallel(n_jobs=n_jobs, verbose=verbose, pre_dispatch=pre_dispatch)
    scores = parallel(
        delayed(_fit_and_score)(
            clone(estimator), X, y, scorer, train, test, verbose, None, fit_params
        )
        for train, test in splits
    )

    group_order = []
    if hasattr(cv, "groups"):
        # Record one group label per test fold; folds are assumed to be
        # grouped, so the first test index's label identifies the fold.
        group_order = [np.array(cv.groups)[test].tolist()[0] for _, test in splits]
    return np.squeeze(np.array(scores)), group_order 
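Since this variant accepts a list of scoring specifications and returns the per-fold scores together with a group order, a call might look like the hypothetical sketch below. The exact shape of scores depends on what mriqc's _fit_and_score returns per fold, and group_order stays empty unless the CV splitter carries a groups attribute (an mriqc convention; plain scikit-learn splitters do not):

from sklearn.datasets import make_classification
from sklearn.svm import SVC

X, y = make_classification(n_samples=60, random_state=0)

scores, group_order = cross_val_score(
    SVC(kernel="linear"), X, y,
    scoring=["accuracy", "roc_auc"],  # one scorer is built per entry
    cv=5,
)
print(scores.shape, group_order)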
Example #4
Source File: helper.py    From mriqc with BSD 3-Clause "New" or "Revised" License
def _score(self, X, y, scoring=None, clf=None):
        from sklearn.model_selection._validation import _score

        if scoring is None:
            scoring = self._scorer

        if clf is None:
            clf = self._estimator

        # Resolve the scoring argument into a scorer and delegate to sklearn's
        # private _score helper.
        return _score(clf, X, y, check_scoring(clf, scoring=scoring)) 
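Note that sklearn.model_selection._validation._score is a private helper whose signature has changed across scikit-learn releases. A version-stable sketch of the same behaviour using only public pieces might look like this (score_estimator is a hypothetical name, not part of mriqc):

from sklearn.metrics.scorer import check_scoring

def score_estimator(clf, X, y, scoring=None):
    # Resolve scoring (string, scorer, or None -> clf.score) and apply it.
    scorer = check_scoring(clf, scoring=scoring)
    return scorer(clf, X, y)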
Example #5
Source File: test_validation.py    From twitter-stock-recommendation with MIT License
def test_cross_validate():
    # Compute train and test mse/r2 scores
    cv = KFold(n_splits=5)

    # Regression
    X_reg, y_reg = make_regression(n_samples=30, random_state=0)
    reg = Ridge(random_state=0)

    # Classification
    X_clf, y_clf = make_classification(n_samples=30, random_state=0)
    clf = SVC(kernel="linear", random_state=0)

    for X, y, est in ((X_reg, y_reg, reg), (X_clf, y_clf, clf)):
        # It's okay to evaluate regression metrics on classification too
        mse_scorer = check_scoring(est, 'neg_mean_squared_error')
        r2_scorer = check_scoring(est, 'r2')
        train_mse_scores = []
        test_mse_scores = []
        train_r2_scores = []
        test_r2_scores = []
        for train, test in cv.split(X, y):
            # Clone the estimator under test (not just the regressor) so each
            # fold is fitted independently.
            est = clone(est).fit(X[train], y[train])
            train_mse_scores.append(mse_scorer(est, X[train], y[train]))
            train_r2_scores.append(r2_scorer(est, X[train], y[train]))
            test_mse_scores.append(mse_scorer(est, X[test], y[test]))
            test_r2_scores.append(r2_scorer(est, X[test], y[test]))

        train_mse_scores = np.array(train_mse_scores)
        test_mse_scores = np.array(test_mse_scores)
        train_r2_scores = np.array(train_r2_scores)
        test_r2_scores = np.array(test_r2_scores)

        scores = (train_mse_scores, test_mse_scores, train_r2_scores,
                  test_r2_scores)

        yield check_cross_validate_single_metric, est, X, y, scores
        yield check_cross_validate_multi_metric, est, X, y, scores 
Example #6
Source File: _validation.py    From mriqc with BSD 3-Clause "New" or "Revised" License
def permutation_test_score(
    estimator,
    X,
    y,
    groups=None,
    cv=None,
    n_permutations=100,
    n_jobs=1,
    random_state=0,
    verbose=0,
    scoring=None,
):
    """
    Evaluate the significance of a cross-validated score with permutations,
    as in test 1 of [Ojala2010]_.

    A modification of scikit-learn's original permutation test score function
    that returns the raw permutation scores, so the p-value can be computed
    outside this function and the scores reused.


    .. [Ojala2010] Ojala and Garriga. Permutation Tests for Studying Classifier
                   Performance.  The Journal of Machine Learning Research (2010)
                   vol. 11

    """
    X, y, groups = indexable(X, y, groups)

    cv = check_cv(cv, y, classifier=is_classifier(estimator))
    scorer = check_scoring(estimator, scoring=scoring)
    random_state = check_random_state(random_state)

    # We clone the estimator to make sure that all the folds are
    # independent, and that it is pickle-able.
    permutation_scores = Parallel(n_jobs=n_jobs, verbose=verbose)(
        delayed(_permutation_test_score)(
            clone(estimator), X, _shuffle(y, groups, random_state), groups, cv, scorer
        )
        for _ in range(n_permutations)
    )
    permutation_scores = np.array(permutation_scores)
    return permutation_scores 
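Because this variant returns only the raw permutation scores, the p-value is computed by the caller. The formula used by scikit-learn's original permutation_test_score, following [Ojala2010], is (C + 1) / (n_permutations + 1), where C counts permutations scoring at least as well as the unpermuted labels; a sketch (permutation_p_value is a hypothetical helper):

import numpy as np

def permutation_p_value(score, permutation_scores):
    # p = (C + 1) / (n_permutations + 1), where C is the number of
    # permutation scores >= the score obtained on the unpermuted labels.
    permutation_scores = np.asarray(permutation_scores)
    return (np.sum(permutation_scores >= score) + 1.0) / (permutation_scores.size + 1.0)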
Example #7
Source File: helpers.py    From lale with Apache License 2.0
def cross_val_score_track_trials(estimator, X, y=None, scoring=accuracy_score, cv=5, args_to_scorer=None):
    """
    Use the given estimator to perform fit and predict for splits defined by 'cv' and compute the given score on 
    each of the splits.

    Parameters
    ----------

    estimator: A valid sklearn_wrapper estimator
    X, y: Valid data and target values that work with the estimator
    scoring: string or a scorer object created using
        https://scikit-learn.org/stable/modules/generated/sklearn.metrics.make_scorer.html#sklearn.metrics.make_scorer.
        A string from sklearn.metrics.SCORERS.keys() can be used, or a scorer created from one of
        sklearn.metrics (https://scikit-learn.org/stable/modules/classes.html#module-sklearn.metrics).
        A completely custom scorer object can be created from a python function following the example at
        https://scikit-learn.org/stable/modules/model_evaluation.html.
        The metric has to return a scalar value.
    cv: an integer, or an object with a split method that yields (train, test) splits as arrays of indices.
        An integer is used as the number of folds in sklearn.model_selection.StratifiedKFold; the default is 5.
        Note that any of the iterators from https://scikit-learn.org/stable/modules/cross_validation.html#cross-validation-iterators can be used here.
    args_to_scorer: A dictionary of additional keyword arguments to pass to the scorer.
        Used for cases where the scorer has a signature such as ``scorer(estimator, X, y, **kwargs)``.
    Returns
    -------
        cv_results: a list of scores corresponding to each cross validation fold
    """
    if isinstance(cv, int):
        cv = StratifiedKFold(cv)

    if args_to_scorer is None:
        args_to_scorer={}
    scorer = check_scoring(estimator, scoring=scoring)
    cv_results: List[float] = []
    log_loss_results = []
    time_results = []
    for train, test in cv.split(X, y):
        X_train, y_train = split_with_schemas(estimator, X, y, train)
        X_test, y_test = split_with_schemas(estimator, X, y, test, train)
        start = time.time()
        # Not calling sklearn.base.clone() here, because:
        #   (1) For Lale pipelines, clone() calls the pipeline constructor
        #       with edges=None, so the resulting topology is incorrect.
        #   (2) For Lale individual operators, the fit() method already
        #       clones the impl object, so cloning again is redundant.
        trained = estimator.fit(X_train, y_train)
        score_value = scorer(trained, X_test, y_test, **args_to_scorer)
        execution_time = time.time() - start
        # not all estimators have predict probability
        try:
            y_pred_proba = trained.predict_proba(X_test)
            logloss = log_loss(y_true=y_test, y_pred=y_pred_proba)
            log_loss_results.append(logloss)
        except BaseException:
            logger.debug("Warning, log loss cannot be computed")
        cv_results.append(score_value)
        time_results.append(execution_time)
    result = np.array(cv_results).mean(), np.array(log_loss_results).mean(), np.array(time_results).mean()
    return result 
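The function returns a 3-tuple of fold means (score, log loss, execution time), so a caller unpacks it directly. A hypothetical invocation, assuming a Lale operator such as lale.lib.sklearn.LogisticRegression and plain numpy inputs:

from lale.lib.sklearn import LogisticRegression
from sklearn.datasets import make_classification

X, y = make_classification(n_samples=100, random_state=0)

mean_score, mean_log_loss, mean_time = cross_val_score_track_trials(
    LogisticRegression(), X, y, scoring="accuracy", cv=5)
print(mean_score, mean_log_loss, mean_time)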
Example #8
Source File: smac.py    From lale with Apache License 2.0
def fit(self, X_train, y_train):
        self.cv = check_cv(self.cv, y=y_train, classifier=True)  # TODO: Replace the classifier flag value by using tags?

        def smac_train_test(trainable, X_train, y_train):
            try:
                cv_score, logloss, execution_time = cross_val_score_track_trials(trainable, X_train, y_train, cv=self.cv, scoring=self.scoring)
                logger.debug("Successful trial of SMAC")
            except BaseException as e:
                # If cross validation fails, fall back to a score based on a
                # random train-test split as the evaluation criterion.
                if self.handle_cv_failure:
                    X_train_part, X_validation, y_train_part, y_validation = train_test_split(X_train, y_train, test_size=0.20)
                    start = time.time()
                    trained = trainable.fit(X_train_part, y_train_part)
                    scorer = check_scoring(trainable, scoring=self.scoring)
                    cv_score = scorer(trained, X_validation, y_validation)
                    execution_time = time.time() - start
                    y_pred_proba = trained.predict_proba(X_validation)
                    try:
                        logloss = log_loss(y_true=y_validation, y_pred=y_pred_proba)
                    except BaseException:
                        logloss = 0
                        logger.debug("Warning, log loss cannot be computed")
                else:
                    logger.debug("Error {} with pipeline:{}".format(e, trainable.to_json()))
                    raise e
            return cv_score, logloss, execution_time

        def f(trainable):
            return_dict = {}
            try:
                score, logloss, execution_time = smac_train_test(trainable, X_train=X_train, y_train=y_train)
                return_dict = {
                    'loss': self.best_score - score,
                    'time': execution_time,
                    'log_loss': logloss
                }
            except BaseException as e:
                logger.warning(f"Exception caught in SMACCV:{type(e)}, {traceback.format_exc()}, SMAC will set a cost_for_crash to MAXINT.")
                raise e
            return return_dict['loss']

        try:
            smac = orig_SMAC(scenario=self.scenario, rng=np.random.RandomState(42),
                    tae_runner=lale_op_smac_tae(self.estimator, f))
            incumbent = smac.optimize()
            self.trials = smac.get_runhistory()
            # Get the trainable corresponding to the best params and train it
            # on the entire training dataset.
            trainable = lale_trainable_op_from_config(self.estimator, incumbent)
            trained = trainable.fit(X_train, y_train)
            self._best_estimator = trained
        except BudgetExhaustedException:
            logger.warning('Maximum allotted optimization time exceeded. Optimization exited prematurely.')
        except BaseException as e:
            logger.warning('Error during optimization: {}'.format(e))
            self._best_estimator = None

        return self