Python sklearn.metrics.scorer.check_scoring() Examples
The following are 8 code examples of sklearn.metrics.scorer.check_scoring(), collected from open-source projects. The originating project and source file are listed above each example. You may also want to check out the other functions and classes available in the sklearn.metrics.scorer module.
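Before the examples: check_scoring(estimator, scoring=...) resolves a scoring string or callable into a scorer with the signature scorer(estimator, X, y); when scoring is None it falls back to the estimator's own score method. Below is a minimal sketch, assuming a recent scikit-learn where the public import path is sklearn.metrics.check_scoring (the sklearn.metrics.scorer module used in the examples on this page is the older location):

    # Minimal sketch of check_scoring usage; assumes a modern scikit-learn where
    # the public import path is sklearn.metrics.check_scoring (older releases
    # exposed it as sklearn.metrics.scorer.check_scoring).
    from sklearn.datasets import make_classification
    from sklearn.linear_model import LogisticRegression
    from sklearn.metrics import check_scoring
    from sklearn.model_selection import train_test_split

    X, y = make_classification(n_samples=100, random_state=0)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

    clf = LogisticRegression().fit(X_train, y_train)

    # Resolve a scoring string into a callable scorer(estimator, X, y).
    scorer = check_scoring(clf, scoring='accuracy')
    print(scorer(clf, X_test, y_test))

    # With scoring=None the returned scorer delegates to clf.score(X, y).
    default_scorer = check_scoring(clf, scoring=None)
    print(default_scorer(clf, X_test, y_test))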
Example #1
Source File: br_kneighbor_classifier.py From Quadflor with BSD 3-Clause "New" or "Revised" License
def _optimize_n_neighbors(self, X, y):
    print('Auto optimizing n_neighbors using ' + str(self.n_neighbor_candidates))
    X_train, X_validate, y_train, y_validate = self._get_split(X, y)
    estimator = copy.copy(self)
    estimator.auto_optimize_k = False
    estimator.fit(X_train, y_train)
    scorer = check_scoring(estimator, scoring=self.scoring)
    configs = []
    for n_neighbors in self.n_neighbor_candidates:
        estimator.n_neighbors = n_neighbors
        score = scorer(estimator, X_validate, y_validate)
        print('N_neighbors = ' + str(n_neighbors) + ' score: ' + str(self.scoring) + ' ' + str(score))
        configs.append((n_neighbors, score))
    configs = sorted(configs, key=lambda i: i[1], reverse=True)
    print('Configs in order of score: ')
    pprint.pprint(configs)
    self.n_neighbors = configs[0][0]
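The pattern above builds a scorer once via check_scoring and reuses it while sweeping n_neighbors on a held-out split. A self-contained sketch of the same idea, with hypothetical data and a plain KNeighborsClassifier standing in for Quadflor's estimator:

    # Hypothetical, self-contained sketch of the same tuning loop: one scorer
    # from check_scoring, reused to compare n_neighbors candidates on a held-out split.
    from sklearn.datasets import make_classification
    from sklearn.metrics import check_scoring
    from sklearn.model_selection import train_test_split
    from sklearn.neighbors import KNeighborsClassifier

    X, y = make_classification(n_samples=200, random_state=0)
    X_train, X_val, y_train, y_val = train_test_split(X, y, random_state=0)

    results = []
    for n_neighbors in (1, 3, 5, 7):
        est = KNeighborsClassifier(n_neighbors=n_neighbors).fit(X_train, y_train)
        scorer = check_scoring(est, scoring='f1')
        results.append((n_neighbors, scorer(est, X_val, y_val)))

    # Highest score first, mirroring the sorted(configs, ...) step above.
    best_n, best_score = max(results, key=lambda r: r[1])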
Example #2
Source File: test_validation.py From Mastering-Elasticsearch-7.0 with MIT License
def test_cross_validate():
    # Compute train and test mse/r2 scores
    cv = KFold(n_splits=5)

    # Regression
    X_reg, y_reg = make_regression(n_samples=30, random_state=0)
    reg = Ridge(random_state=0)

    # Classification
    X_clf, y_clf = make_classification(n_samples=30, random_state=0)
    clf = SVC(kernel="linear", random_state=0)

    for X, y, est in ((X_reg, y_reg, reg), (X_clf, y_clf, clf)):
        # It's okay to evaluate regression metrics on classification too
        mse_scorer = check_scoring(est, 'neg_mean_squared_error')
        r2_scorer = check_scoring(est, 'r2')

        train_mse_scores = []
        test_mse_scores = []
        train_r2_scores = []
        test_r2_scores = []
        fitted_estimators = []
        for train, test in cv.split(X, y):
            est = clone(reg).fit(X[train], y[train])
            train_mse_scores.append(mse_scorer(est, X[train], y[train]))
            train_r2_scores.append(r2_scorer(est, X[train], y[train]))
            test_mse_scores.append(mse_scorer(est, X[test], y[test]))
            test_r2_scores.append(r2_scorer(est, X[test], y[test]))
            fitted_estimators.append(est)
        train_mse_scores = np.array(train_mse_scores)
        test_mse_scores = np.array(test_mse_scores)
        train_r2_scores = np.array(train_r2_scores)
        test_r2_scores = np.array(test_r2_scores)
        fitted_estimators = np.array(fitted_estimators)

        scores = (train_mse_scores, test_mse_scores, train_r2_scores,
                  test_r2_scores, fitted_estimators)

        check_cross_validate_single_metric(est, X, y, scores)
        check_cross_validate_multi_metric(est, X, y, scores)
Example #3
Source File: _validation.py From mriqc with BSD 3-Clause "New" or "Revised" License
def cross_val_score(
    estimator,
    X,
    y=None,
    groups=None,
    scoring=None,
    cv=None,
    n_jobs=1,
    verbose=0,
    fit_params=None,
    pre_dispatch="2*n_jobs",
):
    """
    Evaluate a score by cross-validation
    """
    if not isinstance(scoring, (list, tuple)):
        scoring = [scoring]

    X, y, groups = indexable(X, y, groups)

    cv = check_cv(cv, y, classifier=is_classifier(estimator))
    splits = list(cv.split(X, y, groups))
    scorer = [check_scoring(estimator, scoring=s) for s in scoring]
    # We clone the estimator to make sure that all the folds are
    # independent, and that it is pickle-able.
    parallel = Parallel(n_jobs=n_jobs, verbose=verbose, pre_dispatch=pre_dispatch)
    scores = parallel(
        delayed(_fit_and_score)(
            clone(estimator), X, y, scorer, train, test, verbose, None, fit_params
        )
        for train, test in splits
    )

    group_order = []
    if hasattr(cv, "groups"):
        group_order = [np.array(cv.groups)[test].tolist()[0] for _, test in splits]
    return np.squeeze(np.array(scores)), group_order
Example #4
Source File: helper.py From mriqc with BSD 3-Clause "New" or "Revised" License
def _score(self, X, y, scoring=None, clf=None):
    from sklearn.model_selection._validation import _score

    if scoring is None:
        scoring = self._scorer

    if clf is None:
        clf = self._estimator

    return _score(clf, X, y, check_scoring(clf, scoring=scoring))
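_score here is a private scikit-learn helper whose signature has changed across releases; the stable part of the pattern is that check_scoring(clf, scoring=...) returns a callable that can be applied directly. A public-API sketch of the same behavior (score_with is a hypothetical helper name, not part of mriqc):

    # Sketch of the equivalent behaviour using only public API: resolve the
    # scoring argument and apply the resulting scorer directly, instead of going
    # through the private sklearn.model_selection._validation._score helper.
    from sklearn.metrics import check_scoring

    def score_with(clf, X, y, scoring=None):
        scorer = check_scoring(clf, scoring=scoring)  # falls back to clf.score if scoring is None
        return scorer(clf, X, y)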
Example #5
Source File: test_validation.py From twitter-stock-recommendation with MIT License
def test_cross_validate():
    # Compute train and test mse/r2 scores
    cv = KFold(n_splits=5)

    # Regression
    X_reg, y_reg = make_regression(n_samples=30, random_state=0)
    reg = Ridge(random_state=0)

    # Classification
    X_clf, y_clf = make_classification(n_samples=30, random_state=0)
    clf = SVC(kernel="linear", random_state=0)

    for X, y, est in ((X_reg, y_reg, reg), (X_clf, y_clf, clf)):
        # It's okay to evaluate regression metrics on classification too
        mse_scorer = check_scoring(est, 'neg_mean_squared_error')
        r2_scorer = check_scoring(est, 'r2')

        train_mse_scores = []
        test_mse_scores = []
        train_r2_scores = []
        test_r2_scores = []
        for train, test in cv.split(X, y):
            est = clone(reg).fit(X[train], y[train])
            train_mse_scores.append(mse_scorer(est, X[train], y[train]))
            train_r2_scores.append(r2_scorer(est, X[train], y[train]))
            test_mse_scores.append(mse_scorer(est, X[test], y[test]))
            test_r2_scores.append(r2_scorer(est, X[test], y[test]))
        train_mse_scores = np.array(train_mse_scores)
        test_mse_scores = np.array(test_mse_scores)
        train_r2_scores = np.array(train_r2_scores)
        test_r2_scores = np.array(test_r2_scores)

        scores = (train_mse_scores, test_mse_scores, train_r2_scores,
                  test_r2_scores)

        yield check_cross_validate_single_metric, est, X, y, scores
        yield check_cross_validate_multi_metric, est, X, y, scores
Example #6
Source File: _validation.py From mriqc with BSD 3-Clause "New" or "Revised" License
def permutation_test_score(
    estimator,
    X,
    y,
    groups=None,
    cv=None,
    n_permutations=100,
    n_jobs=1,
    random_state=0,
    verbose=0,
    scoring=None,
):
    """
    Evaluate the significance of a cross-validated score with permutations,
    as in test 1 of [Ojala2010]_.

    A modification of original sklearn's permutation test score function
    to evaluate p-value outside this function, so that the score can be
    reused from outside.

    .. [Ojala2010] Ojala and Garriga. Permutation Tests for Studying Classifier
                   Performance. The Journal of Machine Learning Research (2010)
                   vol. 11
    """
    X, y, groups = indexable(X, y, groups)

    cv = check_cv(cv, y, classifier=is_classifier(estimator))
    scorer = check_scoring(estimator, scoring=scoring)
    random_state = check_random_state(random_state)

    # We clone the estimator to make sure that all the folds are
    # independent, and that it is pickle-able.
    permutation_scores = Parallel(n_jobs=n_jobs, verbose=verbose)(
        delayed(_permutation_test_score)(
            clone(estimator), X, _shuffle(y, groups, random_state), groups, cv, scorer
        )
        for _ in range(n_permutations)
    )
    permutation_scores = np.array(permutation_scores)
    return permutation_scores
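The docstring notes that the p-value is computed outside this modified function. The usual empirical p-value (test 1 of Ojala and Garriga, and what sklearn's own permutation_test_score reports) compares the unpermuted cross-validated score against the permutation distribution; a sketch, where score is assumed to be the score obtained on the real labels:

    # Sketch of the p-value step that the modified function leaves to the caller,
    # following the standard empirical formula. `score` is assumed to be the
    # cross-validated score obtained on the unpermuted labels.
    import numpy as np

    def permutation_pvalue(score, permutation_scores):
        permutation_scores = np.asarray(permutation_scores)
        n_permutations = len(permutation_scores)
        return (np.sum(permutation_scores >= score) + 1.0) / (n_permutations + 1)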
Example #7
Source File: helpers.py From lale with Apache License 2.0
def cross_val_score_track_trials(estimator, X, y=None, scoring=accuracy_score, cv=5, args_to_scorer=None):
    """
    Use the given estimator to perform fit and predict for splits defined by 'cv'
    and compute the given score on each of the splits.

    Parameters
    ----------
    estimator: A valid sklearn_wrapper estimator
    X, y: Valid data and target values that work with the estimator
    scoring: string or a scorer object created using
        https://scikit-learn.org/stable/modules/generated/sklearn.metrics.make_scorer.html#sklearn.metrics.make_scorer.
        A string from sklearn.metrics.SCORERS.keys() can be used or a scorer created from one of
        sklearn.metrics (https://scikit-learn.org/stable/modules/classes.html#module-sklearn.metrics).
        A completely custom scorer object can be created from a python function following the example at
        https://scikit-learn.org/stable/modules/model_evaluation.html
        The metric has to return a scalar value,
    cv: an integer or an object that has a split function as a generator yielding (train, test) splits
        as arrays of indices. Integer value is used as number of folds in
        sklearn.model_selection.StratifiedKFold, default is 5.
        Note that any of the iterators from
        https://scikit-learn.org/stable/modules/cross_validation.html#cross-validation-iterators
        can be used here.
    args_to_scorer: A dictionary of additional keyword arguments to pass to the scorer.
        Used for cases where the scorer has a signature such as ``scorer(estimator, X, y, **kwargs)``.

    Returns
    -------
    cv_results: a list of scores corresponding to each cross validation fold
    """
    if isinstance(cv, int):
        cv = StratifiedKFold(cv)
    if args_to_scorer is None:
        args_to_scorer = {}
    scorer = check_scoring(estimator, scoring=scoring)
    cv_results: List[float] = []
    log_loss_results = []
    time_results = []
    for train, test in cv.split(X, y):
        X_train, y_train = split_with_schemas(estimator, X, y, train)
        X_test, y_test = split_with_schemas(estimator, X, y, test, train)
        start = time.time()
        # Not calling sklearn.base.clone() here, because:
        #  (1) For Lale pipelines, clone() calls the pipeline constructor
        #      with edges=None, so the resulting topology is incorrect.
        #  (2) For Lale individual operators, the fit() method already
        #      clones the impl object, so cloning again is redundant.
        trained = estimator.fit(X_train, y_train)
        score_value = scorer(trained, X_test, y_test, **args_to_scorer)
        execution_time = time.time() - start
        # not all estimators have predict probability
        try:
            y_pred_proba = trained.predict_proba(X_test)
            logloss = log_loss(y_true=y_test, y_pred=y_pred_proba)
            log_loss_results.append(logloss)
        except BaseException:
            logger.debug("Warning, log loss cannot be computed")
        cv_results.append(score_value)
        time_results.append(execution_time)
    result = np.array(cv_results).mean(), np.array(log_loss_results).mean(), np.array(execution_time).mean()
    return result
Example #8
Source File: smac.py From lale with Apache License 2.0
def fit(self, X_train, y_train):
    self.cv = check_cv(self.cv, y=y_train, classifier=True)  # TODO: Replace the classifier flag value by using tags?

    def smac_train_test(trainable, X_train, y_train):
        try:
            cv_score, logloss, execution_time = cross_val_score_track_trials(trainable, X_train, y_train, cv=self.cv, scoring=self.scoring)
            logger.debug("Successful trial of SMAC")
        except BaseException as e:
            # If there is any error in cross validation, use the score based on
            # a random train-test split as the evaluation criterion
            if self.handle_cv_failure:
                X_train_part, X_validation, y_train_part, y_validation = train_test_split(X_train, y_train, test_size=0.20)
                start = time.time()
                trained = trainable.fit(X_train_part, y_train_part)
                scorer = check_scoring(trainable, scoring=self.scoring)
                cv_score = scorer(trained, X_validation, y_validation)
                execution_time = time.time() - start
                y_pred_proba = trained.predict_proba(X_validation)
                try:
                    logloss = log_loss(y_true=y_validation, y_pred=y_pred_proba)
                except BaseException:
                    logloss = 0
                    logger.debug("Warning, log loss cannot be computed")
            else:
                logger.debug("Error {} with pipeline:{}".format(e, trainable.to_json()))
                raise e
        return cv_score, logloss, execution_time

    def f(trainable):
        return_dict = {}
        try:
            score, logloss, execution_time = smac_train_test(trainable, X_train=X_train, y_train=y_train)
            return_dict = {
                'loss': self.best_score - score,
                'time': execution_time,
                'log_loss': logloss
            }
        except BaseException as e:
            logger.warning(f"Exception caught in SMACCV:{type(e)}, {traceback.format_exc()}, SMAC will set a cost_for_crash to MAXINT.")
            raise e
        return return_dict['loss']

    try:
        smac = orig_SMAC(scenario=self.scenario, rng=np.random.RandomState(42),
                         tae_runner=lale_op_smac_tae(self.estimator, f))
        incumbent = smac.optimize()
        self.trials = smac.get_runhistory()
        trainable = lale_trainable_op_from_config(self.estimator, incumbent)
        # get the trainable corresponding to the best params and train it on the entire training dataset.
        trained = trainable.fit(X_train, y_train)
        self._best_estimator = trained
    except BudgetExhaustedException:
        logger.warning('Maximum alloted optimization time exceeded. Optimization exited prematurely')
    except BaseException as e:
        logger.warning('Error during optimization: {}'.format(e))
        self._best_estimator = None
    return self