Python sklearn.model_selection.cross_val_score() Examples
The following are 29 code examples that show how to use sklearn.model_selection.cross_val_score(). They are extracted from open source projects; the project, author, file, and license are listed above each example. You may also want to check out all available functions and classes of the module sklearn.model_selection.
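To orient first-time users before the project examples, here is a minimal sketch of the basic call (the estimator, dataset, and fold count below are illustrative choices, not taken from any of the projects that follow):

from sklearn.datasets import load_iris
from sklearn.model_selection import cross_val_score
from sklearn.svm import SVC

X, y = load_iris(return_X_y=True)
clf = SVC(kernel='linear')
# Returns one score per fold; with a classifier and no explicit `scoring`,
# the estimator's own score method (accuracy for SVC) is used.
scores = cross_val_score(clf, X, y, cv=5)
print(scores.mean(), scores.std())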
Example 1
Project: text-classifier Author: shibing624 File: grid_search_cv.py License: Apache License 2.0

def search_cv(x_train, y_train, x_test, y_test, model=GradientBoostingClassifier(n_estimators=30)):
    # Use grid search to find the best parameters.
    # NOTE: this grid ('kernel', 'C', 'gamma') fits an SVC-style estimator; the
    # GradientBoostingClassifier default has no such parameters, so a different
    # `model` must be passed in for the search to run.
    parameters = {'kernel': ('linear', 'rbf'), 'C': [1, 2, 4],
                  'gamma': [0.125, 0.25, 0.5, 1, 2, 4]}
    clf = GridSearchCV(model, param_grid=parameters)
    grid_search = clf.fit(x_train, y_train)
    # Score the results.
    print("Best score: %0.3f" % grid_search.best_score_)
    print(grid_search.best_estimator_)
    # best params
    print('best params:', clf.best_params_)
    print('-----grid search end------------')
    print('on all train set')
    scores = cross_val_score(grid_search.best_estimator_, x_train, y_train,
                             cv=3, scoring='accuracy')
    print(scores.mean(), scores)
    print('on test set')
    scores = cross_val_score(grid_search.best_estimator_, x_test, y_test,
                             cv=3, scoring='accuracy')
    print(scores.mean(), scores)
Example 2
Project: MaliciousMacroBot Author: egaus File: mmbot.py License: MIT License

def mmb_evaluate_model(self):
    """
    Returns scores from cross validation evaluation on the malicious / benign classifier
    """
    predictive_features = self.features['predictive_features']
    self.clf_X = self.modeldata[predictive_features].values
    self.clf_y = np.array(self.modeldata['label'])

    X_train, X_test, y_train, y_test = train_test_split(self.clf_X, self.clf_y,
                                                        test_size=0.2, random_state=0)
    lb = LabelBinarizer()
    y_train = np.array([number[0] for number in lb.fit_transform(y_train)])

    eval_cls = RandomForestClassifier(n_estimators=100, max_features=.2)
    eval_cls.fit(X_train, y_train)

    recall = cross_val_score(eval_cls, X_train, y_train, cv=5, scoring='recall')
    precision = cross_val_score(eval_cls, X_train, y_train, cv=5, scoring='precision')
    accuracy = cross_val_score(eval_cls, X_train, y_train, cv=5, scoring='accuracy')
    f1_score = cross_val_score(eval_cls, X_train, y_train, cv=5, scoring='f1_macro')

    return {'accuracy': accuracy, 'f1': f1_score, 'precision': precision, 'recall': recall}
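The four separate cross_val_score calls above refit the classifier once per fold for every metric. As a sketch of an alternative (assuming scikit-learn 0.19+, where cross_validate accepts a list of scorer names; the synthetic data here is illustrative), the same four metrics can be computed with one fit per fold:

from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_validate

X, y = make_classification(n_samples=200, random_state=0)
clf = RandomForestClassifier(n_estimators=100, random_state=0)
# One fit per fold; every requested metric is scored on the same folds.
results = cross_validate(clf, X, y, cv=5,
                         scoring=['accuracy', 'precision', 'recall', 'f1_macro'])
print(results['test_accuracy'].mean(), results['test_f1_macro'].mean())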
Example 3
Project: fylearn Author: sorend File: test_nfpc.py License: MIT License

def test_build_meowa_factory():
    iris = datasets.load_iris()
    X = iris.data
    y = iris.target
    from sklearn.preprocessing import MinMaxScaler
    X = MinMaxScaler().fit_transform(X)
    l = nfpc.FuzzyPatternClassifier(membership_factory=t_factory,
                                    aggregation_factory=nfpc.MEOWAFactory())
    from sklearn.model_selection import cross_val_score
    scores = cross_val_score(l, X, y, cv=10)
    mean = np.mean(scores)
    assert 0.80 < mean
Example 4
Project: fylearn Author: sorend File: test_nfpc.py License: MIT License

def test_build_ps_owa_factory():
    iris = datasets.load_iris()
    X = iris.data
    y = iris.target
    from sklearn.preprocessing import MinMaxScaler
    X = MinMaxScaler().fit_transform(X)
    l = nfpc.FuzzyPatternClassifier(
        membership_factory=t_factory,
        aggregation_factory=nfpc.GAOWAFactory(optimizer=nfpc.ps_owa_optimizer())
    )
    from sklearn.model_selection import cross_val_score
    scores = cross_val_score(l, X, y, cv=10)
    mean = np.mean(scores)
    print("mean", mean)
    assert 0.92 < mean
Example 5
Project: fylearn Author: sorend File: test_fpcga.py License: MIT License

def test_classifier_iris():
    iris = load_iris()
    X = iris.data
    y = iris.target
    from sklearn.preprocessing import MinMaxScaler
    X = MinMaxScaler().fit_transform(X)
    l = fpcga.FuzzyPatternClassifierGA(iterations=100, random_state=1)
    from sklearn.model_selection import cross_val_score
    scores = cross_val_score(l, X, y, cv=10)
    assert len(scores) == 10
    assert np.mean(scores) > 0.6
    mean = np.mean(scores)
    print("mean", mean)
    assert 0.92 == pytest.approx(mean, 0.01)
Example 6
Project: Mastering-Elasticsearch-7.0 Author: PacktPublishing File: test_score_objects.py License: MIT License

def test_check_scoring_gridsearchcv():
    # test that check_scoring works on GridSearchCV and pipeline.
    # slightly redundant non-regression test.
    grid = GridSearchCV(LinearSVC(), param_grid={'C': [.1, 1]})
    scorer = check_scoring(grid, "f1")
    assert isinstance(scorer, _PredictScorer)

    pipe = make_pipeline(LinearSVC())
    scorer = check_scoring(pipe, "f1")
    assert isinstance(scorer, _PredictScorer)

    # check that cross_val_score definitely calls the scorer
    # and doesn't make any assumptions about the estimator apart from having a
    # fit.
    scores = cross_val_score(EstimatorWithFit(), [[1], [2], [3]], [1, 0, 1],
                             scoring=DummyScorer())
    assert_array_equal(scores, 1)
Example 7
Project: Mastering-Elasticsearch-7.0 Author: PacktPublishing File: test_validation.py License: MIT License

def test_cross_val_score_predict_groups():
    # Check if ValueError (when groups is None) propagates to cross_val_score
    # and cross_val_predict.
    # Also check if groups is correctly passed to the cv object.
    X, y = make_classification(n_samples=20, n_classes=2, random_state=0)

    clf = SVC(kernel="linear")

    group_cvs = [LeaveOneGroupOut(), LeavePGroupsOut(2), GroupKFold(),
                 GroupShuffleSplit()]
    for cv in group_cvs:
        assert_raise_message(ValueError,
                             "The 'groups' parameter should not be None.",
                             cross_val_score, estimator=clf, X=X, y=y, cv=cv)
        assert_raise_message(ValueError,
                             "The 'groups' parameter should not be None.",
                             cross_val_predict, estimator=clf, X=X, y=y, cv=cv)
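As a counterpart to the error cases above, a minimal sketch of passing groups correctly (the data and group assignment are illustrative):

import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import GroupKFold, cross_val_score
from sklearn.svm import SVC

X, y = make_classification(n_samples=20, n_classes=2, random_state=0)
# Ten groups of two samples each; GroupKFold keeps all samples of a group
# on the same side of every train/test split.
groups = np.repeat(np.arange(10), 2)
scores = cross_val_score(SVC(kernel="linear"), X, y, groups=groups,
                         cv=GroupKFold(n_splits=5))
print(scores)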
Example 8
Project: Mastering-Elasticsearch-7.0 Author: PacktPublishing File: test_validation.py License: MIT License

def test_cross_val_score_pandas():
    # check cross_val_score doesn't destroy pandas dataframe
    types = [(MockDataFrame, MockDataFrame)]
    try:
        from pandas import Series, DataFrame
        types.append((Series, DataFrame))
    except ImportError:
        pass
    for TargetType, InputFeatureType in types:
        # X dataframe, y series
        # 3-fold cross val is used so we need at least 3 samples per class
        X_df, y_ser = InputFeatureType(X), TargetType(y2)
        check_df = lambda x: isinstance(x, InputFeatureType)
        check_series = lambda x: isinstance(x, TargetType)
        clf = CheckingClassifier(check_X=check_df, check_y=check_series)
        cross_val_score(clf, X_df, y_ser)
Example 9
Project: Mastering-Elasticsearch-7.0 Author: PacktPublishing File: test_validation.py License: MIT License

def test_cross_val_score_precomputed():
    # test for svm with precomputed kernel
    svm = SVC(kernel="precomputed")
    iris = load_iris()
    X, y = iris.data, iris.target
    linear_kernel = np.dot(X, X.T)
    score_precomputed = cross_val_score(svm, linear_kernel, y)
    svm = SVC(kernel="linear")
    score_linear = cross_val_score(svm, X, y)
    assert_array_almost_equal(score_precomputed, score_linear)

    # test with callable
    svm = SVC(gamma='scale', kernel=lambda x, y: np.dot(x, y.T))
    score_callable = cross_val_score(svm, X, y)
    assert_array_almost_equal(score_precomputed, score_callable)

    # Error raised for non-square X
    svm = SVC(kernel="precomputed")
    assert_raises(ValueError, cross_val_score, svm, X, y)

    # test error is raised when the precomputed kernel is not array-like
    # or sparse
    assert_raises(ValueError, cross_val_score, svm, linear_kernel.tolist(), y)
Example 10
Project: Mastering-Elasticsearch-7.0 Author: PacktPublishing File: test_validation.py License: MIT License

def test_cross_val_score_with_score_func_classification():
    iris = load_iris()
    clf = SVC(kernel='linear')

    # Default score (should be the accuracy score)
    scores = cross_val_score(clf, iris.data, iris.target, cv=5)
    assert_array_almost_equal(scores, [0.97, 1., 0.97, 0.97, 1.], 2)

    # Correct classification score (aka. zero / one score) - should be the
    # same as the default estimator score
    zo_scores = cross_val_score(clf, iris.data, iris.target,
                                scoring="accuracy", cv=5)
    assert_array_almost_equal(zo_scores, [0.97, 1., 0.97, 0.97, 1.], 2)

    # F1 score (classes are balanced so f1_score should be equal to zero/one
    # score)
    f1_scores = cross_val_score(clf, iris.data, iris.target,
                                scoring="f1_weighted", cv=5)
    assert_array_almost_equal(f1_scores, [0.97, 1., 0.97, 0.97, 1.], 2)
Example 11
Project: Mastering-Elasticsearch-7.0 Author: PacktPublishing File: test_validation.py License: MIT License

def test_score_memmap():
    # Ensure a scalar score of memmap type is accepted
    iris = load_iris()
    X, y = iris.data, iris.target
    clf = MockClassifier()
    tf = tempfile.NamedTemporaryFile(mode='wb', delete=False)
    tf.write(b'Hello world!!!!!')
    tf.close()
    scores = np.memmap(tf.name, dtype=np.float64)
    score = np.memmap(tf.name, shape=(), mode='r', dtype=np.float64)
    try:
        cross_val_score(clf, X, y, scoring=lambda est, X, y: score)
        # non-scalar should still fail
        assert_raises(ValueError, cross_val_score, clf, X, y,
                      scoring=lambda est, X, y: scores)
    finally:
        # Best effort to release the mmap file handles before deleting the
        # backing file under Windows
        scores, score = None, None
        for _ in range(3):
            try:
                os.unlink(tf.name)
                break
            except WindowsError:
                sleep(1.)
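The lambdas above work because `scoring` accepts any callable with the signature scorer(estimator, X_test, y_test) that returns a single number. A minimal sketch of such a custom scorer (the metric is an illustrative choice; balanced_accuracy_score assumes scikit-learn 0.20+):

from sklearn.datasets import load_iris
from sklearn.metrics import balanced_accuracy_score
from sklearn.model_selection import cross_val_score
from sklearn.svm import SVC

def balanced_accuracy(estimator, X, y):
    # Called once per fold with the fitted estimator and that fold's test split.
    return balanced_accuracy_score(y, estimator.predict(X))

X, y = load_iris(return_X_y=True)
scores = cross_val_score(SVC(kernel='linear'), X, y, cv=5,
                         scoring=balanced_accuracy)
print(scores)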
Example 12
Project: GeneticAlgorithmForFeatureSelection Author: renatoosousa File: gaFeatureSelection.py License: MIT License

def getFitness(individual, X, y):
    """
    Feature subset fitness function
    """
    if individual.count(0) != len(individual):
        # get indexes of genes with value 0 (features to drop)
        cols = [index for index in range(len(individual))
                if individual[index] == 0]

        # get features subset
        X_parsed = X.drop(X.columns[cols], axis=1)
        X_subset = pd.get_dummies(X_parsed)

        # apply classification algorithm; `avg` is presumably a mean helper
        # defined or imported elsewhere in the project (e.g. numpy.mean)
        clf = LogisticRegression()
        return (avg(cross_val_score(clf, X_subset, y, cv=5)),)
    else:
        return (0,)
Example 13
Project: CatLearn Author: SUNCAT-Center File: site_stability.py License: GNU General Public License v3.0

def get_chromosome_score(self, X_chromosome):
    """
    Computes fitness using the subset of data in X_chromosome.
    :param X_chromosome: subset of full data set, containing only a selection of the features.
    :return: mean R2 or keras history last column entry.
    """
    np.random.seed(self.random_state)

    # Use either cross validation
    if self.scoring == 'cv':
        scores = cross_val_score(self.clf, X_chromosome, np.array(self.y), cv=self.n_cv)
        return np.mean(scores)
    # Or keras history in the case of neural networks (based on keras/tensorflow)
    else:
        try:
            history = self.clf.fit(X_chromosome, np.array(self.y))
            return history.history[self.scoring][-1]
        except:
            raise ValueError('Use either "cv" or keras history metrics.')
Example 14
Project: brainiak Author: brainiak File: classification.py License: Apache License 2.0

def example_of_cross_validation_using_model_selection(raw_data, labels, num_subjects, num_epochs_per_subj):
    # NOTE: this method does not work for sklearn.svm.SVC with precomputed kernel
    # when the kernel matrix is computed in portions; also, this method only works
    # for self-correlation, i.e. correlation between the same data matrix.

    # no shrinking, set C=1
    svm_clf = svm.SVC(kernel='precomputed', shrinking=False, C=1, gamma='auto')
    #logit_clf = LogisticRegression()
    clf = Classifier(svm_clf, epochs_per_subj=num_epochs_per_subj)
    # doing leave-one-subject-out cross validation
    # no shuffling in cv
    skf = model_selection.StratifiedKFold(n_splits=num_subjects, shuffle=False)
    scores = model_selection.cross_val_score(clf, list(zip(raw_data, raw_data)),
                                             y=labels, cv=skf)
    print(scores)
    logger.info(
        'the overall cross validation accuracy is %.2f' %
        np.mean(scores)
    )
Example 15
Project: brainiak Author: brainiak File: mvpa_voxelselector.py License: Apache License 2.0

def _sfn(data, mask, myrad, bcast_var):
    """Score classifier on searchlight data using cross-validation.

    The classifier is in `bcast_var[2]`. The labels are in `bcast_var[0]`.
    The number of cross-validation folds is in `bcast_var[1]`.
    """
    clf = bcast_var[2]
    masked_data = data[0][mask, :].T
    # print(l[0].shape, mask.shape, data.shape)
    skf = model_selection.StratifiedKFold(n_splits=bcast_var[1],
                                          shuffle=False)
    accuracy = np.mean(model_selection.cross_val_score(clf, masked_data,
                                                       y=bcast_var[0],
                                                       cv=skf, n_jobs=1))
    return accuracy
Example 16
Project: pysaliency Author: matthias-k File: test_crossvalidation.py License: MIT License

def test_image_crossvalidation(stimuli, fixation_trains):
    gsmm = GaussianSaliencyModel()
    cv = ScikitLearnImageCrossValidationGenerator(stimuli, fixation_trains)
    assert unpack_crossval(cv) == [
        ([False, False, False, False, False, True, True, True, True],
         [True, True, True, True, True, False, False, False, False]),
        ([True, True, True, True, True, False, False, False, True],
         [False, False, False, False, False, True, True, True, False]),
        ([True, True, True, True, True, True, True, True, False],
         [False, False, False, False, False, False, False, False, True])
    ]

    X = fixations_to_scikit_learn(fixation_trains, normalize=stimuli,
                                  add_shape=True)

    assert cross_val_score(
        RegularizedKernelDensityEstimator(bandwidth=0.1, regularization=0.1),
        X,
        cv=cv,
        verbose=0).sum()
Example 17
Project: pysaliency Author: matthias-k File: test_crossvalidation.py License: MIT License

def test_image_subject_crossvalidation(stimuli, fixation_trains):
    gsmm = GaussianSaliencyModel()
    cv = ScikitLearnImageSubjectCrossValidationGenerator(stimuli, fixation_trains)
    assert unpack_crossval(cv) == [
        ([False, False, False, True, True, False, False, False, False],
         [True, True, True, False, False, False, False, False, False]),
        ([True, True, True, False, False, False, False, False, False],
         [False, False, False, True, True, False, False, False, False])
    ]

    X = fixations_to_scikit_learn(fixation_trains, normalize=stimuli,
                                  add_shape=True)

    assert cross_val_score(
        RegularizedKernelDensityEstimator(bandwidth=0.1, regularization=0.1),
        X,
        cv=cv,
        verbose=0).sum()
Example 18
Project: Python-Machine-Learning-Cookbook-Second-Edition Author: PacktPublishing File: utilities.py License: MIT License

def print_accuracy_report(classifier, X, y, num_validations=5):
    accuracy = model_selection.cross_val_score(classifier, X, y,
                                               scoring='accuracy', cv=num_validations)
    print("Accuracy: " + str(round(100*accuracy.mean(), 2)) + "%")

    f1 = model_selection.cross_val_score(classifier, X, y,
                                         scoring='f1_weighted', cv=num_validations)
    print("F1: " + str(round(100*f1.mean(), 2)) + "%")

    precision = model_selection.cross_val_score(classifier, X, y,
                                                scoring='precision_weighted', cv=num_validations)
    print("Precision: " + str(round(100*precision.mean(), 2)) + "%")

    recall = model_selection.cross_val_score(classifier, X, y,
                                             scoring='recall_weighted', cv=num_validations)
    print("Recall: " + str(round(100*recall.mean(), 2)) + "%")
Example 19
Project: pyglmnet Author: glm-tools File: test_pyglmnet.py License: MIT License

def test_cv():
    """Simple CV check."""
    # XXX: don't use scikit-learn for tests.
    X, y = make_regression()
    cv = KFold(n_splits=5)

    glm_normal = GLM(distr='gaussian', alpha=0.01, reg_lambda=0.1)
    # check that it returns 5 scores
    scores = cross_val_score(glm_normal, X, y, cv=cv)
    assert len(scores) == 5

    param_grid = [{'alpha': np.linspace(0.01, 0.99, 2)},
                  {'reg_lambda': np.logspace(np.log(0.5), np.log(0.01), 10,
                                             base=np.exp(1))}]
    glmcv = GridSearchCV(glm_normal, param_grid, cv=cv)
    glmcv.fit(X, y)
Example 20
Project: EDeN Author: fabriziocosta File: estimator.py License: MIT License

def cross_val_score(self, graphs, targets, scoring='roc_auc', cv=5):
    """cross_val_score."""
    x = self.transform(graphs)
    scores = cross_val_score(
        self.model, x, targets, cv=cv, scoring=scoring)
    return scores
Example 21
Project: EDeN Author: fabriziocosta File: estimator.py License: MIT License

def cross_val_predict(self, graphs, targets, cv=5):
    """cross_val_predict."""
    x = self.transform(graphs)
    scores = cross_val_predict(
        self.model, x, targets, cv=cv, method='decision_function')
    return scores
Example 22
Project: EDeN Author: fabriziocosta File: estimator.py License: MIT License

def bias_variance_decomposition(self, graphs, targets, cv=5, n_bootstraps=10):
    """bias_variance_decomposition."""
    x = self.transform(graphs)
    score_list = []
    for i in range(n_bootstraps):
        scores = cross_val_score(self.model, x, targets, cv=cv)
        score_list.append(scores)
    score_list = np.array(score_list)
    mean_scores = np.mean(score_list, axis=1)
    std_scores = np.std(score_list, axis=1)
    return mean_scores, std_scores
Example 23
Project: EDeN Author: fabriziocosta File: estimator.py License: MIT License

def _eval_params(graphs, targets, param_distr):
    # sample parameters
    params = _sample_params(param_distr)
    # create model with those parameters
    est = EdenEstimator(**params)
    # run a cross_val_score
    scores = est.cross_val_score(graphs, targets)
    # return average
    return np.mean(scores), params
Example 24
Project: EDeN Author: fabriziocosta File: estimator_utils.py License: MIT License

def estimate_predictive_performance(x_y, estimator=None, n_splits=10, random_state=1):
    """estimate_predictive_performance."""
    x, y = x_y
    cv = ShuffleSplit(n_splits=n_splits, test_size=0.3,
                      random_state=random_state)
    scoring = make_scorer(average_precision_score)
    scores = cross_val_score(estimator, x, y, cv=cv, scoring=scoring)
    return scores
Example 25
Project: Python-ELM Author: masaponto File: elm.py License: MIT License

def main():
    from sklearn import preprocessing
    from sklearn.datasets import fetch_openml as fetch_mldata
    from sklearn.model_selection import ShuffleSplit, KFold, cross_val_score

    db_name = 'australian'
    hid_nums = [100, 200, 300]

    data_set = fetch_mldata(db_name)
    data_set.data = preprocessing.normalize(data_set.data)
    data_set.target = [1 if i == 1 else -1
                       for i in data_set.target.astype(int)]

    for hid_num in hid_nums:
        print(hid_num, end=' ')
        e = ELM(hid_num)
        ave = 0
        for i in range(10):
            cv = KFold(n_splits=5, shuffle=True)
            scores = cross_val_score(
                e, data_set.data, data_set.target,
                cv=cv, scoring='accuracy', n_jobs=-1)
            ave += scores.mean()
        ave /= 10
        print("Accuracy: %0.3f " % ave)
Example 26
Project: Python-ELM Author: masaponto File: ecob_elm.py License: MIT License

def main():
    from sklearn import preprocessing
    from sklearn.datasets import fetch_openml as fetch_mldata
    from sklearn.model_selection import cross_val_score

    db_name = 'iris'
    hid_num = 1000
    data_set = fetch_mldata(db_name, version=1)
    data_set.data = preprocessing.scale(data_set.data)
    data_set.target = preprocessing.LabelEncoder().fit_transform(data_set.target)

    print(db_name)
    print('ECOBELM', hid_num)
    e = ECOBELM(hid_num, c=2**5)
    ave = 0
    for i in range(10):
        scores = cross_val_score(
            e, data_set.data, data_set.target, cv=5, scoring='accuracy')
        ave += scores.mean()
    ave /= 10
    print("Accuracy: %0.2f " % ave)

    print('ELM', hid_num)
    e = ELM(hid_num)
    ave = 0
    for i in range(10):
        scores = cross_val_score(
            e, data_set.data, data_set.target, cv=5, scoring='accuracy')
        ave += scores.mean()
    ave /= 10
    print("Accuracy: %0.2f " % ave)
Example 27
Project: Fundamentals-of-Machine-Learning-with-scikit-learn Author: PacktPublishing File: 8voting_classifier.py License: MIT License

def compute_accuracies(lr, dt, svc, vc, X, Y):
    accuracies = []

    accuracies.append(cross_val_score(lr, X, Y, scoring='accuracy', cv=10).mean())
    accuracies.append(cross_val_score(dt, X, Y, scoring='accuracy', cv=10).mean())
    accuracies.append(cross_val_score(svc, X, Y, scoring='accuracy', cv=10).mean())
    accuracies.append(cross_val_score(vc, X, Y, scoring='accuracy', cv=10).mean())

    print('Accuracies:')
    print(np.array(accuracies))

    return accuracies
Example 28
Project: adam_qas Author: 5hirish File: test_classify_question.py License: GNU General Public License v3.0

def test_classify_question(self):
    training_data_path = os.path.join(CORPUS_DIR, QUESTION_CLASSIFICATION_TRAINING_DATA)
    df_question = pandas.read_csv(training_data_path, sep='|', header=0)
    df_question_train, df_question_test = train_test_split(df_question, test_size=0.2,
                                                           random_state=42)
    predicted_class, clf, df_question_train_label, df_question_train = \
        classify_question(df_question_train=df_question_train,
                          df_question_test=df_question_test)
    scores = cross_val_score(clf, df_question_train, df_question_train_label)
    print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))
    print("SD:", scores.std())
    assert scores.mean() > self.classification_score
Example 29
Project: cloudml-samples Author: GoogleCloudPlatform File: task.py License: Apache License 2.0

def _train_and_evaluate(estimator, dataset, output_dir):
    """Runs model training and evaluation.

    Args:
        estimator: (pipeline.Pipeline), Pipeline instance, assemble pre-processing
            steps and model training
        dataset: (pandas.DataFrame), DataFrame containing training data
        output_dir: (string), directory that the trained model will be exported

    Returns:
        None
    """
    x_train, y_train, x_val, y_val = utils.data_train_test_split(dataset)
    estimator.fit(x_train, y_train)

    # Note: for now, use `cross_val_score` defaults (i.e. 3-fold)
    scores = model_selection.cross_val_score(estimator, x_val, y_val, cv=3)

    logging.info(scores)

    # Write model and eval metrics to `output_dir`
    model_output_path = os.path.join(
        output_dir, 'model', metadata.MODEL_FILE_NAME)
    metric_output_path = os.path.join(
        output_dir, 'experiment', metadata.METRIC_FILE_NAME)

    utils.dump_object(estimator, model_output_path)
    utils.dump_object(scores, metric_output_path)

    # The default name of the metric is training/hptuning/metric.
    # We recommend that you assign a custom name. The only functional
    # difference is that if you use a custom name, you must set the
    # hyperparameterMetricTag value in the HyperparameterSpec object in your
    # job request to match your chosen name.
    hpt = hypertune.HyperTune()
    hpt.report_hyperparameter_tuning_metric(
        hyperparameter_metric_tag='my_metric_tag',
        metric_value=np.mean(scores),
        global_step=1000)