Python sklearn.model_selection.cross_val_score() Examples

The following are 29 code examples showing how to use sklearn.model_selection.cross_val_score(). They are extracted from open source projects; each example lists the project, author, source file, and license it was taken from.

You may also want to check out all available functions and classes of the module sklearn.model_selection.
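
Before the project examples, here is a minimal, self-contained sketch of the basic call pattern. This is an illustration rather than code from any project below; it assumes a recent scikit-learn, and the iris dataset and linear-kernel SVC are arbitrary choices:

from sklearn.datasets import load_iris
from sklearn.model_selection import cross_val_score
from sklearn.svm import SVC

# Load a small benchmark dataset and pick an estimator to evaluate.
X, y = load_iris(return_X_y=True)
clf = SVC(kernel='linear')

# cross_val_score clones the estimator, fits a fresh copy on each of the
# cv=5 training folds, and returns one score per fold.
scores = cross_val_score(clf, X, y, cv=5, scoring='accuracy')
print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))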

Example 1
Project: text-classifier   Author: shibing624   File: grid_search_cv.py    License: Apache License 2.0
def search_cv(x_train, y_train, x_test, y_test, model=SVC()):
    # use grid search to find the best hyperparameters; the grid below is
    # SVC-specific (kernel/C/gamma), so the model must be an SVC
    parameters = {'kernel': ('linear', 'rbf'), 'C': [1, 2, 4], 'gamma': [0.125, 0.25, 0.5, 1, 2, 4]}
    clf = GridSearchCV(model, param_grid=parameters)
    grid_search = clf.fit(x_train, y_train)
    # report the best result
    print("Best score: %0.3f" % grid_search.best_score_)
    print(grid_search.best_estimator_)

    # best params
    print('best params:', clf.best_params_)

    print('-----grid search end------------')
    print('on all train set')
    scores = cross_val_score(grid_search.best_estimator_, x_train, y_train, cv=3, scoring='accuracy')
    print(scores.mean(), scores)
    print('on test set')
    scores = cross_val_score(grid_search.best_estimator_, x_test, y_test, cv=3, scoring='accuracy')
    print(scores.mean(), scores) 
Example 2
Project: MaliciousMacroBot   Author: egaus   File: mmbot.py    License: MIT License
def mmb_evaluate_model(self):
        """
        Returns scores from cross validation evaluation on the malicious / benign classifier
        """
        predictive_features = self.features['predictive_features']
        self.clf_X = self.modeldata[predictive_features].values
        self.clf_y = np.array(self.modeldata['label'])

        X_train, X_test, y_train, y_test = train_test_split(self.clf_X, self.clf_y, test_size=0.2, random_state=0)
        lb = LabelBinarizer()
        y_train = np.array([number[0] for number in lb.fit_transform(y_train)])
        eval_cls = RandomForestClassifier(n_estimators=100, max_features=.2)
        eval_cls.fit(X_train, y_train)

        recall = cross_val_score(eval_cls, X_train, y_train, cv=5, scoring='recall')
        precision = cross_val_score(eval_cls, X_train, y_train, cv=5, scoring='precision')
        accuracy = cross_val_score(eval_cls, X_train, y_train, cv=5, scoring='accuracy')
        f1_score = cross_val_score(eval_cls, X_train, y_train, cv=5, scoring='f1_macro')

        return {'accuracy': accuracy, 'f1': f1_score, 'precision': precision, 'recall': recall} 
Example 3
Project: fylearn   Author: sorend   File: test_nfpc.py    License: MIT License
def test_build_meowa_factory():

    iris = datasets.load_iris()
    X = iris.data
    y = iris.target

    from sklearn.preprocessing import MinMaxScaler
    X = MinMaxScaler().fit_transform(X)

    l = nfpc.FuzzyPatternClassifier(membership_factory=t_factory,
                                    aggregation_factory=nfpc.MEOWAFactory())

    from sklearn.model_selection import cross_val_score

    scores = cross_val_score(l, X, y, cv=10)
    mean = np.mean(scores)

    assert 0.80 < mean 
Example 4
Project: fylearn   Author: sorend   File: test_nfpc.py    License: MIT License
def test_build_ps_owa_factory():

    iris = datasets.load_iris()
    X = iris.data
    y = iris.target

    from sklearn.preprocessing import MinMaxScaler
    X = MinMaxScaler().fit_transform(X)

    l = nfpc.FuzzyPatternClassifier(
        membership_factory=t_factory,
        aggregation_factory=nfpc.GAOWAFactory(optimizer=nfpc.ps_owa_optimizer())
    )

    from sklearn.model_selection import cross_val_score

    scores = cross_val_score(l, X, y, cv=10)
    mean = np.mean(scores)

    print("mean", mean)

    assert 0.92 < mean 
Example 5
Project: fylearn   Author: sorend   File: test_fpcga.py    License: MIT License
def test_classifier_iris():

    iris = load_iris()

    X = iris.data
    y = iris.target

    from sklearn.preprocessing import MinMaxScaler
    X = MinMaxScaler().fit_transform(X)

    l = fpcga.FuzzyPatternClassifierGA(iterations=100, random_state=1)

    from sklearn.model_selection import cross_val_score

    scores = cross_val_score(l, X, y, cv=10)

    assert len(scores) == 10
    assert np.mean(scores) > 0.6
    mean = np.mean(scores)

    print("mean", mean)

    assert 0.92 == pytest.approx(mean, 0.01) 
Example 6
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_score_objects.py    License: MIT License
def test_check_scoring_gridsearchcv():
    # test that check_scoring works on GridSearchCV and pipeline.
    # slightly redundant non-regression test.

    grid = GridSearchCV(LinearSVC(), param_grid={'C': [.1, 1]})
    scorer = check_scoring(grid, "f1")
    assert isinstance(scorer, _PredictScorer)

    pipe = make_pipeline(LinearSVC())
    scorer = check_scoring(pipe, "f1")
    assert isinstance(scorer, _PredictScorer)

    # check that cross_val_score definitely calls the scorer
    # and doesn't make any assumptions about the estimator apart from having a
    # fit.
    scores = cross_val_score(EstimatorWithFit(), [[1], [2], [3]], [1, 0, 1],
                             scoring=DummyScorer())
    assert_array_equal(scores, 1) 
Example 7
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_validation.py    License: MIT License
def test_cross_val_score_predict_groups():
    # Check if ValueError (when groups is None) propagates to cross_val_score
    # and cross_val_predict
    # And also check if groups is correctly passed to the cv object
    X, y = make_classification(n_samples=20, n_classes=2, random_state=0)

    clf = SVC(kernel="linear")

    group_cvs = [LeaveOneGroupOut(), LeavePGroupsOut(2), GroupKFold(),
                 GroupShuffleSplit()]
    for cv in group_cvs:
        assert_raise_message(ValueError,
                             "The 'groups' parameter should not be None.",
                             cross_val_score, estimator=clf, X=X, y=y, cv=cv)
        assert_raise_message(ValueError,
                             "The 'groups' parameter should not be None.",
                             cross_val_predict, estimator=clf, X=X, y=y, cv=cv) 
Example 8
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_validation.py    License: MIT License
def test_cross_val_score_pandas():
    # check cross_val_score doesn't destroy pandas dataframe
    types = [(MockDataFrame, MockDataFrame)]
    try:
        from pandas import Series, DataFrame
        types.append((Series, DataFrame))
    except ImportError:
        pass
    for TargetType, InputFeatureType in types:
        # X dataframe, y series
        # 3-fold cross-validation is used, so we need at least 3 samples per class
        X_df, y_ser = InputFeatureType(X), TargetType(y2)
        check_df = lambda x: isinstance(x, InputFeatureType)
        check_series = lambda x: isinstance(x, TargetType)
        clf = CheckingClassifier(check_X=check_df, check_y=check_series)
        cross_val_score(clf, X_df, y_ser) 
Example 9
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_validation.py    License: MIT License
def test_cross_val_score_precomputed():
    # test for svm with precomputed kernel
    svm = SVC(kernel="precomputed")
    iris = load_iris()
    X, y = iris.data, iris.target
    linear_kernel = np.dot(X, X.T)
    score_precomputed = cross_val_score(svm, linear_kernel, y)
    svm = SVC(kernel="linear")
    score_linear = cross_val_score(svm, X, y)
    assert_array_almost_equal(score_precomputed, score_linear)

    # test with callable
    svm = SVC(gamma='scale', kernel=lambda x, y: np.dot(x, y.T))
    score_callable = cross_val_score(svm, X, y)
    assert_array_almost_equal(score_precomputed, score_callable)

    # Error raised for non-square X
    svm = SVC(kernel="precomputed")
    assert_raises(ValueError, cross_val_score, svm, X, y)

    # test error is raised when the precomputed kernel is not array-like
    # or sparse
    assert_raises(ValueError, cross_val_score, svm,
                  linear_kernel.tolist(), y) 
Example 10
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_validation.py    License: MIT License
def test_cross_val_score_with_score_func_classification():
    iris = load_iris()
    clf = SVC(kernel='linear')

    # Default score (should be the accuracy score)
    scores = cross_val_score(clf, iris.data, iris.target, cv=5)
    assert_array_almost_equal(scores, [0.97, 1., 0.97, 0.97, 1.], 2)

    # Correct classification score (aka. zero / one score) - should be the
    # same as the default estimator score
    zo_scores = cross_val_score(clf, iris.data, iris.target,
                                scoring="accuracy", cv=5)
    assert_array_almost_equal(zo_scores, [0.97, 1., 0.97, 0.97, 1.], 2)

    # F1 score (classes are balanced, so the f1_score should be equal to the
    # zero/one score)
    f1_scores = cross_val_score(clf, iris.data, iris.target,
                                scoring="f1_weighted", cv=5)
    assert_array_almost_equal(f1_scores, [0.97, 1., 0.97, 0.97, 1.], 2) 
Example 11
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_validation.py    License: MIT License
def test_score_memmap():
    # Ensure a scalar score of memmap type is accepted
    iris = load_iris()
    X, y = iris.data, iris.target
    clf = MockClassifier()
    tf = tempfile.NamedTemporaryFile(mode='wb', delete=False)
    tf.write(b'Hello world!!!!!')
    tf.close()
    scores = np.memmap(tf.name, dtype=np.float64)
    score = np.memmap(tf.name, shape=(), mode='r', dtype=np.float64)
    try:
        cross_val_score(clf, X, y, scoring=lambda est, X, y: score)
        # non-scalar should still fail
        assert_raises(ValueError, cross_val_score, clf, X, y,
                      scoring=lambda est, X, y: scores)
    finally:
        # Best effort to release the mmap file handles before deleting the
        # backing file under Windows
        scores, score = None, None
        for _ in range(3):
            try:
                os.unlink(tf.name)
                break
            except WindowsError:
                sleep(1.) 
Example 12
Project: GeneticAlgorithmForFeatureSelection   Author: renatoosousa   File: gaFeatureSelection.py    License: MIT License
def getFitness(individual, X, y):
    """
    Feature subset fitness function
    """

    if individual.count(0) != len(individual):
        # indexes of the features flagged 0 (these columns will be dropped)
        cols = [index for index in range(
            len(individual)) if individual[index] == 0]

        # get features subset
        X_parsed = X.drop(X.columns[cols], axis=1)
        X_subset = pd.get_dummies(X_parsed)

        # apply classification algorithm
        clf = LogisticRegression()

        return (avg(cross_val_score(clf, X_subset, y, cv=5)),)
    else:
        return (0,)
Example 13
Project: CatLearn   Author: SUNCAT-Center   File: site_stability.py    License: GNU General Public License v3.0
def get_chromosome_score(self, X_chromosome):
        """
        Computes fitness using the subset of data in X_chromosome.
        :param X_chromosome: subset of full data set, containing only a selection of the features.
        :return: mean cross-validation score (R2 for regressors), or the last entry of the chosen keras history metric.
        """
        np.random.seed(self.random_state)
        # Use either cross validation
        if self.scoring == 'cv':
            scores = cross_val_score(self.clf, X_chromosome, np.array(self.y), cv=self.n_cv)
            return np.mean(scores)
        # Or keras history in the case of neural networks (based on keras/tensorflow)
        else:
            try:
                history = self.clf.fit(X_chromosome, np.array(self.y))
                return history.history[self.scoring][-1]
            except Exception:
                raise ValueError('Use either "cv" or keras history metrics.')
Example 14
Project: brainiak   Author: brainiak   File: classification.py    License: Apache License 2.0
def example_of_cross_validation_using_model_selection(raw_data, labels, num_subjects, num_epochs_per_subj):
    # NOTE: this method does not work for sklearn.svm.SVC with precomputed kernel
    # when the kernel matrix is computed in portions; also, this method only works
    # for self-correlation, i.e. correlation between the same data matrix.

    # no shrinking, set C=1
    svm_clf = svm.SVC(kernel='precomputed', shrinking=False, C=1, gamma='auto')
    #logit_clf = LogisticRegression()
    clf = Classifier(svm_clf, epochs_per_subj=num_epochs_per_subj)
    # doing leave-one-subject-out cross validation
    # no shuffling in cv
    skf = model_selection.StratifiedKFold(n_splits=num_subjects,
                                          shuffle=False)
    scores = model_selection.cross_val_score(clf, list(zip(raw_data, raw_data)),
                                             y=labels,
                                             cv=skf)
    print(scores)
    logger.info(
        'the overall cross validation accuracy is %.2f' %
        np.mean(scores)
    ) 
Example 15
Project: brainiak   Author: brainiak   File: mvpa_voxelselector.py    License: Apache License 2.0
def _sfn(data, mask, myrad, bcast_var):
    """Score classifier on searchlight data using cross-validation.

    The classifier is in `bcast_var[2]`. The labels are in `bcast_var[0]`. The
    number of cross-validation folds is in `bcast_var[1]`.
    """
    clf = bcast_var[2]
    masked_data = data[0][mask, :].T
    # print(l[0].shape, mask.shape, data.shape)
    skf = model_selection.StratifiedKFold(n_splits=bcast_var[1],
                                          shuffle=False)
    accuracy = np.mean(model_selection.cross_val_score(clf, masked_data,
                                                       y=bcast_var[0],
                                                       cv=skf,
                                                       n_jobs=1))
    return accuracy 
Example 16
Project: pysaliency   Author: matthias-k   File: test_crossvalidation.py    License: MIT License
def test_image_crossvalidation(stimuli, fixation_trains):
    gsmm = GaussianSaliencyModel()

    cv = ScikitLearnImageCrossValidationGenerator(stimuli, fixation_trains)

    assert unpack_crossval(cv) == [
        ([False, False, False, False, False, True, True, True, True],
         [True, True, True, True, True, False, False, False, False]),
        ([True, True, True, True, True, False, False, False, True],
         [False, False, False, False, False, True, True, True, False]),
        ([True, True, True, True, True, True, True, True, False],
         [False, False, False, False, False, False, False, False, True])
    ]

    X = fixations_to_scikit_learn(fixation_trains, normalize=stimuli, add_shape=True)

    assert cross_val_score(
        RegularizedKernelDensityEstimator(bandwidth=0.1, regularization=0.1),
        X,
        cv=cv,
        verbose=0).sum() 
Example 17
Project: pysaliency   Author: matthias-k   File: test_crossvalidation.py    License: MIT License
def test_image_subject_crossvalidation(stimuli, fixation_trains):
    gsmm = GaussianSaliencyModel()

    cv = ScikitLearnImageSubjectCrossValidationGenerator(stimuli, fixation_trains)

    assert unpack_crossval(cv) == [
        ([False, False, False, True, True, False, False, False, False],
         [True, True, True, False, False, False, False, False, False]),
        ([True, True, True, False, False, False, False, False, False],
         [False, False, False, True, True, False, False, False, False])
    ]
    
    X = fixations_to_scikit_learn(fixation_trains, normalize=stimuli, add_shape=True)

    assert cross_val_score(
        RegularizedKernelDensityEstimator(bandwidth=0.1, regularization=0.1),
        X,
        cv=cv,
        verbose=0).sum() 
Example 18
Project: Python-Machine-Learning-Cookbook-Second-Edition   Author: PacktPublishing   File: utilities.py    License: MIT License
def print_accuracy_report(classifier, X, y, num_validations=5):
    accuracy = model_selection.cross_val_score(classifier, 
            X, y, scoring='accuracy', cv=num_validations)
    print("Accuracy: " + str(round(100*accuracy.mean(), 2)) + "%")

    f1 = model_selection.cross_val_score(classifier, 
            X, y, scoring='f1_weighted', cv=num_validations)
    print("F1: " + str(round(100*f1.mean(), 2)) + "%")

    precision = model_selection.cross_val_score(classifier, 
            X, y, scoring='precision_weighted', cv=num_validations)
    print("Precision: " + str(round(100*precision.mean(), 2)) + "%")

    recall = model_selection.cross_val_score(classifier, 
            X, y, scoring='recall_weighted', cv=num_validations)
    print("Recall: " + str(round(100*recall.mean(), 2)) + "%") 
Example 19
Project: pyglmnet   Author: glm-tools   File: test_pyglmnet.py    License: MIT License
def test_cv():
    """Simple CV check."""
    # XXX: don't use scikit-learn for tests.
    X, y = make_regression()
    cv = KFold(n_splits=5)

    glm_normal = GLM(distr='gaussian', alpha=0.01, reg_lambda=0.1)
    # check that it returns 5 scores
    scores = cross_val_score(glm_normal, X, y, cv=cv)
    assert len(scores) == 5

    param_grid = [{'alpha': np.linspace(0.01, 0.99, 2)},
                  {'reg_lambda': np.logspace(np.log(0.5), np.log(0.01),
                                             10, base=np.exp(1))}]
    glmcv = GridSearchCV(glm_normal, param_grid, cv=cv)
    glmcv.fit(X, y) 
Example 20
Project: EDeN   Author: fabriziocosta   File: estimator.py    License: MIT License
def cross_val_score(self, graphs, targets,
                        scoring='roc_auc', cv=5):
        """cross_val_score."""
        x = self.transform(graphs)
        scores = cross_val_score(
            self.model, x, targets, cv=cv, scoring=scoring)
        return scores 
Example 21
Project: EDeN   Author: fabriziocosta   File: estimator.py    License: MIT License
def cross_val_predict(self, graphs, targets, cv=5):
        """cross_val_score."""
        x = self.transform(graphs)
        scores = cross_val_predict(
            self.model, x, targets, cv=cv, method='decision_function')
        return scores 
Example 22
Project: EDeN   Author: fabriziocosta   File: estimator.py    License: MIT License
def bias_variance_decomposition(self, graphs, targets,
                                    cv=5, n_bootstraps=10):
        """bias_variance_decomposition."""
        x = self.transform(graphs)
        score_list = []
        for i in range(n_bootstraps):
            scores = cross_val_score(
                self.model, x, targets, cv=cv)
            score_list.append(scores)
        score_list = np.array(score_list)
        mean_scores = np.mean(score_list, axis=1)
        std_scores = np.std(score_list, axis=1)
        return mean_scores, std_scores 
Example 23
Project: EDeN   Author: fabriziocosta   File: estimator.py    License: MIT License
def _eval_params(graphs, targets, param_distr):
    # sample parameters
    params = _sample_params(param_distr)
    # create model with those parameters
    est = EdenEstimator(**params)
    # run a cross_val_score
    scores = est.cross_val_score(graphs, targets)
    # return average
    return np.mean(scores), params 
Example 24
Project: EDeN   Author: fabriziocosta   File: estimator_utils.py    License: MIT License
def estimate_predictive_performance(x_y,
                                    estimator=None,
                                    n_splits=10,
                                    random_state=1):
    """estimate_predictive_performance."""
    x, y = x_y
    cv = ShuffleSplit(n_splits=n_splits,
                      test_size=0.3,
                      random_state=random_state)
    scoring = make_scorer(average_precision_score)
    scores = cross_val_score(estimator, x, y, cv=cv, scoring=scoring)
    return scores 
Example 25
Project: Python-ELM   Author: masaponto   File: elm.py    License: MIT License
def main():
    from sklearn import preprocessing
    from sklearn.datasets import fetch_openml as fetch_mldata
    from sklearn.model_selection import ShuffleSplit, KFold, cross_val_score

    db_name = 'australian'
    hid_nums = [100, 200, 300]

    data_set = fetch_mldata(db_name)
    data_set.data = preprocessing.normalize(data_set.data)
    data_set.target = [1 if i == 1 else -1
                       for i in data_set.target.astype(int)]

    for hid_num in hid_nums:
        print(hid_num, end=' ')
        e = ELM(hid_num)

        ave = 0
        for i in range(10):
            cv = KFold(n_splits=5, shuffle=True)
            scores = cross_val_score(
                e, data_set.data, data_set.target,
                cv=cv, scoring='accuracy', n_jobs=-1)
            ave += scores.mean()

        ave /= 10

        print("Accuracy: %0.3f " % (ave)) 
Example 26
Project: Python-ELM   Author: masaponto   File: ecob_elm.py    License: MIT License
def main():
    from sklearn import preprocessing
    from sklearn.datasets import fetch_openml as fetch_mldata
    from sklearn.model_selection import cross_val_score

    db_name = 'iris'
    hid_num = 1000
    data_set = fetch_mldata(db_name, version=1)
    data_set.data = preprocessing.scale(data_set.data)
    data_set.target = preprocessing.LabelEncoder().fit_transform(data_set.target)

    print(db_name)
    print('ECOBELM', hid_num)
    e = ECOBELM(hid_num, c=2**5)
    ave = 0
    for i in range(10):
        scores = cross_val_score(
            e, data_set.data, data_set.target, cv=5, scoring='accuracy')
        ave += scores.mean()
    ave /= 10
    print("Accuracy: %0.2f " % (ave))

    print('ELM', hid_num)
    e = ELM(hid_num)
    ave = 0
    for i in range(10):
        scores = cross_val_score(
            e, data_set.data, data_set.target, cv=5, scoring='accuracy')
        ave += scores.mean()
    ave /= 10
    print("Accuracy: %0.2f " % (ave)) 
Example 27
Project: Fundamentals-of-Machine-Learning-with-scikit-learn   Author: PacktPublishing   File: 8voting_classifier.py    License: MIT License
def compute_accuracies(lr, dt, svc, vc, X, Y):
    accuracies = []

    accuracies.append(cross_val_score(lr, X, Y, scoring='accuracy', cv=10).mean())
    accuracies.append(cross_val_score(dt, X, Y, scoring='accuracy', cv=10).mean())
    accuracies.append(cross_val_score(svc, X, Y, scoring='accuracy', cv=10).mean())
    accuracies.append(cross_val_score(vc, X, Y, scoring='accuracy', cv=10).mean())

    print('Accuracies:')
    print(np.array(accuracies))

    return accuracies 
Example 28
Project: adam_qas   Author: 5hirish   File: test_classify_question.py    License: GNU General Public License v3.0
def test_classify_question(self):
        training_data_path = os.path.join(CORPUS_DIR, QUESTION_CLASSIFICATION_TRAINING_DATA)
        df_question = pandas.read_csv(training_data_path, sep='|', header=0)
        df_question_train, df_question_test = train_test_split(df_question, test_size=0.2, random_state=42)

        predicted_class, clf, df_question_train_label, df_question_train = \
            classify_question(df_question_train=df_question_train, df_question_test=df_question_test)

        scores = cross_val_score(clf, df_question_train, df_question_train_label)

        print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))
        print("SD:", scores.std())

        assert scores.mean() > self.classification_score 
Example 29
Project: cloudml-samples   Author: GoogleCloudPlatform   File: task.py    License: Apache License 2.0
def _train_and_evaluate(estimator, dataset, output_dir):
  """Runs model training and evaluation.

  Args:
    estimator: (pipeline.Pipeline), Pipeline instance assembling
      pre-processing steps and model training
    dataset: (pandas.DataFrame), DataFrame containing training data
    output_dir: (string), directory to which the trained model will be
      exported

  Returns:
    None
  """
  x_train, y_train, x_val, y_val = utils.data_train_test_split(dataset)
  estimator.fit(x_train, y_train)

  # Note: for now, use 3-fold cross-validation (the historical `cross_val_score` default)
  scores = model_selection.cross_val_score(estimator, x_val, y_val, cv=3)

  logging.info(scores)

  # Write model and eval metrics to `output_dir`
  model_output_path = os.path.join(
      output_dir, 'model', metadata.MODEL_FILE_NAME)

  metric_output_path = os.path.join(
      output_dir, 'experiment', metadata.METRIC_FILE_NAME)

  utils.dump_object(estimator, model_output_path)
  utils.dump_object(scores, metric_output_path)

  # The default name of the metric is training/hptuning/metric.
  # We recommend that you assign a custom name. The only functional
  # difference is that if you use a custom name, you must set the
  # hyperparameterMetricTag value in the HyperparameterSpec object in your
  # job request to match your chosen name.
  hpt = hypertune.HyperTune()
  hpt.report_hyperparameter_tuning_metric(
      hyperparameter_metric_tag='my_metric_tag',
      metric_value=np.mean(scores),
      global_step=1000)