Python xgboost.XGBClassifier() Examples

The following are 30 code examples of xgboost.XGBClassifier(), drawn from open-source projects. The source file and project each example comes from are noted above it, along with the project's license. You may also want to check out the other available functions and classes of the xgboost module.
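Before the project snippets, here is a minimal, self-contained sketch of the typical XGBClassifier workflow (construct, fit, predict). The synthetic data and parameter values are illustrative assumptions, not taken from any of the projects below:

import numpy as np
import xgboost as xgb

# Illustrative synthetic binary-classification data (assumption, for demonstration only)
rng = np.random.default_rng(0)
X = rng.random((100, 4))
y = (X[:, 0] + X[:, 1] > 1.0).astype(int)

# Construct the scikit-learn-style wrapper, fit, and predict
clf = xgb.XGBClassifier(n_estimators=50, max_depth=3, learning_rate=0.1)
clf.fit(X, y)
preds = clf.predict(X)        # hard class labels
probs = clf.predict_proba(X)  # class probabilities, shape (n_samples, 2)
print('train accuracy:', (preds == y).mean())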
Example #1
Source File: XgbClf.py    From rafiki with Apache License 2.0
def _build_classifier(self, n_estimators, min_child_weight, max_depth, gamma, subsample, colsample_bytree, num_class):
        assert num_class >= 2

        if num_class == 2:
            clf = xgb.XGBClassifier(
                n_estimators=n_estimators,
                min_child_weight=min_child_weight,
                max_depth=max_depth,
                gamma=gamma,
                subsample=subsample,
                colsample_bytree=colsample_bytree
            )
        else:
            clf = xgb.XGBClassifier(
                n_estimators=n_estimators,
                min_child_weight=min_child_weight,
                max_depth=max_depth,
                gamma=gamma,
                subsample=subsample,
                colsample_bytree=colsample_bytree,
                objective='multi:softmax',
                num_class=num_class
            )
        return clf
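Note the design choice here: the binary branch passes no explicit objective because XGBClassifier defaults to 'binary:logistic' for two classes, so only the multiclass branch needs objective='multi:softmax' together with num_class.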
Example #2
Source File: XGBoost_Classify_adult.py    From Machine-Learning-for-Beginner-by-Python3 with MIT License
def Train(data, modelcount, censhu, yanzhgdata):
    model = xgb.XGBClassifier(max_depth=censhu, learning_rate=0.1, n_estimators=modelcount,
                              silent=True, objective='binary:logistic', booster='gbtree')

    model.fit(data[:, :-1], data[:, -1])
    # Predictions on the training data
    train_out = model.predict(data[:, :-1])
    # Compute the MSE
    train_mse = fmse(data[:, -1], train_out)[0]

    # Predictions on the validation data
    add_yan = model.predict(yanzhgdata[:, :-1])
    # Compute the F1 metric
    add_mse = fmse(yanzhgdata[:, -1], add_yan)[0]
    print(train_mse, add_mse)
    return train_mse, add_mse

# Function that finalizes the model combination
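A portability note on this snippet: silent=True was deprecated in favor of verbosity around xgboost 1.0 and later removed, so on recent releases it should be dropped or replaced with verbosity.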
Example #3
Source File: prediction_model_factory.py    From redshells with MIT License
def __init__(self):
        self._models = dict()
        try:
            import sklearn.ensemble
            self._models['RandomForestClassifier'] = sklearn.ensemble.RandomForestClassifier
        except ImportError:
            pass

        try:
            import xgboost
            self._models['XGBClassifier'] = xgboost.XGBClassifier
        except ImportError:
            pass

        try:
            import lightgbm
            self._models['LGBMClassifier'] = lightgbm.LGBMClassifier
        except ImportError:
            pass

        try:
            import catboost
            self._models['CatBoostClassifier'] = catboost.CatBoostClassifier
        except ImportError:
            pass 
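The optional-import pattern above generalizes nicely. Here is a standalone sketch of the same idea using importlib; the function name and candidate list are my own, not from redshells:

import importlib

def available_classifiers():
    """Collect classifier classes from whichever libraries happen to be installed."""
    candidates = [
        ('sklearn.ensemble', 'RandomForestClassifier'),
        ('xgboost', 'XGBClassifier'),
        ('lightgbm', 'LGBMClassifier'),
        ('catboost', 'CatBoostClassifier'),
    ]
    models = {}
    for module_name, class_name in candidates:
        try:
            module = importlib.import_module(module_name)
            models[class_name] = getattr(module, class_name)
        except ImportError:
            pass  # library not installed; skip it silently
    return models

print(sorted(available_classifiers()))  # contents depend on the environment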
Example #4
Source File: testScoreWithAdapaXgboost.py    From nyoka with Apache License 2.0
def test_03_xgb_classifier(self):
        print("\ntest 03 (xgb classifier with preprocessing) [binary-class]\n")
        model = XGBClassifier()
        pipeline_obj = Pipeline([
            ('scaler',MinMaxScaler()),
            ("model", model)
        ])
        pipeline_obj.fit(self.X,self.Y_bin)
        file_name = "test03xgboost.pmml"
        xgboost_to_pmml(pipeline_obj, self.features, 'Species', file_name)
        model_name  = self.adapa_utility.upload_to_zserver(file_name)
        predictions, probabilities = self.adapa_utility.score_in_zserver(model_name, self.test_file)
        model_pred = pipeline_obj.predict(self.X)
        model_prob = pipeline_obj.predict_proba(self.X)
        self.assertEqual(self.adapa_utility.compare_predictions(predictions, model_pred), True)
        self.assertEqual(self.adapa_utility.compare_probability(probabilities, model_prob), True) 
Example #5
Source File: testScoreWithAdapaXgboost.py    From nyoka with Apache License 2.0
def test_01_xgb_classifier(self):
        print("\ntest 01 (xgb classifier with preprocessing) [multi-class]\n")
        model = XGBClassifier()
        pipeline_obj = Pipeline([
            ('scaler',MaxAbsScaler()),
            ("model", model)
        ])
        pipeline_obj.fit(self.X,self.Y)
        file_name = "test01xgboost.pmml"
        xgboost_to_pmml(pipeline_obj, self.features, 'Species', file_name)
        model_name  = self.adapa_utility.upload_to_zserver(file_name)
        predictions, probabilities = self.adapa_utility.score_in_zserver(model_name, self.test_file)
        model_pred = pipeline_obj.predict(self.X)
        model_prob = pipeline_obj.predict_proba(self.X)
        self.assertEqual(self.adapa_utility.compare_predictions(predictions, model_pred), True)
        self.assertEqual(self.adapa_utility.compare_probability(probabilities, model_prob), True) 
Example #6
Source File: listing_9_6_crossvalidate_xgb.py    From fight-churn with MIT License
def crossvalidate_xgb(data_set_path,n_test_split):

    X,y = prepare_data(data_set_path,ext='',as_retention=False)

    tscv = TimeSeriesSplit(n_splits=n_test_split)

    score_models = {'lift': make_scorer(calc_lift, needs_proba=True), 'AUC': 'roc_auc'}

    xgb_model = xgb.XGBClassifier(objective='binary:logistic')
    test_params = { 'max_depth': [1,2,4,6],
                    'learning_rate': [0.1,0.2,0.3,0.4],
                    'n_estimators': [20,40,80,120],
                    'min_child_weight' : [3,6,9,12]}
    gsearch = GridSearchCV(estimator=xgb_model,n_jobs=-1, scoring=score_models, cv=tscv, verbose=1,
                           return_train_score=False,  param_grid=test_params,refit='AUC')
    gsearch.fit(X.values,y)

    result_df = pd.DataFrame(gsearch.cv_results_)
    result_df.sort_values('mean_test_AUC',ascending=False,inplace=True)
    save_path = data_set_path.replace('.csv', '_crossval_xgb.csv')
    result_df.to_csv(save_path, index=False)
    print('Saved test scores to ' + save_path)

    pickle_path = data_set_path.replace('.csv', '_xgb_model.pkl')
    with open(pickle_path, 'wb') as fid:
        pickle.dump(gsearch.best_estimator_, fid)
    print('Saved model pickle to ' + pickle_path)

    predictions = gsearch.best_estimator_.predict_proba(X.values)
    predict_df = pd.DataFrame(predictions, index=X.index, columns=['retain_prob','churn_prob'])
    forecast_save_path = data_set_path.replace('.csv', '_xgb_predictions.csv')
    print('Saving results to %s' % forecast_save_path)
    predict_df.to_csv(forecast_save_path, header=True)

    forecast_histogram(data_set_path,predict_df,ext='xgb') 
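Because refit='AUC' is passed to GridSearchCV, gsearch.best_estimator_ is automatically refit on the full data using the parameters that maximized mean AUC, and that refit model is what gets pickled and used for the probability forecasts above.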
Example #7
Source File: test_sentinels.py    From hyperparameter_hunter with MIT License
def test_sentinels_optimization(env_0):
    optimizer = GBRT(iterations=2)
    optimizer.forge_experiment(
        model_initializer=XGBClassifier,
        model_init_params=dict(objective="reg:linear", max_depth=Integer(2, 20), subsample=0.5),
        model_extra_params=dict(
            fit=dict(
                eval_set=get_all_sentinels(env_0),
                early_stopping_rounds=5,
                eval_metric=Categorical(["auc", "mae"]),
            )
        ),
    )
    optimizer.go()


##################################################
# General Sentinel Scenarios
################################################## 
Example #8
Source File: mis_classifier.py    From autoimpute with MIT License
def classifier(self, c):
        """Validate the classifier property and set default parameters.

        Args:
            c (classifier): if None, implement the xgboost classifier

        Raises:
            ValueError: classifier does not implement `predict_proba`
        """
        if c is None:
            self._classifier = XGBClassifier()
        else:
            m = "predict_proba"
            if not hasattr(c, m):
                raise ValueError(f"Classifier must implement {m} method.")
            self._classifier = c 
Example #9
Source File: test_boosted_trees_classifier_numeric.py    From coremltools with BSD 3-Clause "New" or "Revised" License
def _train_convert_evaluate_assert(self, **xgboost_params):
        """
        Train a scikit-learn model, convert it and then evaluate it with CoreML
        """
        xgb_model = xgboost.XGBClassifier(**xgboost_params)
        xgb_model.fit(self.X, self.target)

        # Convert the model
        spec = xgb_converter.convert(
            xgb_model, self.feature_names, self.output_name, mode="classifier"
        )

        if _is_macos() and _macos_version() >= (10, 13):
            # Get predictions
            df = pd.DataFrame(self.X, columns=self.feature_names)
            probabilities = xgb_model.predict_proba(self.X)
            df["classProbability"] = [
                dict(zip(xgb_model.classes_, cur_vals)) for cur_vals in probabilities
            ]
            metrics = evaluate_classifier_with_probabilities(
                spec, df, probabilities="classProbability", verbose=False
            )
            self.assertEqual(metrics["num_key_mismatch"], 0)
            self.assertLess(metrics["max_probability_error"], 1e-3) 
Example #10
Source File: test_importance.py    From mljar-supervised with MIT License
def test_compute_and_plot(self):
        rows = 20
        X = np.random.rand(rows, 3)
        X = pd.DataFrame(X, columns=[f"f{i}" for i in range(3)])
        y = np.random.randint(0, 2, rows)

        model = XGBClassifier(n_estimators=1, max_depth=2)
        model.fit(X, y)

        with tempfile.TemporaryDirectory() as tmpdir:
            PermutationImportance.compute_and_plot(
                model,
                X_validation=X,
                y_validation=y,
                model_file_path=tmpdir,
                learner_name="learner_test",
                metric_name=None,
                ml_task="binary_classification",
            )
            self.assertTrue(
                os.path.exists(os.path.join(tmpdir, "learner_test_importance.csv"))
            ) 
Example #11
Source File: test_xgboost.py    From hyperparameter_hunter with MIT License
def opt_xgb_0():
    optimizer = RandomForestOptPro(iterations=2, random_state=1337)
    optimizer.forge_experiment(
        model_initializer=XGBClassifier,
        model_init_params=dict(
            objective="reg:linear",
            max_depth=Integer(2, 20),
            learning_rate=Real(0.0001, 0.5),
            subsample=0.5,
            booster=Categorical(["gbtree", "dart"]),
        ),
    )
    optimizer.go()
    yield optimizer


##################################################
# Test Scenarios
################################################## 
Example #12
Source File: utils.py    From m2cgen with MIT License
def __call__(self, estimator):
        fitted_estimator = estimator.fit(self.X_train, self.y_train)

        if isinstance(estimator, (LinearClassifierMixin, SVC, NuSVC,
                                  LightBaseClassifier)):
            y_pred = estimator.decision_function(self.X_test)
        elif isinstance(estimator, DecisionTreeClassifier):
            y_pred = estimator.predict_proba(self.X_test.astype(np.float32))
        elif isinstance(
                estimator,
                (ForestClassifier, XGBClassifier, LGBMClassifier)):
            y_pred = estimator.predict_proba(self.X_test)
        else:
            y_pred = estimator.predict(self.X_test)

        return self.X_test, y_pred, fitted_estimator 
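As best I can tell from this test utility, the branching picks whichever output m2cgen's generated code is validated against: margin scores via decision_function for linear and SVM models, class probabilities for tree ensembles such as XGBClassifier and LGBMClassifier, and plain predictions for everything else.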
Example #13
Source File: XGBoost_Classify_adult.py    From Machine-Learning-for-Beginner-by-Python3 with MIT License
def recspre(estrs, predata, datadict, zhe):

    mo, ze = estrs.split('-')
    model = xgb.XGBClassifier(max_depth=int(ze), learning_rate=0.1, n_estimators=int(mo),
                              silent=True, objective='binary:logistic', booster='gbtree')

    model.fit(datadict[zhe]['train'][:, :-1], datadict[zhe]['train'][:, -1])

    # Predict
    yucede = model.predict(predata[:, :-1])
    # Compute the confusion matrix

    print(ConfuseMatrix(predata[:, -1], yucede))

    return fmse(predata[:, -1], yucede)

# Main function
Example #14
Source File: Blending_Classify_adult.py    From Machine-Learning-for-Beginner-by-Python3 with MIT License
def XGBoost_First(self, data, max_depth=8, n_estimators=220):
        model = xgb.XGBClassifier(max_depth=max_depth, learning_rate=0.1, n_estimators=n_estimators,
                                  silent=True, objective='binary:logistic', booster='gbtree')
        model.fit(data['train'][:, :-1], data['train'][:, -1])
        # Store the validation-set and prediction-set results
        # Predictions on the training set
        xul = model.predict(data['train'][:, :-1])
        # Predictions on the validation set
        yanre = model.predict(data['test'][:, :-1])
        # Predictions on the prediction set
        prer = model.predict(data['predict'][:, :-1])

        # After each fold, compute the error on the training, validation, and prediction data
        xx = self.F1(xul, data['train'][:, -1])

        yy = self.F1(yanre, data['test'][:, -1])

        pp = self.F1(prer, data['predict'][:, -1])

        # Start combining the results
        self.yanzhneg_pr.append(yanre)
        self.yanzhneg_real = data['test'][:, -1]
        self.predi.append(prer)
        self.preal = data['predict'][:, -1]

        # Store the errors
        self.error_dict['XGBoost'] = [xx, yy, pp]
        return print('XGBoost in layer 1 finished running')

    # CatBoost 
Example #15
Source File: recorder_example.py    From hyperparameter_hunter with MIT License
def do_optimization():
    optimizer = BayesianOptPro(iterations=5, random_state=1337)
    optimizer.forge_experiment(
        model_initializer=XGBClassifier,
        model_init_params=dict(
            objective="reg:linear",
            max_depth=Integer(2, 20),
            learning_rate=Real(0.0001, 0.5),
            subsample=0.5,
            booster=Categorical(["gbtree", "dart"]),
        ),
    )
    optimizer.go()


# We'll start with a normal `Environment` for comparison, using only the `env_kwargs` defined above
Example #16
Source File: lambda_callback_example.py    From hyperparameter_hunter with MIT License
def execute():
    env = Environment(
        train_dataset=get_toy_classification_data(),
        results_path="HyperparameterHunterAssets",
        metrics=["roc_auc_score"],
        cv_type=RepeatedStratifiedKFold,
        cv_params=dict(n_splits=5, n_repeats=2, random_state=32),
        runs=2,
        # Just instantiate `Environment` with your list of callbacks, and go about business as usual
        experiment_callbacks=[printer_callback(), confusion_matrix_oof()],
        # In addition to `printer_callback` made above, we're also adding the `confusion_matrix_oof` callback
        # This, and other callbacks, can be found in `hyperparameter_hunter.callbacks.recipes`
    )

    experiment = CVExperiment(
        model_initializer=XGBClassifier,
        model_init_params={},
        model_extra_params=dict(fit=dict(verbose=False)),
    ) 
Example #17
Source File: test_sentinels.py    From hyperparameter_hunter with MIT License
def test_sentinels_experiment(env_0):
    # noinspection PyUnusedLocal
    experiment = CVExperiment(
        model_initializer=XGBClassifier,
        model_init_params=dict(objective="reg:linear", max_depth=3, subsample=0.5),
        model_extra_params=dict(
            fit=dict(eval_set=get_all_sentinels(env_0), early_stopping_rounds=5, eval_metric="mae")
        ),
    ) 
Example #18
Source File: test_xgboost_converters.py    From sklearn-onnx with MIT License
def test_xgb_classifier_reglog(self):
        iris = load_iris()
        X = iris.data[:, :2]
        y = iris.target
        y[y == 2] = 0

        xgb = XGBClassifier(objective='reg:logistic')
        xgb.fit(X, y)
        conv_model = convert_sklearn(
            xgb, initial_types=[
                ('input', FloatTensorType(shape=[None, X.shape[1]]))])
        self.assertTrue(conv_model is not None)
        dump_binary_classification(xgb, suffix="RegLog", label_string=False) 
Example #19
Source File: tester.py    From Text-Classification-Benchmark with MIT License
def init_estimators():
    return [
        {'classifier': 'NB', 'model': MultinomialNB()},
        {'classifier': 'LR', 'model': LogisticRegression(random_state=42)},
        {'classifier': 'L-SVM', 'model': LinearSVC(max_iter=1000, random_state=42)},
        {'classifier': 'RBF-SVM', 'model': SVC(max_iter=1000, random_state=42)},
        {'classifier': 'RF', 'model': RandomForestClassifier(n_estimators=100, random_state=42)},
        {'classifier': 'XGB', 'model': XGBClassifier(n_estimators=100, random_state=42)},
        {'classifier': 'LGBM', 'model': LGBMClassifier(n_estimators=100, random_state=42)},
    ] 
Example #20
Source File: base.py    From pandas-ml with BSD 3-Clause "New" or "Revised" License
def XGBClassifier(self):
        import xgboost as xgb
        return xgb.XGBClassifier 
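Deferring the import into the accessor body keeps pandas-ml importable on systems without xgboost; an ImportError surfaces only when XGBClassifier is actually requested.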
Example #21
Source File: test_xgboost_converters.py    From sklearn-onnx with MIT License
def test_xgb_classifier_multi_reglog(self):
        iris = load_iris()
        X = iris.data[:, :2]
        y = iris.target

        xgb = XGBClassifier(objective='reg:logistic')
        xgb.fit(X, y)
        conv_model = convert_sklearn(
            xgb, initial_types=[
                ('input', FloatTensorType(shape=[None, X.shape[1]]))])
        self.assertTrue(conv_model is not None)
        dump_multiple_classification(
            xgb, suffix="RegLog",
            allow_failure="StrictVersion(onnx.__version__) < "
            "StrictVersion('1.3.0')") 
Example #22
Source File: model.py    From polyaxon with Apache License 2.0
def model(log_learning_rate, max_depth=3, num_rounds=10, min_child_weight=5):
    model = XGBClassifier(
        learning_rate=10 ** log_learning_rate,
        max_depth=max_depth,
        num_rounds=num_rounds,
        min_child_weight=min_child_weight,
        objective='binary:logistic',
    )
    model.fit(X_train, y_train)
    pred = model.predict(X_test)
    return accuracy_score(pred, y_test) 
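One caveat when adapting this snippet: num_rounds is not a parameter of the scikit-learn XGBClassifier wrapper (the equivalent knob is n_estimators), so it is merely passed through as an extra booster parameter; on recent xgboost versions expect a warning rather than additional boosting rounds.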
Example #23
Source File: train.py    From smart-zoneminder with MIT License
def find_best_xgb_estimator(X, y, cv, param_comb, random_seed):
    # Random search over specified parameter values for XGBoost.
    # Exhaustive search takes many more cycles w/o much benefit.
    # Returns optimized XGBoost estimator.
    # Ref: https://www.kaggle.com/tilii7/hyperparameter-grid-search-with-xgboost
    print('\n Finding best XGBoost estimator...')
    param_grid = {
        'min_child_weight': [1, 5, 10],
        'gamma': [0.5, 1, 1.5, 2, 5],
        'subsample': [0.6, 0.8, 1.0],
        'colsample_bytree': [0.6, 0.8, 1.0],
        'max_depth': [3, 4, 5]
        }
    init_est = xgb(learning_rate=0.02, n_estimators=600, objective='multi:softprob',
        verbose=1, n_jobs=1, random_state=random_seed)
    random_search = RandomizedSearchCV(estimator=init_est,
        param_distributions=param_grid, n_iter=param_comb, n_jobs=4,
        cv=cv, verbose=1, random_state=random_seed)
    random_search.fit(X, y)
    #print('\n All results:')
    #print(random_search.cv_results_)
    print('\n Best estimator:')
    print(random_search.best_estimator_)
    print('\n Best score for {}-fold search with {} parameter combinations:'
        .format(FOLDS, PARA_COMB))
    print(random_search.best_score_)
    print('\n Best hyperparameters:')
    print(random_search.best_params_)
    return random_search.best_estimator_

# Load the known faces and embeddings. 
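Note that in this project `xgb` is evidently the XGBClassifier class imported under an alias, not the xgboost module as in the other snippets: it is called directly with scikit-learn-style keyword arguments and handed to RandomizedSearchCV as an estimator.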
Example #24
Source File: churn_calc.py    From fight-churn with MIT License
def model_instance(self,model_code):
        if model_code==self.LOGISTIC_REGRESSION:
            return LogisticRegression(penalty='l1', solver='liblinear',fit_intercept=True)
        elif model_code==self.RANDOM_FOREST:
            return RandomForestClassifier(class_weight='balanced')
        elif model_code==self.XGBOOST:
            return xgb.XGBClassifier(objective='binary:logistic')
        else:
            # Raising a bare string is a TypeError in Python 3; raise an exception instance instead
            raise ValueError("No model for model code %s" % model_code)
Example #25
Source File: test_sentinels.py    From hyperparameter_hunter with MIT License
def get_all_sentinels(env):
    """Get list of all dataset sentinel values in format expected by `XGBClassifier.fit.eval_set`"""
    return [
        (env.train_input, env.train_target),
        (env.validation_input, env.validation_target),
        (env.holdout_input, env.holdout_target),
    ] 
Example #26
Source File: test_predictors.py    From hyperparameter_hunter with MIT License
def test_predictor_holdout_breast_cancer():
    G.priority_callbacks = (DummyExperimentPredictorHoldout,)

    #################### Set Up Environment ####################
    env = Environment(
        train_dataset=get_breast_cancer_data(),
        results_path=assets_dir,
        holdout_dataset=get_holdout,
        target_column="diagnosis",
        metrics=["roc_auc_score"],
        cv_type="StratifiedKFold",
        cv_params=dict(n_splits=5, shuffle=True, random_state=32),
    )

    #################### Perform Experiment ####################
    experiment = CVExperiment(
        model_initializer=XGBClassifier,
        model_init_params=dict(
            objective="reg:linear",
            max_depth=3,
            n_estimators=100,
            learning_rate=0.02,
            min_child_weight=6,
            gamma=0.07,
            colsample_bytree=0.31,
        ),
        model_extra_params=dict(
            fit=dict(
                eval_set=[
                    (env.train_input, env.train_target),
                    (env.validation_input, env.validation_target),
                ],
                early_stopping_rounds=5,
                eval_metric="mae",
            )
        ),
    )

    G.priority_callbacks = tuple() 
Example #27
Source File: test_xgboost.py    From hyperparameter_hunter with MIT License
def exp_xgb_0():
    return CVExperiment(
        XGBClassifier, dict(subsample=0.01), model_extra_params=dict(fit=dict(verbose=False))
    )


##################################################
# Optimization Protocol Fixtures
################################################## 
Example #28
Source File: holdout_test_datasets_example.py    From hyperparameter_hunter with MIT License
def execute():
    env = Environment(
        train_dataset=get_toy_classification_data(),
        results_path="HyperparameterHunterAssets",
        # Both `holdout_dataset`, and `train_dataset` can be any of the following: pandas.DataFrame, filepath, or None
        # If a filepath is provided, it will be passed to :meth:`pandas.read_csv`.
        # In addition to the above types, `holdout_dataset` can also be provided as a callable (see :func:`get_holdout_set` above)
        holdout_dataset=get_holdout_set,
        test_dataset=get_toy_classification_data(),
        # By default, `holdout_dataset` will be scored with the provided metrics, just like OOF predictions
        # However, you can provide the additional `metrics_params` kwarg to specify which metrics are calculated for each dataset
        # See the documentation in :class:`environment.Environment` and :class:`metrics.ScoringMixIn` for more information
        metrics=["roc_auc_score"],
        cv_type=StratifiedKFold,
        cv_params=dict(n_splits=5, shuffle=True, random_state=32),
    )

    experiment = CVExperiment(
        model_initializer=XGBClassifier, model_init_params=dict(subsample=0.5)
    )
    # At the end of the Experiment, notice a few differences from the results of an Experiment given only training data:
    # 1) A "PredictionsHoldout" directory is created to house holdout predictions for Experiments given holdout data,
    # 2) A "PredictionsTest" directory is created to house test predictions for Experiments given test data,
    # 3) The Experiment's "Description" file will describe the evaluation of the holdout data, just like the OOF data,
    # 4) Leaderboards are modified to accommodate new holdout metrics evaluations, and
    # 5) New directories are created in "KeyAttributeLookup" for holdout and test datasets
    # The new "KeyAttributeLookup" entries serve to ensure the same datasets are used, and improper comparisons aren't made 
Example #29
Source File: do_full_save_example.py    From hyperparameter_hunter with MIT License
def execute():
    env = Environment(
        train_dataset=get_toy_classification_data(),
        results_path="HyperparameterHunterAssets",
        metrics=["roc_auc_score"],
        cv_type=RepeatedStratifiedKFold,
        cv_params=dict(n_splits=3, n_repeats=2, random_state=32),
        do_full_save=do_full_save,
    )

    experiment_0 = CVExperiment(
        model_initializer=XGBClassifier, model_init_params=dict(subsample=0.01)
    )
    # Pro Tip: By setting XGBoost's subsample ridiculously low, we can get bad scores on purpose

    # Upon completion of this Experiment, we see a warning that not all result files will be saved
    # This is because the final score of the Experiment was below our threshold of 0.75
    # Specifically, we skipped saving prediction files (OOF, holdout, test, or in-fold), and the heartbeat file

    # What still got saved is the Experiment's: key information, leaderboard position, and description file
    # These are saved to allow us to use the information for future hyperparameter optimization, and detect repeated Experiments
    # Additionally, the Experiment's script backup is saved, but that's because it's one of the first things that happens
    # For even finer control over what gets saved, use `do_full_save` together with `file_blacklist`

    # Now, let's perform another Experiment that does a bit better than our intentionally miserable one
    experiment_1 = CVExperiment(
        model_initializer=XGBClassifier, model_init_params=dict(subsample=0.5)
    )
    # Our second Experiment was executed in the same Environment, so it was still subject to the `do_full_save` constraint
    # However, because it scored above 0.75 (hopefully), all of the result files were saved 
Example #30
Source File: simple_experiment_example.py    From hyperparameter_hunter with MIT License
def execute():
    env = Environment(
        train_dataset=get_toy_classification_data(),
        results_path="HyperparameterHunterAssets",
        metrics=["roc_auc_score"],
        cv_type="StratifiedKFold",
        cv_params=dict(n_splits=5, shuffle=True, random_state=32),
    )

    experiment = CVExperiment(
        model_initializer=XGBClassifier,
        model_init_params=dict(
            objective="reg:linear", max_depth=3, n_estimators=100, subsample=0.5
        ),
    )