Python lightgbm.train() Examples
The following are 30 code examples of lightgbm.train().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module lightgbm, or try the search function.
Example #1
Source File: avito2.py From MachineLearning with Apache License 2.0 | 9 votes |
def run_lgb(train_X, train_y, val_X, val_y, test_X):
    """Train a LightGBM regressor with early stopping on a validation set.

    Parameters are train/validation features and targets plus test features.
    Returns (test predictions, fitted Booster, per-iteration eval results).
    """
    params = {
        "objective": "regression",
        "metric": "rmse",
        "num_leaves": 30,
        "learning_rate": 0.1,
        "bagging_fraction": 0.7,
        "feature_fraction": 0.7,
        # BUG FIX: the valid LightGBM alias is "bagging_freq";
        # "bagging_frequency" is unknown and was silently ignored,
        # which disabled bagging entirely.
        "bagging_freq": 5,
        "bagging_seed": 2018,
        "verbosity": -1,
    }
    lgtrain = lgb.Dataset(train_X, label=train_y)
    lgval = lgb.Dataset(val_X, label=val_y)
    evals_result = {}
    model = lgb.train(params, lgtrain, 10000,
                      valid_sets=[lgval],
                      early_stopping_rounds=100,
                      verbose_eval=20,
                      evals_result=evals_result)
    # Predict with the best iteration found by early stopping.
    pred_test_y = model.predict(test_X, num_iteration=model.best_iteration)
    return pred_test_y, model, evals_result
Example #2
Source File: automl.py From kddcup2019-automl with MIT License | 8 votes |
def hyperopt_lightgbm(X: pd.DataFrame, y: pd.Series, params: Dict, config: Config):
    """Search LightGBM hyperparameters with hyperopt TPE on a 50/50 holdout.

    Returns the best hyperparameter dict found after 150 evaluations.
    """
    X_train, X_val, y_train, y_val = data_split(X, y, test_size=0.5)
    train_data = lgb.Dataset(X_train, label=y_train)
    valid_data = lgb.Dataset(X_val, label=y_val)

    space = {
        "max_depth": hp.choice("max_depth", np.arange(2, 10, 1, dtype=int)),
        # keep num_leaves smaller than 2^(max_depth)
        "num_leaves": hp.choice("num_leaves", np.arange(4, 200, 4, dtype=int)),
        "feature_fraction": hp.quniform("feature_fraction", 0.2, 0.8, 0.1),
        "min_child_weight": hp.quniform('min_child_weight', 2, 50, 2),
        "reg_alpha": hp.uniform("reg_alpha", 2.0, 8.0),
        "reg_lambda": hp.uniform("reg_lambda", 2.0, 8.0),
        "learning_rate": hp.quniform("learning_rate", 0.05, 0.4, 0.01),
    }

    def objective(hyperparams):
        booster = lgb.train({**params, **hyperparams}, train_data, 300,
                            valid_data, early_stopping_rounds=45,
                            verbose_eval=0)
        score = booster.best_score["valid_0"][params["metric"]]
        # hyperopt minimizes, so negate a greater-is-better metric (e.g. AUC)
        return {'loss': -score, 'status': STATUS_OK}

    trials = Trials()
    best = hyperopt.fmin(fn=objective, space=space, trials=trials,
                         algo=tpe.suggest, max_evals=150, verbose=1,
                         rstate=np.random.RandomState(1))
    hyperparams = space_eval(space, best)
    log(f"auc = {-trials.best_trial['result']['loss']:0.4f} {hyperparams}")
    return hyperparams
Example #3
Source File: automl.py From Kaggler with MIT License | 7 votes |
def optimize_hyperparam(self, X, y, test_size=.2, n_eval=100):
    """Tune LightGBM hyperparameters via hyperopt TPE.

    Returns a tuple of (best hyperparameter dict, hyperopt Trials object).
    """
    X_trn, X_val, y_trn, y_val = train_test_split(X, y, test_size=test_size,
                                                  shuffle=self.shuffle)
    dtrn = lgb.Dataset(X_trn, label=y_trn)
    dval = lgb.Dataset(X_val, label=y_val)

    def objective(hyperparams):
        booster = lgb.train({**self.params, **hyperparams}, dtrn, self.n_est,
                            dval, early_stopping_rounds=self.n_stop,
                            verbose_eval=0)
        # loss_sign turns a greater-is-better metric into a minimization target
        score = booster.best_score["valid_0"][self.metric] * self.loss_sign
        return {'loss': score, 'status': STATUS_OK, 'model': booster}

    trials = Trials()
    best = hyperopt.fmin(fn=objective, space=self.space, trials=trials,
                         algo=tpe.suggest, max_evals=n_eval, verbose=1,
                         rstate=self.random_state)
    hyperparams = space_eval(self.space, best)
    return hyperparams, trials
Example #4
Source File: level2.py From kaggle-kuzushiji-2019 with MIT License | 7 votes |
def train_lgb(train_features, train_y, valid_features, valid_y, *, lr, num_boost_round):
    """Fit a binary LightGBM classifier, early-stopping on the validation set."""
    dtrain = lgb.Dataset(train_features, train_y)
    dvalid = lgb.Dataset(valid_features, valid_y, reference=dtrain)
    params = {
        'objective': 'binary',
        'metric': 'binary_logloss',
        'learning_rate': lr,
        'bagging_fraction': 0.8,
        'bagging_freq': 5,
        'feature_fraction': 0.9,
        'min_data_in_leaf': 20,
        'num_leaves': 41,
        'scale_pos_weight': 1.2,
        'lambda_l2': 1,
    }
    print(params)
    return lgb.train(
        params=params,
        train_set=dtrain,
        num_boost_round=num_boost_round,
        early_stopping_rounds=20,
        valid_sets=[dvalid],
        verbose_eval=10,
    )
Example #5
Source File: level2.py From kaggle-kuzushiji-2019 with MIT License | 6 votes |
def train_xgb(train_features, train_y, valid_features, valid_y, *, eta, num_boost_round):
    """Fit a binary-logistic XGBoost model, early-stopping on the eval set."""
    dtrain = xgb.DMatrix(train_features, label=train_y)
    dvalid = xgb.DMatrix(valid_features, label=valid_y)
    params = {
        'eta': eta,
        'objective': 'binary:logistic',
        'gamma': 0.01,
        'max_depth': 8,
    }
    print(params)
    watchlist = [(dvalid, 'eval')]
    return xgb.train(
        params,
        dtrain,
        num_boost_round,
        watchlist,
        early_stopping_rounds=20,
        verbose_eval=10,
    )
Example #6
Source File: misc.py From open-solution-mapping-challenge with MIT License | 6 votes |
def fit(self, X, y, X_valid, y_valid, feature_names, categorical_features, **kwargs):
    """Train a LightGBM booster on (X, y), early-stopping on (X_valid, y_valid).

    Stores the fitted Booster on self.estimator and returns self.
    """
    dtrain = lgb.Dataset(X, label=y,
                         feature_name=feature_names,
                         categorical_feature=categorical_features)
    dvalid = lgb.Dataset(X_valid, label=y_valid,
                         feature_name=feature_names,
                         categorical_feature=categorical_features)
    eval_history = {}
    self.estimator = lgb.train(
        self.model_config, dtrain,
        valid_sets=[dtrain, dvalid],
        valid_names=['train', 'valid'],
        evals_result=eval_history,
        num_boost_round=self.training_config.number_boosting_rounds,
        early_stopping_rounds=self.training_config.early_stopping_rounds,
        verbose_eval=self.model_config.verbose,
        feval=self.evaluation_function)
    return self
Example #7
Source File: lgb.py From kaggle-plasticc with MIT License | 6 votes |
def train_and_predict(train_df, test_df, features, params):
    """Cross-validated LightGBM training for multiclass prediction.

    Trains one model per stratified fold; collects out-of-fold class
    probabilities for the training rows and averages test predictions
    over folds. Returns (oof_preds, test_preds).
    """
    oof_preds = np.zeros((len(train_df), params["num_class"]))
    test_preds = np.zeros((len(test_df), params["num_class"]))
    # BUG FIX: random_state only takes effect when shuffle=True; recent
    # scikit-learn raises ValueError for random_state without shuffle.
    skf = StratifiedKFold(NUM_FOLDS, shuffle=True, random_state=4)
    for dev_index, val_index in skf.split(train_df, train_df["target"]):
        dev_df, val_df = train_df.iloc[dev_index], train_df.iloc[val_index]
        lgb_train = lgb.Dataset(dev_df[features], dev_df["target"],
                                weight=dev_df["sample_weight"])
        lgb_val = lgb.Dataset(val_df[features], val_df["target"],
                              weight=val_df["sample_weight"])
        model = lgb.train(params, lgb_train, num_boost_round=200,
                          valid_sets=[lgb_train, lgb_val],
                          early_stopping_rounds=10, verbose_eval=50)
        oof_preds[val_index, :] = model.predict(val_df[features])
        test_preds += model.predict(test_df[features]) / NUM_FOLDS
    return oof_preds, test_preds
Example #8
Source File: avito.py From MachineLearning with Apache License 2.0 | 6 votes |
def get_oof(clf, x_train, y, x_test):
    """Out-of-fold predictions for stacking.

    Uses the module-level fold iterator `kf` and sizes ntrain/ntest/NFOLDS.
    Returns (train OOF predictions, test predictions averaged over folds),
    both reshaped to column vectors.
    """
    oof_train = np.zeros((ntrain,))
    oof_test = np.zeros((ntest,))
    oof_test_skf = np.empty((NFOLDS, ntest))

    for fold, (train_index, test_index) in enumerate(kf):
        print('\nFold {}'.format(fold))
        clf.train(x_train[train_index], y[train_index])
        # predict held-out rows and the full test set for this fold
        oof_train[test_index] = clf.predict(x_train[test_index])
        oof_test_skf[fold, :] = clf.predict(x_test)

    oof_test[:] = oof_test_skf.mean(axis=0)
    return oof_train.reshape(-1, 1), oof_test.reshape(-1, 1)
Example #9
Source File: models.py From steppy-toolkit with MIT License | 6 votes |
def fit(self, X, y, X_valid, y_valid):
    """Validate targets, build LightGBM Datasets, and train the booster.

    Stores the fitted Booster on self.estimator and returns self.
    """
    self._check_target_shape_and_type(y, 'y')
    self._check_target_shape_and_type(y_valid, 'y_valid')
    y = self._format_target(y)
    y_valid = self._format_target(y_valid)

    logger.info('LightGBM transformer, train data shape {}'.format(X.shape))
    logger.info('LightGBM transformer, validation data shape {}'.format(X_valid.shape))
    logger.info('LightGBM transformer, train labels shape {}'.format(y.shape))
    logger.info('LightGBM transformer, validation labels shape {}'.format(y_valid.shape))

    dtrain = lgb.Dataset(data=X, label=y, **self.dataset_parameters)
    dvalid = lgb.Dataset(data=X_valid, label=y_valid, **self.dataset_parameters)
    self.estimator = lgb.train(params=self.booster_parameters,
                               train_set=dtrain,
                               valid_sets=[dtrain, dvalid],
                               valid_names=['data_train', 'data_valid'],
                               **self.training_parameters)
    return self
Example #10
Source File: test_LightGbmTreeEnsembleConverters.py From onnxmltools with MIT License | 6 votes |
def test_lightgbm_booster_multi_classifier(self):
    """Convert a 3-class LightGBM Booster to ONNX and check output names."""
    X = numpy.array([[0, 1], [1, 1], [2, 0], [1, 2], [-1, 2], [1, -2]],
                    dtype=numpy.float32)
    y = [0, 1, 0, 1, 2, 2]
    data = lightgbm.Dataset(X, label=y)
    model = lightgbm.train({'boosting_type': 'gbdt', 'objective': 'multiclass',
                            'n_estimators': 3, 'min_child_samples': 1,
                            'num_class': 3},
                           data)
    model_onnx, prefix = convert_model(model, 'tree-based classifier',
                                       [('input', FloatTensorType([None, 2]))])
    dump_data_and_model(X, model, model_onnx,
                        allow_failure="StrictVersion(onnx.__version__) < StrictVersion('1.3.0')",
                        basename=prefix + "BoosterBin" + model.__class__.__name__)
    try:
        from onnxruntime import InferenceSession
    except ImportError:
        # onnxruntime not installed (python 2.7)
        return
    sess = InferenceSession(model_onnx.SerializeToString())
    names = [o.name for o in sess.get_outputs()]
    assert names == ['label', 'probabilities']
Example #11
Source File: 04_PlanetKaggle_GPU.py From h2o4gpu with Apache License 2.0 | 6 votes |
def train_and_validate_lightgbm(params, train_features, train_labels,
                                validation_features, num_boost_round):
    """One-vs-rest LightGBM: train one binary model per label column.

    Returns (validation predictions of shape (n_val, n_classes),
    dict of per-class train/predict timings).
    """
    n_classes = train_labels.shape[1]
    y_val_pred = np.zeros((validation_features.shape[0], n_classes))
    time_results = defaultdict(list)
    for class_i in tqdm(range(n_classes)):
        lgb_train = lgb.Dataset(train_features, train_labels[:, class_i],
                                free_raw_data=False)
        with Timer() as t:
            model = lgb.train(params, lgb_train,
                              num_boost_round=num_boost_round)
        time_results['train_time'].append(t.interval)
        with Timer() as t:
            y_val_pred[:, class_i] = model.predict(validation_features)
        time_results['test_time'].append(t.interval)
    return y_val_pred, time_results
Example #12
Source File: lgb_model.py From autogluon with Apache License 2.0 | 6 votes |
def generate_datasets(self, X_train: DataFrame, Y_train: Series, params,
                      X_test=None, Y_test=None, dataset_train=None,
                      dataset_val=None, save=False):
    """Build LightGBM train/validation Dataset objects from dataframes.

    Reuses pre-built datasets when given; otherwise preprocesses the
    features and constructs them. Returns (dataset_train, dataset_val).
    """
    # Keys that are specific to lightGBM Dataset object construction.
    lgb_dataset_params_keys = ['objective', 'two_round', 'num_threads',
                               'num_classes', 'verbose']
    data_params = {key: params[key]
                   for key in lgb_dataset_params_keys if key in params}.copy()

    W_train = None  # TODO: Add weight support
    W_test = None  # TODO: Add weight support
    if X_train is not None:
        X_train = self.preprocess(X_train, is_train=True)
    if X_test is not None:
        X_test = self.preprocess(X_test)

    # TODO: Try creating multiple Datasets for subsets of features, then
    # combining with Dataset.add_features_from(); might avoid memory spike
    if not dataset_train:
        dataset_train = construct_dataset(
            x=X_train, y=Y_train,
            location=f'{self.path}datasets{os.path.sep}train',
            params=data_params, save=save, weight=W_train)
    if (not dataset_val) and (X_test is not None) and (Y_test is not None):
        dataset_val = construct_dataset(
            x=X_test, y=Y_test,
            location=f'{self.path}datasets{os.path.sep}val',
            reference=dataset_train, params=data_params, save=save,
            weight=W_test)
    return dataset_train, dataset_val
Example #13
Source File: optimize.py From optuna with MIT License | 6 votes |
def __call__(self, trial: optuna.trial.Trial) -> float:
    """Run one Optuna trial: train a booster, score it, track the best."""
    self._preprocess(trial)

    start_time = time.time()
    booster = lgb.train(self.lgbm_params, self.train_set, **self.lgbm_kwargs)
    val_score = self._get_booster_best_score(booster)
    elapsed_secs = time.time() - start_time
    average_iteration_time = elapsed_secs / booster.current_iteration()

    # Optionally persist the trained booster for this trial.
    if self.model_dir is not None:
        path = os.path.join(self.model_dir, "{}.pkl".format(trial.number))
        with open(path, "wb") as fout:
            pickle.dump(booster, fout)
        _logger.info("The booster of trial#{} was saved as {}.".format(trial.number, path))

    if self.compare_validation_metrics(val_score, self.best_score):
        self.best_score = val_score
        self.best_booster_with_trial_number = (booster, trial.number)

    self._postprocess(trial, elapsed_secs, average_iteration_time)
    return val_score
Example #14
Source File: test_lightgbm.py From docker-python with Apache License 2.0 | 6 votes |
def test_cpu(self):
    """Smoke-test CPU training: one boosting round on prebuilt binary data."""
    lgb_train = lgb.Dataset('/input/tests/data/lgb_train.bin')
    lgb_eval = lgb.Dataset('/input/tests/data/lgb_test.bin',
                           reference=lgb_train)
    params = {
        'task': 'train',
        'boosting_type': 'gbdt',
        'objective': 'regression',
        'metric': {'l2', 'auc'},
        'num_leaves': 31,
        'learning_rate': 0.05,
        'feature_fraction': 0.9,
        'bagging_fraction': 0.8,
        'bagging_freq': 5,
        'verbose': 0,
    }
    # Run only one round for faster test
    gbm = lgb.train(params, lgb_train, num_boost_round=1,
                    valid_sets=lgb_eval, early_stopping_rounds=1)
    self.assertEqual(1, gbm.best_iteration)
Example #15
Source File: test_lightgbm.py From docker-python with Apache License 2.0 | 6 votes |
def test_gpu(self):
    """Smoke-test GPU training: one boosting round with device='gpu'."""
    lgb_train = lgb.Dataset('/input/tests/data/lgb_train.bin')
    lgb_eval = lgb.Dataset('/input/tests/data/lgb_test.bin',
                           reference=lgb_train)
    params = {
        'boosting_type': 'gbdt',
        'objective': 'regression',
        'metric': 'auc',
        'num_leaves': 31,
        'learning_rate': 0.05,
        'feature_fraction': 0.9,
        'bagging_fraction': 0.8,
        'bagging_freq': 5,
        'verbose': 1,
        'device': 'gpu',
    }
    # Run only one round for faster test
    gbm = lgb.train(params, lgb_train, num_boost_round=1,
                    valid_sets=lgb_eval, early_stopping_rounds=1)
    self.assertEqual(1, gbm.best_iteration)
Example #16
Source File: test_lightgbm.py From optuna with MIT License | 6 votes |
def objective(
    trial, metric="binary_error", valid_name="valid_0", force_default_valid_names=False, cv=False
):
    # type: (optuna.trial.Trial, str, str, bool, bool) -> float
    """Tiny training objective used to exercise LightGBMPruningCallback."""
    dtrain = lgb.Dataset([[1.0], [2.0], [3.0]], label=[1.0, 0.0, 1.0])
    dtest = lgb.Dataset([[1.0]], label=[1.0])

    # Either let LightGBM assign default validation names or pass one.
    valid_names = None if force_default_valid_names else [valid_name]

    pruning_callback = LightGBMPruningCallback(trial, metric, valid_name=valid_name)
    if cv:
        lgb.cv(
            {"objective": "binary", "metric": ["auc", "binary_error"]},
            dtrain,
            1,
            verbose_eval=False,
            nfold=2,
            callbacks=[pruning_callback],
        )
    else:
        lgb.train(
            {"objective": "binary", "metric": ["auc", "binary_error"]},
            dtrain,
            1,
            valid_sets=[dtest],
            valid_names=valid_names,
            verbose_eval=False,
            callbacks=[pruning_callback],
        )
    return 1.0
Example #17
Source File: lightgbm.py From talkingdata-adtracking-fraud-detection with MIT License | 6 votes |
def train_and_predict(self, train, valid, weight, categorical_features: List[str],
                      target: str, params: dict) -> Tuple[Booster, dict]:
    """Train a LightGBM booster on train/valid DataFrames.

    Raises ValueError on non-DataFrame input or mismatched columns.
    Returns (fitted Booster, eval results dict).
    """
    if type(train) != pd.DataFrame or type(valid) != pd.DataFrame:
        raise ValueError('Parameter train and valid must be pandas.DataFrame')
    if list(train.columns) != list(valid.columns):
        raise ValueError('Train and valid must have a same column list')

    predictors = train.columns.drop(target)
    if weight is None:
        d_train = lgb.Dataset(train[predictors], label=train[target].values)
    else:
        print(weight)
        d_train = lgb.Dataset(train[predictors], label=train[target].values,
                              weight=weight)
    d_valid = lgb.Dataset(valid[predictors], label=valid[target].values)

    eval_results = {}
    model: Booster = lgb.train(params['model_params'], d_train,
                               categorical_feature=categorical_features,
                               valid_sets=[d_train, d_valid],
                               valid_names=['train', 'valid'],
                               evals_result=eval_results,
                               **params['train_params'])
    return model, eval_results
Example #18
Source File: lightgbm_simple.py From optuna with MIT License | 6 votes |
def objective(trial):
    """Optuna objective: tune LightGBM on breast-cancer data, return accuracy."""
    data, target = sklearn.datasets.load_breast_cancer(return_X_y=True)
    train_x, valid_x, train_y, valid_y = train_test_split(data, target,
                                                          test_size=0.25)
    dtrain = lgb.Dataset(train_x, label=train_y)

    param = {
        "objective": "binary",
        "metric": "binary_logloss",
        "verbosity": -1,
        "boosting_type": "gbdt",
        "lambda_l1": trial.suggest_loguniform("lambda_l1", 1e-8, 10.0),
        "lambda_l2": trial.suggest_loguniform("lambda_l2", 1e-8, 10.0),
        "num_leaves": trial.suggest_int("num_leaves", 2, 256),
        "feature_fraction": trial.suggest_uniform("feature_fraction", 0.4, 1.0),
        "bagging_fraction": trial.suggest_uniform("bagging_fraction", 0.4, 1.0),
        "bagging_freq": trial.suggest_int("bagging_freq", 1, 7),
        "min_child_samples": trial.suggest_int("min_child_samples", 5, 100),
    }

    gbm = lgb.train(param, dtrain)
    preds = gbm.predict(valid_x)
    # threshold probabilities at 0.5 via rounding
    pred_labels = np.rint(preds)
    return sklearn.metrics.accuracy_score(valid_y, pred_labels)
Example #19
Source File: test_lightgbm_autolog.py From mlflow with Apache License 2.0 | 6 votes |
def test_lgb_autolog_logs_metrics_with_multi_validation_data_and_metrics(bst_params, train_set):
    """Autolog should record every metric for every validation set."""
    mlflow.lightgbm.autolog()
    evals_result = {}
    params = {'metric': ['multi_error', 'multi_logloss']}
    params.update(bst_params)
    valid_sets = [train_set, lgb.Dataset(train_set.data)]
    valid_names = ['train', 'valid']
    lgb.train(params, train_set, num_boost_round=10,
              valid_sets=valid_sets, valid_names=valid_names,
              evals_result=evals_result)
    run = get_latest_run()
    data = run.data
    client = mlflow.tracking.MlflowClient()
    # Every (valid set, metric) pair must appear with full 10-round history.
    for valid_name in valid_names:
        for metric_name in params['metric']:
            metric_key = '{}-{}'.format(valid_name, metric_name)
            metric_history = [x.value for x in
                              client.get_metric_history(run.info.run_id, metric_key)]
            assert metric_key in data.metrics
            assert len(metric_history) == 10
            assert metric_history == evals_result[valid_name][metric_name]
Example #20
Source File: test_lightgbm_autolog.py From mlflow with Apache License 2.0 | 6 votes |
def test_lgb_autolog_logs_metrics_with_multi_metrics(bst_params, train_set):
    """Autolog should record each configured metric for a single valid set."""
    mlflow.lightgbm.autolog()
    evals_result = {}
    params = {'metric': ['multi_error', 'multi_logloss']}
    params.update(bst_params)
    valid_sets = [train_set]
    valid_names = ['train']
    lgb.train(params, train_set, num_boost_round=10,
              valid_sets=valid_sets, valid_names=valid_names,
              evals_result=evals_result)
    run = get_latest_run()
    data = run.data
    client = mlflow.tracking.MlflowClient()
    for metric_name in params['metric']:
        metric_key = '{}-{}'.format(valid_names[0], metric_name)
        metric_history = [x.value for x in
                          client.get_metric_history(run.info.run_id, metric_key)]
        assert metric_key in data.metrics
        assert len(metric_history) == 10
        assert metric_history == evals_result['train'][metric_name]
Example #21
Source File: test_lightgbm_autolog.py From mlflow with Apache License 2.0 | 6 votes |
def test_lgb_autolog_logs_metrics_with_multi_validation_data(bst_params, train_set):
    """Autolog should record the default metric for each validation set."""
    mlflow.lightgbm.autolog()
    evals_result = {}
    # If we use [train_set, train_set] here, LightGBM ignores the first dataset.
    # To avoid that, create a new Dataset object.
    valid_sets = [train_set, lgb.Dataset(train_set.data)]
    valid_names = ['train', 'valid']
    lgb.train(bst_params, train_set, num_boost_round=10,
              valid_sets=valid_sets, valid_names=valid_names,
              evals_result=evals_result)
    run = get_latest_run()
    data = run.data
    client = mlflow.tracking.MlflowClient()
    for valid_name in valid_names:
        metric_key = '{}-multi_logloss'.format(valid_name)
        metric_history = [x.value for x in
                          client.get_metric_history(run.info.run_id, metric_key)]
        assert metric_key in data.metrics
        assert len(metric_history) == 10
        assert metric_history == evals_result[valid_name]['multi_logloss']
Example #22
Source File: test_lightgbm_autolog.py From mlflow with Apache License 2.0 | 6 votes |
def test_lgb_autolog_logs_default_params(bst_params, train_set):
    """Autolog should log lgb.train defaults but skip non-param arguments."""
    mlflow.lightgbm.autolog()
    lgb.train(bst_params, train_set)
    run = get_latest_run()
    params = run.data.params

    expected_params = {
        'num_boost_round': 100,
        'feature_name': 'auto',
        'categorical_feature': 'auto',
        'verbose_eval': True,
        'keep_training_booster': False,
    }
    expected_params.update(bst_params)
    for key, val in expected_params.items():
        assert key in params
        assert params[key] == str(val)

    # Arguments that are not hyperparameters must not be logged.
    unlogged_params = ['params', 'train_set', 'valid_sets', 'valid_names',
                       'fobj', 'feval', 'init_model', 'evals_result',
                       'learning_rates', 'callbacks']
    for param in unlogged_params:
        assert param not in params
Example #23
Source File: models.py From open-solution-data-science-bowl-2018 with MIT License | 6 votes |
def fit(self, X, y, X_valid, y_valid, feature_names, categorical_features, **kwargs):
    """Train a LightGBM booster on (X, y), early-stopping on (X_valid, y_valid).

    Stores the fitted Booster on self.estimator and returns self.
    """
    dtrain = lgb.Dataset(X, label=y,
                         feature_name=feature_names,
                         categorical_feature=categorical_features)
    dvalid = lgb.Dataset(X_valid, label=y_valid,
                         feature_name=feature_names,
                         categorical_feature=categorical_features)
    eval_history = {}
    self.estimator = lgb.train(
        self.model_params, dtrain,
        valid_sets=[dtrain, dvalid],
        valid_names=['train', 'valid'],
        evals_result=eval_history,
        num_boost_round=self.training_params.number_boosting_rounds,
        early_stopping_rounds=self.training_params.early_stopping_rounds,
        verbose_eval=10,
        feval=self.evaluation_function)
    return self
Example #24
Source File: misc.py From open-solution-data-science-bowl-2018 with MIT License | 6 votes |
def fit(self, X, y, X_valid, y_valid, feature_names, categorical_features, **kwargs):
    """Train a LightGBM booster driven by self.model_config/training_config.

    Stores the fitted Booster on self.estimator and returns self.
    """
    dtrain = lgb.Dataset(X, label=y,
                         feature_name=feature_names,
                         categorical_feature=categorical_features)
    dvalid = lgb.Dataset(X_valid, label=y_valid,
                         feature_name=feature_names,
                         categorical_feature=categorical_features)
    eval_history = {}
    self.estimator = lgb.train(
        self.model_config, dtrain,
        valid_sets=[dtrain, dvalid],
        valid_names=['train', 'valid'],
        evals_result=eval_history,
        num_boost_round=self.training_config.number_boosting_rounds,
        early_stopping_rounds=self.training_config.early_stopping_rounds,
        verbose_eval=self.model_config.verbose,
        feval=self.evaluation_function)
    return self
Example #25
Source File: main.py From nni with MIT License | 6 votes |
def load_data(train_path='./data/regression.train', test_path='./data/regression.test'):
    '''
    Load or create dataset
    '''
    print('Load data...')
    df_train = pd.read_csv(train_path, header=None, sep='\t')
    df_test = pd.read_csv(test_path, header=None, sep='\t')

    # Hold out the last 10% of the training rows as an eval split.
    split_num = int(0.9 * len(df_train))

    y_train = df_train[0].values
    y_test = df_test[0].values
    y_eval = y_train[split_num:]
    y_train = y_train[:split_num]

    # Column 0 is the target; the rest are features.
    X_train = df_train.drop(0, axis=1).values
    X_test = df_test.drop(0, axis=1).values
    X_eval = X_train[split_num:, :]
    X_train = X_train[:split_num, :]

    # create dataset for lightgbm
    lgb_train = lgb.Dataset(X_train, y_train)
    lgb_eval = lgb.Dataset(X_eval, y_eval, reference=lgb_train)
    return lgb_train, lgb_eval, X_test, y_test
Example #26
Source File: main.py From nni with MIT License | 6 votes |
def run(lgb_train, lgb_eval, params, X_test, y_test):
    """Train LightGBM, evaluate RMSE on the test set, and report to NNI."""
    print('Start training...')
    params['num_leaves'] = int(params['num_leaves'])

    # train
    gbm = lgb.train(params, lgb_train, num_boost_round=20,
                    valid_sets=lgb_eval, early_stopping_rounds=5)

    print('Start predicting...')
    # predict
    y_pred = gbm.predict(X_test, num_iteration=gbm.best_iteration)

    # eval
    rmse = mean_squared_error(y_test, y_pred) ** 0.5
    print('The rmse of prediction is:', rmse)
    nni.report_final_result(rmse)
Example #27
Source File: lightgbm_example.py From ray with Apache License 2.0 | 6 votes |
def train_breast_cancer(config):
    """Ray Tune trainable: train LightGBM on breast-cancer data, report accuracy."""
    data, target = sklearn.datasets.load_breast_cancer(return_X_y=True)
    train_x, test_x, train_y, test_y = train_test_split(
        data, target, test_size=0.25)
    train_set = lgb.Dataset(train_x, label=train_y)
    test_set = lgb.Dataset(test_x, label=test_y)
    gbm = lgb.train(
        config,
        train_set,
        valid_sets=[test_set],
        verbose_eval=False,
        callbacks=[LightGBMCallback])
    preds = gbm.predict(test_x)
    # round predicted probabilities to 0/1 labels
    pred_labels = np.rint(preds)
    tune.report(
        mean_accuracy=sklearn.metrics.accuracy_score(test_y, pred_labels),
        done=True)
Example #28
Source File: models.py From open-solution-home-credit with MIT License | 6 votes |
def fit(self, X, y, X_valid, y_valid, feature_names=None, feature_types=None, **kwargs):
    """Train an XGBoost booster on (X, y), early-stopping on (X_valid, y_valid).

    Stores the fitted booster on self.estimator and returns self.
    """
    dtrain = xgb.DMatrix(X, label=y,
                         feature_names=feature_names,
                         feature_types=feature_types)
    dvalid = xgb.DMatrix(X_valid, label=y_valid,
                         feature_names=feature_names,
                         feature_types=feature_types)
    eval_history = {}
    self.estimator = xgb.train(
        params=self.model_config,
        dtrain=dtrain,
        evals=[(dtrain, 'train'), (dvalid, 'valid')],
        evals_result=eval_history,
        num_boost_round=self.training_config.nrounds,
        early_stopping_rounds=self.training_config.early_stopping_rounds,
        verbose_eval=self.model_config.verbose,
        feval=self.evaluation_function)
    return self
Example #29
Source File: models.py From open-solution-home-credit with MIT License | 6 votes |
def fit(self, X, y, X_valid, y_valid, feature_names=None, categorical_features=None, **kwargs):
    """Fit the CatBoost estimator with an eval set and categorical indices.

    Returns self after fitting self.estimator in place.
    """
    logger.info('Catboost, train data shape {}'.format(X.shape))
    logger.info('Catboost, validation data shape {}'.format(X_valid.shape))
    logger.info('Catboost, train labels shape {}'.format(y.shape))
    logger.info('Catboost, validation labels shape {}'.format(y_valid.shape))

    # CatBoost takes categorical columns by positional index, not name.
    categorical_indeces = self._get_categorical_indeces(feature_names,
                                                        categorical_features)
    self.estimator.fit(X, y,
                       eval_set=(X_valid, y_valid),
                       cat_features=categorical_indeces)
    return self
Example #30
Source File: models.py From open-solution-mapping-challenge with MIT License | 6 votes |
def fit(self, X, y, X_valid, y_valid, feature_names, categorical_features, **kwargs):
    """Train a LightGBM booster driven by self.model_params/training_params.

    Stores the fitted Booster on self.estimator and returns self.
    """
    dtrain = lgb.Dataset(X, label=y,
                         feature_name=feature_names,
                         categorical_feature=categorical_features)
    dvalid = lgb.Dataset(X_valid, label=y_valid,
                         feature_name=feature_names,
                         categorical_feature=categorical_features)
    eval_history = {}
    self.estimator = lgb.train(
        self.model_params, dtrain,
        valid_sets=[dtrain, dvalid],
        valid_names=['train', 'valid'],
        evals_result=eval_history,
        num_boost_round=self.training_params.number_boosting_rounds,
        early_stopping_rounds=self.training_params.early_stopping_rounds,
        verbose_eval=10,
        feval=self.evaluation_function)
    return self