Python sklearn.model_selection.ParameterSampler() Examples
The following are 22 code examples of sklearn.model_selection.ParameterSampler().
You can go to the original project or source file by following the reference above each example.
You may also want to check out the other available functions and classes of the sklearn.model_selection module.
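
Before the project examples, here is a minimal, self-contained sketch of how ParameterSampler is typically used: it draws n_iter parameter combinations from a mix of discrete lists and scipy.stats distributions and yields them as plain dictionaries. The parameter names and ranges below ("kernel", "C") are illustrative assumptions, not taken from any of the projects listed here.

# Minimal usage sketch; the search space below is an illustrative assumption.
from scipy.stats import uniform
from sklearn.model_selection import ParameterSampler

param_distributions = {
    "kernel": ["rbf", "linear"],   # discrete values are sampled uniformly
    "C": uniform(0, 10),           # scipy distributions are sampled via .rvs()
}

# Draw 5 parameter settings reproducibly; iterating yields plain dicts.
sampler = ParameterSampler(param_distributions, n_iter=5, random_state=0)
for params in sampler:
    print(params)  # e.g. a dict with keys 'kernel' and 'C'

Each yielded dictionary can then be applied to an estimator with estimator.set_params(**params), which is essentially how RandomizedSearchCV consumes the sampler internally.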
Example #1
Source File: fixes.py From skutil with BSD 3-Clause "New" or "Revised" License
def fit(self, X, y=None):
    """Run fit on the estimator with randomly drawn parameters.

    Parameters
    ----------
    X : array-like, shape = [n_samples, n_features]
        Training vector, where n_samples is the number of samples and
        n_features is the number of features.

    y : array-like, shape = [n_samples] or [n_samples, n_output], optional
        Target relative to X for classification or regression;
        None for unsupervised learning.
    """
    sampled_params = ParameterSampler(self.param_distributions,
                                      self.n_iter,
                                      random_state=self.random_state)

    # the super class will handle the X, y validation
    return self._fit(X, y, sampled_params)
Example #2
Source File: test_search.py From twitter-stock-recommendation with MIT License
def test_param_sampler():
    # test basic properties of param sampler
    param_distributions = {"kernel": ["rbf", "linear"],
                           "C": uniform(0, 1)}
    sampler = ParameterSampler(param_distributions=param_distributions,
                               n_iter=10, random_state=0)
    samples = [x for x in sampler]
    assert_equal(len(samples), 10)
    for sample in samples:
        assert_true(sample["kernel"] in ["rbf", "linear"])
        assert_true(0 <= sample["C"] <= 1)

    # test that repeated calls yield identical parameters
    param_distributions = {"C": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]}
    sampler = ParameterSampler(param_distributions=param_distributions,
                               n_iter=3, random_state=0)
    assert_equal([x for x in sampler], [x for x in sampler])

    if sp_version >= (0, 16):
        param_distributions = {"C": uniform(0, 1)}
        sampler = ParameterSampler(param_distributions=param_distributions,
                                   n_iter=10, random_state=0)
        assert_equal([x for x in sampler], [x for x in sampler])
Example #3
Source File: random_search.py From spark-sklearn with Apache License 2.0
def fit(self, X, y=None, groups=None):
    """Run fit on the estimator with randomly drawn parameters.

    Parameters
    ----------
    X : array-like, shape = [n_samples, n_features]
        Training vector, where n_samples is the number of samples and
        n_features is the number of features.

    y : array-like, shape = [n_samples] or [n_samples, n_output], optional
        Target relative to X for classification or regression;
        None for unsupervised learning.

    groups : array-like, with shape (n_samples,), optional
        Group labels for the samples used while splitting the dataset into
        train/test set.
    """
    sampled_params = ParameterSampler(self.param_distributions,
                                      self.n_iter,
                                      random_state=self.random_state)
    return self._fit(X, y, groups, sampled_params)
Example #4
Source File: test_search.py From Mastering-Elasticsearch-7.0 with MIT License
def test_param_sampler():
    # test basic properties of param sampler
    param_distributions = {"kernel": ["rbf", "linear"],
                           "C": uniform(0, 1)}
    sampler = ParameterSampler(param_distributions=param_distributions,
                               n_iter=10, random_state=0)
    samples = [x for x in sampler]
    assert_equal(len(samples), 10)
    for sample in samples:
        assert sample["kernel"] in ["rbf", "linear"]
        assert 0 <= sample["C"] <= 1

    # test that repeated calls yield identical parameters
    param_distributions = {"C": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]}
    sampler = ParameterSampler(param_distributions=param_distributions,
                               n_iter=3, random_state=0)
    assert_equal([x for x in sampler], [x for x in sampler])

    if sp_version >= (0, 16):
        param_distributions = {"C": uniform(0, 1)}
        sampler = ParameterSampler(param_distributions=param_distributions,
                                   n_iter=10, random_state=0)
        assert_equal([x for x in sampler], [x for x in sampler])
Example #5
Source File: example.py From spotlight with MIT License
def sample_hyperparameters(random_state, num):

    space = {
        'n_iter': N_ITER,
        'batch_size': BATCH_SIZE,
        'l2': L2,
        'learning_rate': LEARNING_RATES,
        'loss': LOSSES,
        'embedding_dim': EMBEDDING_DIM,
    }

    sampler = ParameterSampler(space,
                               n_iter=num,
                               random_state=random_state)

    for params in sampler:
        yield params
Example #6
Source File: movielens_sequence.py From spotlight with MIT License
def sample_cnn_hyperparameters(random_state, num):

    space = {
        'n_iter': N_ITER,
        'batch_size': BATCH_SIZE,
        'l2': L2,
        'learning_rate': LEARNING_RATES,
        'loss': LOSSES,
        'embedding_dim': EMBEDDING_DIM,
        'kernel_width': [3, 5, 7],
        'num_layers': list(range(1, 10)),
        'dilation_multiplier': [1, 2],
        'nonlinearity': ['tanh', 'relu'],
        'residual': [True, False]
    }

    sampler = ParameterSampler(space,
                               n_iter=num,
                               random_state=random_state)

    for params in sampler:
        params['dilation'] = list(params['dilation_multiplier'] ** (i % 8)
                                  for i in range(params['num_layers']))
        yield params
Example #7
Source File: movielens_sequence.py From spotlight with MIT License
def sample_lstm_hyperparameters(random_state, num):

    space = {
        'n_iter': N_ITER,
        'batch_size': BATCH_SIZE,
        'l2': L2,
        'learning_rate': LEARNING_RATES,
        'loss': LOSSES,
        'embedding_dim': EMBEDDING_DIM,
    }

    sampler = ParameterSampler(space,
                               n_iter=num,
                               random_state=random_state)

    for params in sampler:
        yield params
Example #8
Source File: movielens_sequence.py From spotlight with MIT License
def sample_pooling_hyperparameters(random_state, num):

    space = {
        'n_iter': N_ITER,
        'batch_size': BATCH_SIZE,
        'l2': L2,
        'learning_rate': LEARNING_RATES,
        'loss': LOSSES,
        'embedding_dim': EMBEDDING_DIM,
    }

    sampler = ParameterSampler(space,
                               n_iter=num,
                               random_state=random_state)

    for params in sampler:
        yield params
Example #9
Source File: test_search.py From twitter-stock-recommendation with MIT License
def test_parameters_sampler_replacement():
    # raise error if n_iter too large
    params = {'first': [0, 1], 'second': ['a', 'b', 'c']}
    sampler = ParameterSampler(params, n_iter=7)
    assert_raises(ValueError, list, sampler)

    # degenerates to GridSearchCV if n_iter the same as grid_size
    sampler = ParameterSampler(params, n_iter=6)
    samples = list(sampler)
    assert_equal(len(samples), 6)
    for values in ParameterGrid(params):
        assert_true(values in samples)

    # test sampling without replacement in a large grid
    params = {'a': range(10), 'b': range(10), 'c': range(10)}
    sampler = ParameterSampler(params, n_iter=99, random_state=42)
    samples = list(sampler)
    assert_equal(len(samples), 99)
    hashable_samples = ["a%db%dc%d" % (p['a'], p['b'], p['c'])
                        for p in samples]
    assert_equal(len(set(hashable_samples)), 99)

    # doesn't go into infinite loops
    params_distribution = {'first': bernoulli(.5), 'second': ['a', 'b', 'c']}
    sampler = ParameterSampler(params_distribution, n_iter=7)
    samples = list(sampler)
    assert_equal(len(samples), 7)
Example #10
Source File: test_model_selection.py From pandas-ml with BSD 3-Clause "New" or "Revised" License
def test_objectmapper_abbr(self):
    df = pdml.ModelFrame([])

    # Splitter Classes
    self.assertIs(df.ms.KFold, ms.KFold)
    self.assertIs(df.ms.GroupKFold, ms.GroupKFold)
    self.assertIs(df.ms.StratifiedKFold, ms.StratifiedKFold)
    self.assertIs(df.ms.LeaveOneGroupOut, ms.LeaveOneGroupOut)
    self.assertIs(df.ms.LeavePGroupsOut, ms.LeavePGroupsOut)
    self.assertIs(df.ms.LeaveOneOut, ms.LeaveOneOut)
    self.assertIs(df.ms.LeavePOut, ms.LeavePOut)
    self.assertIs(df.ms.ShuffleSplit, ms.ShuffleSplit)
    self.assertIs(df.ms.GroupShuffleSplit, ms.GroupShuffleSplit)
    # self.assertIs(df.ms.StratifiedShuffleSplit,
    #               ms.StratifiedShuffleSplit)
    self.assertIs(df.ms.PredefinedSplit, ms.PredefinedSplit)
    self.assertIs(df.ms.TimeSeriesSplit, ms.TimeSeriesSplit)

    # Splitter Functions

    # Hyper-parameter optimizers
    self.assertIs(df.ms.GridSearchCV, ms.GridSearchCV)
    self.assertIs(df.ms.RandomizedSearchCV, ms.RandomizedSearchCV)
    self.assertIs(df.ms.ParameterGrid, ms.ParameterGrid)
    self.assertIs(df.ms.ParameterSampler, ms.ParameterSampler)

    # Model validation
Example #11
Source File: test_model_selection.py From pandas-ml with BSD 3-Clause "New" or "Revised" License
def test_objectmapper(self):
    df = pdml.ModelFrame([])

    # Splitter Classes
    self.assertIs(df.model_selection.KFold, ms.KFold)
    self.assertIs(df.model_selection.GroupKFold, ms.GroupKFold)
    self.assertIs(df.model_selection.StratifiedKFold, ms.StratifiedKFold)
    self.assertIs(df.model_selection.LeaveOneGroupOut, ms.LeaveOneGroupOut)
    self.assertIs(df.model_selection.LeavePGroupsOut, ms.LeavePGroupsOut)
    self.assertIs(df.model_selection.LeaveOneOut, ms.LeaveOneOut)
    self.assertIs(df.model_selection.LeavePOut, ms.LeavePOut)
    self.assertIs(df.model_selection.ShuffleSplit, ms.ShuffleSplit)
    self.assertIs(df.model_selection.GroupShuffleSplit,
                  ms.GroupShuffleSplit)
    # self.assertIs(df.model_selection.StratifiedShuffleSplit,
    #               ms.StratifiedShuffleSplit)
    self.assertIs(df.model_selection.PredefinedSplit, ms.PredefinedSplit)
    self.assertIs(df.model_selection.TimeSeriesSplit, ms.TimeSeriesSplit)

    # Splitter Functions

    # Hyper-parameter optimizers
    self.assertIs(df.model_selection.GridSearchCV, ms.GridSearchCV)
    self.assertIs(df.model_selection.RandomizedSearchCV,
                  ms.RandomizedSearchCV)
    self.assertIs(df.model_selection.ParameterGrid, ms.ParameterGrid)
    self.assertIs(df.model_selection.ParameterSampler, ms.ParameterSampler)

    # Model validation
Example #12
Source File: _incremental.py From dask-ml with BSD 3-Clause "New" or "Revised" License
def _get_params(self):
    if self.n_initial_parameters == "grid":
        return ParameterGrid(self.parameters)
    else:
        return ParameterSampler(
            self.parameters,
            self.n_initial_parameters,
            random_state=self.random_state,
        )
Example #13
Source File: _incremental.py From dask-ml with BSD 3-Clause "New" or "Revised" License
def _get_cv_results(self, history, model_hist):
    cv_results = {}
    best_scores = {}
    best_scores = {k: hist[-1]["score"] for k, hist in model_hist.items()}

    cv_results = {}
    for k, hist in model_hist.items():
        pf_times = list(toolz.pluck("partial_fit_time", hist))
        score_times = list(toolz.pluck("score_time", hist))
        cv_results[k] = {
            "mean_partial_fit_time": np.mean(pf_times),
            "mean_score_time": np.mean(score_times),
            "std_partial_fit_time": np.std(pf_times),
            "std_score_time": np.std(score_times),
            "test_score": best_scores[k],
            "model_id": k,
            "params": hist[0]["params"],
            "partial_fit_calls": hist[-1]["partial_fit_calls"],
        }
    cv_results = list(cv_results.values())  # list of dicts
    cv_results = {k: [res[k] for res in cv_results] for k in cv_results[0]}

    # Every model will have the same params because this class uses either
    # ParameterSampler or ParameterGrid
    params = defaultdict(list)
    for model_params in cv_results["params"]:
        for k, v in model_params.items():
            params[k].append(v)
    for k, v in params.items():
        cv_results["param_" + k] = v

    cv_results = {k: np.array(v) for k, v in cv_results.items()}
    cv_results["rank_test_score"] = scipy.stats.rankdata(
        -cv_results["test_score"], method="min"
    ).astype(int)
    return cv_results
Example #14
Source File: _search.py From dask-ml with BSD 3-Clause "New" or "Revised" License
def _get_param_iterator(self):
    """Return ParameterSampler instance for the given distributions"""
    return model_selection.ParameterSampler(
        self.param_distributions, self.n_iter, random_state=self.random_state
    )
Example #15
Source File: _search.py From dislib with Apache License 2.0
def _run_search(self, evaluate_candidates):
    """Search n_iter candidates from param_distributions"""
    ps = ParameterSampler(self.param_distributions, self.n_iter,
                          random_state=self.random_state)
    evaluate_candidates(ps)
Example #16
Source File: v2clean.py From Landmark2019-1st-and-3rd-Place-Solution with Apache License 2.0
def tuning(mode, n_iter, n_gpu, devices, save_interval, n_blocks, block_id):
    if n_gpu == -1:
        n_gpu = len(devices.split(','))

    space = [
        {
            'loss': ['arcface'],
            # 'verifythresh': [20, 30, 40, 50],
            # 'freqthresh': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
        },
    ]

    if mode == 'grid':
        candidate_list = list(ParameterGrid(space))
    elif mode == 'random':
        candidate_list = list(ParameterSampler(space, n_iter,
                                               random_state=params['seed']))
    else:
        raise ValueError

    n_per_block = math.ceil(len(candidate_list) / n_blocks)
    candidate_chunk = candidate_list[block_id * n_per_block: (block_id + 1) * n_per_block]

    utils.launch_tuning(mode=mode, n_iter=n_iter, n_gpu=n_gpu, devices=devices,
                        params=params, root=ROOT, save_interval=save_interval,
                        candidate_list=candidate_chunk)
Example #17
Source File: v1only.py From Landmark2019-1st-and-3rd-Place-Solution with Apache License 2.0
def tuning(mode, n_iter, n_gpu, devices, save_interval, n_blocks, block_id):
    if n_gpu == -1:
        n_gpu = len(devices.split(','))

    space = [
        {
            # 'loss': ['arcface', 'cosface'],
            'loss': ['arcface', 'cosface', 'softmax'],
            'epochs': [5],
            'augmentation': ['soft'],
        },
    ]

    if mode == 'grid':
        candidate_list = list(ParameterGrid(space))
    elif mode == 'random':
        candidate_list = list(ParameterSampler(space, n_iter,
                                               random_state=params['seed']))
    else:
        raise ValueError

    n_per_block = math.ceil(len(candidate_list) / n_blocks)
    candidate_chunk = candidate_list[block_id * n_per_block: (block_id + 1) * n_per_block]

    utils.launch_tuning(mode=mode, n_iter=n_iter, n_gpu=n_gpu, devices=devices,
                        params=params, root=ROOT, save_interval=save_interval,
                        candidate_list=candidate_chunk)
Example #18
Source File: test_search.py From Mastering-Elasticsearch-7.0 with MIT License
def test_parameters_sampler_replacement():
    # raise warning if n_iter is bigger than total parameter space
    params = {'first': [0, 1], 'second': ['a', 'b', 'c']}
    sampler = ParameterSampler(params, n_iter=7)
    n_iter = 7
    grid_size = 6
    expected_warning = ('The total space of parameters %d is smaller '
                        'than n_iter=%d. Running %d iterations. For '
                        'exhaustive searches, use GridSearchCV.'
                        % (grid_size, n_iter, grid_size))
    assert_warns_message(UserWarning, expected_warning,
                         list, sampler)

    # degenerates to GridSearchCV if n_iter the same as grid_size
    sampler = ParameterSampler(params, n_iter=6)
    samples = list(sampler)
    assert_equal(len(samples), 6)
    for values in ParameterGrid(params):
        assert values in samples

    # test sampling without replacement in a large grid
    params = {'a': range(10), 'b': range(10), 'c': range(10)}
    sampler = ParameterSampler(params, n_iter=99, random_state=42)
    samples = list(sampler)
    assert_equal(len(samples), 99)
    hashable_samples = ["a%db%dc%d" % (p['a'], p['b'], p['c'])
                        for p in samples]
    assert_equal(len(set(hashable_samples)), 99)

    # doesn't go into infinite loops
    params_distribution = {'first': bernoulli(.5), 'second': ['a', 'b', 'c']}
    sampler = ParameterSampler(params_distribution, n_iter=7)
    samples = list(sampler)
    assert_equal(len(samples), 7)
Example #19
Source File: regression.py From HungaBunga with MIT License
def run_one_regressor(x, y, small=True, normalize_x=True, n_jobs=cpu_count() - 1,
                      brain=False, test_size=0.2, n_splits=5, upsample=True,
                      scoring=None, verbose=False, grid_search=True):
    all_params = ((linear_models_n_params_small if small else linear_models_n_params) +
                  (nn_models_n_params_small if small else nn_models_n_params) +
                  ([] if small else gaussianprocess_models_n_params) +
                  neighbor_models_n_params +
                  (svm_models_n_params_small if small else svm_models_n_params) +
                  (tree_models_n_params_small if small else tree_models_n_params))
    all_params = random.choice(all_params)
    return all_params[0](**(list(ParameterSampler(all_params[1], n_iter=1))[0]))
Example #20
Source File: classification.py From HungaBunga with MIT License
def run_one_classifier(x, y, small=True, normalize_x=True, n_jobs=cpu_count() - 1,
                       brain=False, test_size=0.2, n_splits=5, upsample=True,
                       scoring=None, verbose=False, grid_search=True):
    all_params = ((linear_models_n_params_small if small else linear_models_n_params) +
                  (nn_models_n_params_small if small else nn_models_n_params) +
                  ([] if small else gaussianprocess_models_n_params) +
                  neighbor_models_n_params +
                  (svm_models_n_params_small if small else svm_models_n_params) +
                  (tree_models_n_params_small if small else tree_models_n_params))
    all_params = random.choice(all_params)
    return all_params[0](**(list(ParameterSampler(all_params[1], n_iter=1))[0]))
Example #21
Source File: grid_search.py From skutil with BSD 3-Clause "New" or "Revised" License
def fit(self, frame):
    """Fit the grid search.

    Parameters
    ----------
    frame : H2OFrame, shape=(n_samples, n_features)
        The training frame on which to fit.
    """
    sampled_params = ParameterSampler(self.param_grid,
                                      self.n_iter,
                                      random_state=self.random_state)

    # set our score class
    self.scoring_class_ = GainsStatisticalReport(**self.grsttngs_)

    # we can do this once to avoid many as_data_frame operations
    exp, loss, prem = _val_exp_loss_prem(self.exposure_feature,
                                         self.loss_feature,
                                         self.premium_feature)
    self.extra_args_ = {
        'expo': _as_numpy(frame[exp]),
        'loss': _as_numpy(frame[loss]),
        'prem': _as_numpy(frame[prem]) if prem is not None else None
    }

    # for validation set
    self.extra_names_ = {
        'expo': exp,
        'loss': loss,
        'prem': prem
    }

    # do fit
    the_fit = self._fit(frame, sampled_params)

    # clear extra_args_, because they might take lots of mem
    # we can do this because a re-fit will re-assign them anyways.
    # don't delete the extra_names_ though, because they're used in
    # scoring the incoming frame.
    del self.extra_args_

    return the_fit
Example #22
Source File: grid_search.py From skutil with BSD 3-Clause "New" or "Revised" License
def fit(self, frame):
    """Fit the grid search.

    Parameters
    ----------
    frame : H2OFrame, shape=(n_samples, n_features)
        The training frame on which to fit.
    """
    sampled_params = ParameterSampler(self.param_grid,
                                      self.n_iter,
                                      random_state=self.random_state)

    return self._fit(frame, sampled_params)