Python sklearn.model_selection.ParameterSampler() Examples

The following are 22 code examples showing how to use sklearn.model_selection.ParameterSampler(). These examples are extracted from open source projects; each one lists its originating project, author, file, and license.

You may also want to check out all available functions and classes of the sklearn.model_selection module.
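
All of the examples below follow the same basic pattern: ParameterSampler turns a dict mapping parameter names to lists (sampled uniformly at random) or scipy.stats distributions (sampled via their rvs method) into an iterable of n_iter parameter dicts. Here is a minimal, self-contained sketch of that pattern; the parameter space is illustrative and not taken from any of the projects below.

from scipy.stats import uniform
from sklearn.model_selection import ParameterSampler

# Lists are sampled uniformly at random; scipy distributions via .rvs().
param_distributions = {
    "kernel": ["rbf", "linear"],  # discrete choices
    "C": uniform(0, 1),           # continuous: uniform on [0, 1)
}

# A fixed random_state makes the draw reproducible: iterating the sampler
# twice yields the same sequence of parameter dicts.
sampler = ParameterSampler(param_distributions, n_iter=5, random_state=0)

for params in sampler:
    print(params)  # e.g. {'C': 0.5488..., 'kernel': 'linear'}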

Example 1
Project: skutil   Author: tgsmith61591   File: fixes.py    License: BSD 3-Clause "New" or "Revised" License
def fit(self, X, y=None):
        """Run fit on the estimator with randomly drawn parameters.

        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features]
            Training vector, where n_samples is the number of samples and
            n_features is the number of features.
        y : array-like, shape = [n_samples] or [n_samples, n_output], optional
            Target relative to X for classification or regression;
            None for unsupervised learning.
        """
        sampled_params = ParameterSampler(self.param_distributions,
                                          self.n_iter,
                                          random_state=self.random_state)

        # the super class will handle the X, y validation
        return self._fit(X, y, sampled_params) 
Example 2
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_search.py    License: MIT License
def test_param_sampler():
    # test basic properties of param sampler
    param_distributions = {"kernel": ["rbf", "linear"],
                           "C": uniform(0, 1)}
    sampler = ParameterSampler(param_distributions=param_distributions,
                               n_iter=10, random_state=0)
    samples = [x for x in sampler]
    assert_equal(len(samples), 10)
    for sample in samples:
        assert sample["kernel"] in ["rbf", "linear"]
        assert 0 <= sample["C"] <= 1

    # test that repeated calls yield identical parameters
    param_distributions = {"C": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]}
    sampler = ParameterSampler(param_distributions=param_distributions,
                               n_iter=3, random_state=0)
    assert_equal([x for x in sampler], [x for x in sampler])

    if sp_version >= (0, 16):
        param_distributions = {"C": uniform(0, 1)}
        sampler = ParameterSampler(param_distributions=param_distributions,
                                   n_iter=10, random_state=0)
        assert_equal([x for x in sampler], [x for x in sampler]) 
Example 3
Project: spotlight   Author: maciejkula   File: example.py    License: MIT License
def sample_hyperparameters(random_state, num):

    space = {
        'n_iter': N_ITER,
        'batch_size': BATCH_SIZE,
        'l2': L2,
        'learning_rate': LEARNING_RATES,
        'loss': LOSSES,
        'embedding_dim': EMBEDDING_DIM,
    }

    sampler = ParameterSampler(space,
                               n_iter=num,
                               random_state=random_state)

    for params in sampler:
        yield params 
Example 4
Project: spotlight   Author: maciejkula   File: movielens_sequence.py    License: MIT License
def sample_cnn_hyperparameters(random_state, num):

    space = {
        'n_iter': N_ITER,
        'batch_size': BATCH_SIZE,
        'l2': L2,
        'learning_rate': LEARNING_RATES,
        'loss': LOSSES,
        'embedding_dim': EMBEDDING_DIM,
        'kernel_width': [3, 5, 7],
        'num_layers': list(range(1, 10)),
        'dilation_multiplier': [1, 2],
        'nonlinearity': ['tanh', 'relu'],
        'residual': [True, False]
    }

    sampler = ParameterSampler(space,
                               n_iter=num,
                               random_state=random_state)

    for params in sampler:
        params['dilation'] = list(params['dilation_multiplier'] ** (i % 8)
                                  for i in range(params['num_layers']))

        yield params 
Example 5
Project: spotlight   Author: maciejkula   File: movielens_sequence.py    License: MIT License
def sample_lstm_hyperparameters(random_state, num):

    space = {
        'n_iter': N_ITER,
        'batch_size': BATCH_SIZE,
        'l2': L2,
        'learning_rate': LEARNING_RATES,
        'loss': LOSSES,
        'embedding_dim': EMBEDDING_DIM,
    }

    sampler = ParameterSampler(space,
                               n_iter=num,
                               random_state=random_state)

    for params in sampler:

        yield params 
Example 6
Project: spotlight   Author: maciejkula   File: movielens_sequence.py    License: MIT License
def sample_pooling_hyperparameters(random_state, num):

    space = {
        'n_iter': N_ITER,
        'batch_size': BATCH_SIZE,
        'l2': L2,
        'learning_rate': LEARNING_RATES,
        'loss': LOSSES,
        'embedding_dim': EMBEDDING_DIM,
    }

    sampler = ParameterSampler(space,
                               n_iter=num,
                               random_state=random_state)

    for params in sampler:

        yield params 
Example 7
Project: spark-sklearn   Author: databricks   File: random_search.py    License: Apache License 2.0
def fit(self, X, y=None, groups=None):
        """Run fit on the estimator with randomly drawn parameters.

        Parameters
        ----------

        X : array-like, shape = [n_samples, n_features]
            Training vector, where n_samples is the number of samples and
            n_features is the number of features.

        y : array-like, shape = [n_samples] or [n_samples, n_output], optional
            Target relative to X for classification or regression;
            None for unsupervised learning.

        groups : array-like, with shape (n_samples,), optional
            Group labels for the samples used while splitting the dataset into
            train/test set.
        """
        sampled_params = ParameterSampler(self.param_distributions,
                                          self.n_iter,
                                          random_state=self.random_state)

        return self._fit(X, y, groups, sampled_params) 
Example 8
Project: twitter-stock-recommendation   Author: alvarobartt   File: test_search.py    License: MIT License
def test_param_sampler():
    # test basic properties of param sampler
    param_distributions = {"kernel": ["rbf", "linear"],
                           "C": uniform(0, 1)}
    sampler = ParameterSampler(param_distributions=param_distributions,
                               n_iter=10, random_state=0)
    samples = [x for x in sampler]
    assert_equal(len(samples), 10)
    for sample in samples:
        assert_true(sample["kernel"] in ["rbf", "linear"])
        assert_true(0 <= sample["C"] <= 1)

    # test that repeated calls yield identical parameters
    param_distributions = {"C": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]}
    sampler = ParameterSampler(param_distributions=param_distributions,
                               n_iter=3, random_state=0)
    assert_equal([x for x in sampler], [x for x in sampler])

    if sp_version >= (0, 16):
        param_distributions = {"C": uniform(0, 1)}
        sampler = ParameterSampler(param_distributions=param_distributions,
                                   n_iter=10, random_state=0)
        assert_equal([x for x in sampler], [x for x in sampler]) 
Example 9
Project: skutil   Author: tgsmith61591   File: grid_search.py    License: BSD 3-Clause "New" or "Revised" License
def fit(self, frame):
        """Fit the grid search.

        Parameters
        ----------

        frame : H2OFrame, shape=(n_samples, n_features)
            The training frame on which to fit.
        """
        sampled_params = ParameterSampler(self.param_grid,
                                          self.n_iter,
                                          random_state=self.random_state)

        return self._fit(frame, sampled_params) 
Example 10
Project: skutil   Author: tgsmith61591   File: grid_search.py    License: BSD 3-Clause "New" or "Revised" License
def fit(self, frame):
        """Fit the grid search.

        Parameters
        ----------

        frame : H2OFrame, shape=(n_samples, n_features)
            The training frame on which to fit.
        """
        sampled_params = ParameterSampler(self.param_grid,
                                          self.n_iter,
                                          random_state=self.random_state)

        # set our score class
        self.scoring_class_ = GainsStatisticalReport(**self.grsttngs_)

        # we can do this once to avoid many as_data_frame operations
        exp, loss, prem = _val_exp_loss_prem(self.exposure_feature, self.loss_feature, self.premium_feature)
        self.extra_args_ = {
            'expo': _as_numpy(frame[exp]),
            'loss': _as_numpy(frame[loss]),
            'prem': _as_numpy(frame[prem]) if prem is not None else None
        }

        # for validation set
        self.extra_names_ = {
            'expo': exp,
            'loss': loss,
            'prem': prem
        }

        # do fit
        the_fit = self._fit(frame, sampled_params)

        # clear extra_args_, because they might take lots of mem
        # we can do this because a re-fit will re-assign them anyways.
        # don't delete the extra_names_ though, because they're used in
        # scoring the incoming frame.
        del self.extra_args_

        return the_fit 
Example 11
Project: HungaBunga   Author: ypeleg   File: classification.py    License: MIT License
def run_one_classifier(x, y, small=True, normalize_x=True, n_jobs=cpu_count() - 1, brain=False, test_size=0.2, n_splits=5, upsample=True, scoring=None, verbose=False, grid_search=True):
    # Pick one (model_class, param_space) pair at random, then instantiate the
    # model with a single parameter set drawn by ParameterSampler.
    all_params = (linear_models_n_params_small if small else linear_models_n_params) + (nn_models_n_params_small if small else nn_models_n_params) + ([] if small else gaussianprocess_models_n_params) + neighbor_models_n_params + (svm_models_n_params_small if small else svm_models_n_params) + (tree_models_n_params_small if small else tree_models_n_params)
    all_params = random.choice(all_params)
    return all_params[0](**list(ParameterSampler(all_params[1], n_iter=1))[0])
Example 12
Project: HungaBunga   Author: ypeleg   File: regression.py    License: MIT License
def run_one_regressor(x, y, small=True, normalize_x=True, n_jobs=cpu_count() - 1, brain=False, test_size=0.2, n_splits=5, upsample=True, scoring=None, verbose=False, grid_search=True):
    # Same pattern as the classifier variant: pick one (model_class, param_space)
    # pair at random and instantiate it with a single sampled parameter set.
    all_params = (linear_models_n_params_small if small else linear_models_n_params) + (nn_models_n_params_small if small else nn_models_n_params) + ([] if small else gaussianprocess_models_n_params) + neighbor_models_n_params + (svm_models_n_params_small if small else svm_models_n_params) + (tree_models_n_params_small if small else tree_models_n_params)
    all_params = random.choice(all_params)
    return all_params[0](**list(ParameterSampler(all_params[1], n_iter=1))[0])
Example 13
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_search.py    License: MIT License
def test_parameters_sampler_replacement():
    # raise warning if n_iter is bigger than total parameter space
    params = {'first': [0, 1], 'second': ['a', 'b', 'c']}
    sampler = ParameterSampler(params, n_iter=7)
    n_iter = 7
    grid_size = 6
    expected_warning = ('The total space of parameters %d is smaller '
                        'than n_iter=%d. Running %d iterations. For '
                        'exhaustive searches, use GridSearchCV.'
                        % (grid_size, n_iter, grid_size))
    assert_warns_message(UserWarning, expected_warning,
                         list, sampler)

    # degenerates to GridSearchCV if n_iter the same as grid_size
    sampler = ParameterSampler(params, n_iter=6)
    samples = list(sampler)
    assert_equal(len(samples), 6)
    for values in ParameterGrid(params):
        assert values in samples

    # test sampling without replacement in a large grid
    params = {'a': range(10), 'b': range(10), 'c': range(10)}
    sampler = ParameterSampler(params, n_iter=99, random_state=42)
    samples = list(sampler)
    assert_equal(len(samples), 99)
    hashable_samples = ["a%db%dc%d" % (p['a'], p['b'], p['c'])
                        for p in samples]
    assert_equal(len(set(hashable_samples)), 99)

    # doesn't go into infinite loops
    params_distribution = {'first': bernoulli(.5), 'second': ['a', 'b', 'c']}
    sampler = ParameterSampler(params_distribution, n_iter=7)
    samples = list(sampler)
    assert_equal(len(samples), 7) 
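
The replacement semantics exercised above are worth calling out: when every value in param_distributions is a list, ParameterSampler samples without replacement, and if n_iter exceeds the size of the full grid, recent scikit-learn versions emit a UserWarning and return only the grid-sized number of candidates (older releases raised a ValueError instead, as Example 22 below shows). A minimal standalone sketch of that behavior, assuming a recent scikit-learn:

import warnings
from sklearn.model_selection import ParameterSampler

params = {'first': [0, 1], 'second': ['a', 'b', 'c']}  # full grid has 2 * 3 = 6 points

# Asking for 7 samples from a 6-point grid warns and yields each distinct
# combination exactly once.
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    samples = list(ParameterSampler(params, n_iter=7, random_state=0))

print(len(samples))        # 6, not 7
print(caught[0].category)  # <class 'UserWarning'>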
Example 14
Project: Landmark2019-1st-and-3rd-Place-Solution   Author: lyakaap   File: v1only.py    License: Apache License 2.0
def tuning(mode, n_iter, n_gpu, devices, save_interval, n_blocks, block_id):

    if n_gpu == -1:
        n_gpu = len(devices.split(','))

    space = [
        {
            # 'loss': ['arcface', 'cosface'],
            'loss': ['arcface', 'cosface', 'softmax'],
            'epochs': [5],
            'augmentation': ['soft'],
        },
    ]

    if mode == 'grid':
        candidate_list = list(ParameterGrid(space))
    elif mode == 'random':
        candidate_list = list(ParameterSampler(space, n_iter, random_state=params['seed']))
    else:
        raise ValueError

    n_per_block = math.ceil(len(candidate_list) / n_blocks)
    candidate_chunk = candidate_list[block_id * n_per_block: (block_id + 1) * n_per_block]

    utils.launch_tuning(mode=mode, n_iter=n_iter, n_gpu=n_gpu, devices=devices,
                        params=params, root=ROOT, save_interval=save_interval,
                        candidate_list=candidate_chunk) 
Example 15
Project: Landmark2019-1st-and-3rd-Place-Solution   Author: lyakaap   File: v2clean.py    License: Apache License 2.0
def tuning(mode, n_iter, n_gpu, devices, save_interval, n_blocks, block_id):

    if n_gpu == -1:
        n_gpu = len(devices.split(','))

    space = [
        {
            'loss': ['arcface'],
            # 'verifythresh': [20, 30, 40, 50],
            # 'freqthresh': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
        },
    ]

    if mode == 'grid':
        candidate_list = list(ParameterGrid(space))
    elif mode == 'random':
        candidate_list = list(ParameterSampler(space, n_iter, random_state=params['seed']))
    else:
        raise ValueError

    n_per_block = math.ceil(len(candidate_list) / n_blocks)
    candidate_chunk = candidate_list[block_id * n_per_block: (block_id + 1) * n_per_block]

    utils.launch_tuning(mode=mode, n_iter=n_iter, n_gpu=n_gpu, devices=devices,
                        params=params, root=ROOT, save_interval=save_interval,
                        candidate_list=candidate_chunk) 
Example 16
Project: dislib   Author: bsc-wdc   File: _search.py    License: Apache License 2.0
def _run_search(self, evaluate_candidates):
        """Search n_iter candidates from param_distributions"""
        ps = ParameterSampler(self.param_distributions, self.n_iter,
                              random_state=self.random_state)
        evaluate_candidates(ps) 
Example 17
Project: dask-ml   Author: dask   File: _search.py    License: BSD 3-Clause "New" or "Revised" License
def _get_param_iterator(self):
        """Return ParameterSampler instance for the given distributions"""
        return model_selection.ParameterSampler(
            self.param_distributions, self.n_iter, random_state=self.random_state
        ) 
Example 18
Project: dask-ml   Author: dask   File: _incremental.py    License: BSD 3-Clause "New" or "Revised" License
def _get_cv_results(self, history, model_hist):
        # The final score for each model is the last entry in its history.
        best_scores = {k: hist[-1]["score"] for k, hist in model_hist.items()}

        cv_results = {}
        for k, hist in model_hist.items():
            pf_times = list(toolz.pluck("partial_fit_time", hist))
            score_times = list(toolz.pluck("score_time", hist))
            cv_results[k] = {
                "mean_partial_fit_time": np.mean(pf_times),
                "mean_score_time": np.mean(score_times),
                "std_partial_fit_time": np.std(pf_times),
                "std_score_time": np.std(score_times),
                "test_score": best_scores[k],
                "model_id": k,
                "params": hist[0]["params"],
                "partial_fit_calls": hist[-1]["partial_fit_calls"],
            }
        cv_results = list(cv_results.values())  # list of dicts
        cv_results = {k: [res[k] for res in cv_results] for k in cv_results[0]}

        # Every model will have the same parameter keys because this class
        # uses either ParameterSampler or ParameterGrid
        params = defaultdict(list)
        for model_params in cv_results["params"]:
            for k, v in model_params.items():
                params[k].append(v)

        for k, v in params.items():
            cv_results["param_" + k] = v

        cv_results = {k: np.array(v) for k, v in cv_results.items()}
        cv_results["rank_test_score"] = scipy.stats.rankdata(
            -cv_results["test_score"], method="min"
        ).astype(int)
        return cv_results 
Example 19
Project: dask-ml   Author: dask   File: _incremental.py    License: BSD 3-Clause "New" or "Revised" License
def _get_params(self):
        if self.n_initial_parameters == "grid":
            return ParameterGrid(self.parameters)
        else:
            return ParameterSampler(
                self.parameters,
                self.n_initial_parameters,
                random_state=self.random_state,
            ) 
Example 20
Project: pandas-ml   Author: pandas-ml   File: test_model_selection.py    License: BSD 3-Clause "New" or "Revised" License
def test_objectmapper(self):
        df = pdml.ModelFrame([])

        # Splitter Classes
        self.assertIs(df.model_selection.KFold, ms.KFold)
        self.assertIs(df.model_selection.GroupKFold, ms.GroupKFold)
        self.assertIs(df.model_selection.StratifiedKFold, ms.StratifiedKFold)

        self.assertIs(df.model_selection.LeaveOneGroupOut, ms.LeaveOneGroupOut)
        self.assertIs(df.model_selection.LeavePGroupsOut, ms.LeavePGroupsOut)
        self.assertIs(df.model_selection.LeaveOneOut, ms.LeaveOneOut)
        self.assertIs(df.model_selection.LeavePOut, ms.LeavePOut)

        self.assertIs(df.model_selection.ShuffleSplit, ms.ShuffleSplit)
        self.assertIs(df.model_selection.GroupShuffleSplit,
                      ms.GroupShuffleSplit)
        # self.assertIs(df.model_selection.StratifiedShuffleSplit,
        #               ms.StratifiedShuffleSplit)
        self.assertIs(df.model_selection.PredefinedSplit, ms.PredefinedSplit)
        self.assertIs(df.model_selection.TimeSeriesSplit, ms.TimeSeriesSplit)

        # Splitter Functions

        # Hyper-parameter optimizers
        self.assertIs(df.model_selection.GridSearchCV, ms.GridSearchCV)
        self.assertIs(df.model_selection.RandomizedSearchCV, ms.RandomizedSearchCV)
        self.assertIs(df.model_selection.ParameterGrid, ms.ParameterGrid)
        self.assertIs(df.model_selection.ParameterSampler, ms.ParameterSampler)

        # Model validation 
Example 21
Project: pandas-ml   Author: pandas-ml   File: test_model_selection.py    License: BSD 3-Clause "New" or "Revised" License
def test_objectmapper_abbr(self):
        df = pdml.ModelFrame([])

        # Splitter Classes
        self.assertIs(df.ms.KFold, ms.KFold)
        self.assertIs(df.ms.GroupKFold, ms.GroupKFold)
        self.assertIs(df.ms.StratifiedKFold, ms.StratifiedKFold)

        self.assertIs(df.ms.LeaveOneGroupOut, ms.LeaveOneGroupOut)
        self.assertIs(df.ms.LeavePGroupsOut, ms.LeavePGroupsOut)
        self.assertIs(df.ms.LeaveOneOut, ms.LeaveOneOut)
        self.assertIs(df.ms.LeavePOut, ms.LeavePOut)

        self.assertIs(df.ms.ShuffleSplit, ms.ShuffleSplit)
        self.assertIs(df.ms.GroupShuffleSplit,
                      ms.GroupShuffleSplit)
        # self.assertIs(df.ms.StratifiedShuffleSplit,
        #               ms.StratifiedShuffleSplit)
        self.assertIs(df.ms.PredefinedSplit, ms.PredefinedSplit)
        self.assertIs(df.ms.TimeSeriesSplit, ms.TimeSeriesSplit)

        # Splitter Functions

        # Hyper-parameter optimizers
        self.assertIs(df.ms.GridSearchCV, ms.GridSearchCV)
        self.assertIs(df.ms.RandomizedSearchCV, ms.RandomizedSearchCV)
        self.assertIs(df.ms.ParameterGrid, ms.ParameterGrid)
        self.assertIs(df.ms.ParameterSampler, ms.ParameterSampler)

        # Model validation 
Example 22
Project: twitter-stock-recommendation   Author: alvarobartt   File: test_search.py    License: MIT License
def test_parameters_sampler_replacement():
    # raise error if n_iter too large
    params = {'first': [0, 1], 'second': ['a', 'b', 'c']}
    sampler = ParameterSampler(params, n_iter=7)
    assert_raises(ValueError, list, sampler)
    # degenerates to GridSearchCV if n_iter the same as grid_size
    sampler = ParameterSampler(params, n_iter=6)
    samples = list(sampler)
    assert_equal(len(samples), 6)
    for values in ParameterGrid(params):
        assert_true(values in samples)

    # test sampling without replacement in a large grid
    params = {'a': range(10), 'b': range(10), 'c': range(10)}
    sampler = ParameterSampler(params, n_iter=99, random_state=42)
    samples = list(sampler)
    assert_equal(len(samples), 99)
    hashable_samples = ["a%db%dc%d" % (p['a'], p['b'], p['c'])
                        for p in samples]
    assert_equal(len(set(hashable_samples)), 99)

    # doesn't go into infinite loops
    params_distribution = {'first': bernoulli(.5), 'second': ['a', 'b', 'c']}
    sampler = ParameterSampler(params_distribution, n_iter=7)
    samples = list(sampler)
    assert_equal(len(samples), 7)