Python sklearn.model_selection.ParameterSampler() Examples

The following are 22 code examples of sklearn.model_selection.ParameterSampler(). You can go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module sklearn.model_selection, or try the search function.
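Before the project examples, here is a minimal, self-contained sketch (not taken from any project below) of what ParameterSampler does: it turns a dictionary of lists and scipy distributions into an iterator of sampled parameter dictionaries.

from scipy.stats import uniform
from sklearn.model_selection import ParameterSampler

# Lists are sampled uniformly; scipy.stats distributions are sampled via rvs().
param_distributions = {"kernel": ["rbf", "linear"],
                       "C": uniform(0, 1)}
sampler = ParameterSampler(param_distributions, n_iter=5, random_state=0)

for params in sampler:
    print(params)  # e.g. {'C': 0.5488..., 'kernel': 'rbf'}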
Example #1
Source File: fixes.py    From skutil with BSD 3-Clause "New" or "Revised" License
def fit(self, X, y=None):
        """Run fit on the estimator with randomly drawn parameters.

        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features]
            Training vector, where n_samples is the number of samples and
            n_features is the number of features.
        y : array-like, shape = [n_samples] or [n_samples, n_output], optional
            Target relative to X for classification or regression;
            None for unsupervised learning.
        """
        sampled_params = ParameterSampler(self.param_distributions,
                                          self.n_iter,
                                          random_state=self.random_state)

        # the super class will handle the X, y validation
        return self._fit(X, y, sampled_params) 
Example #2
Source File: test_search.py    From twitter-stock-recommendation with MIT License
def test_param_sampler():
    # test basic properties of param sampler
    param_distributions = {"kernel": ["rbf", "linear"],
                           "C": uniform(0, 1)}
    sampler = ParameterSampler(param_distributions=param_distributions,
                               n_iter=10, random_state=0)
    samples = [x for x in sampler]
    assert_equal(len(samples), 10)
    for sample in samples:
        assert_true(sample["kernel"] in ["rbf", "linear"])
        assert_true(0 <= sample["C"] <= 1)

    # test that repeated calls yield identical parameters
    param_distributions = {"C": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]}
    sampler = ParameterSampler(param_distributions=param_distributions,
                               n_iter=3, random_state=0)
    assert_equal([x for x in sampler], [x for x in sampler])

    if sp_version >= (0, 16):
        param_distributions = {"C": uniform(0, 1)}
        sampler = ParameterSampler(param_distributions=param_distributions,
                                   n_iter=10, random_state=0)
        assert_equal([x for x in sampler], [x for x in sampler]) 
Example #3
Source File: random_search.py    From spark-sklearn with Apache License 2.0
def fit(self, X, y=None, groups=None):
        """Run fit on the estimator with randomly drawn parameters.

        Parameters
        ----------

        X : array-like, shape = [n_samples, n_features]
            Training vector, where n_samples is the number of samples and
            n_features is the number of features.

        y : array-like, shape = [n_samples] or [n_samples, n_output], optional
            Target relative to X for classification or regression;
            None for unsupervised learning.

        groups : array-like, with shape (n_samples,), optional
            Group labels for the samples used while splitting the dataset into
            train/test set.
        """
        sampled_params = ParameterSampler(self.param_distributions,
                                          self.n_iter,
                                          random_state=self.random_state)

        return self._fit(X, y, groups, sampled_params) 
Example #4
Source File: test_search.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_param_sampler():
    # test basic properties of param sampler
    param_distributions = {"kernel": ["rbf", "linear"],
                           "C": uniform(0, 1)}
    sampler = ParameterSampler(param_distributions=param_distributions,
                               n_iter=10, random_state=0)
    samples = [x for x in sampler]
    assert_equal(len(samples), 10)
    for sample in samples:
        assert sample["kernel"] in ["rbf", "linear"]
        assert 0 <= sample["C"] <= 1

    # test that repeated calls yield identical parameters
    param_distributions = {"C": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]}
    sampler = ParameterSampler(param_distributions=param_distributions,
                               n_iter=3, random_state=0)
    assert_equal([x for x in sampler], [x for x in sampler])

    if sp_version >= (0, 16):
        param_distributions = {"C": uniform(0, 1)}
        sampler = ParameterSampler(param_distributions=param_distributions,
                                   n_iter=10, random_state=0)
        assert_equal([x for x in sampler], [x for x in sampler]) 
Example #5
Source File: example.py    From spotlight with MIT License
def sample_hyperparameters(random_state, num):

    space = {
        'n_iter': N_ITER,
        'batch_size': BATCH_SIZE,
        'l2': L2,
        'learning_rate': LEARNING_RATES,
        'loss': LOSSES,
        'embedding_dim': EMBEDDING_DIM,
    }

    sampler = ParameterSampler(space,
                               n_iter=num,
                               random_state=random_state)

    for params in sampler:
        yield params 
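A hedged sketch of how a generator like this is typically consumed (the space below is a placeholder for spotlight's constants): each drawn dictionary configures one training run.

from sklearn.model_selection import ParameterSampler


def sample_hyperparameters(random_state, num):
    space = {'learning_rate': [1e-3, 1e-2, 1e-1],
             'batch_size': [128, 256]}
    for params in ParameterSampler(space, n_iter=num, random_state=random_state):
        yield params


for hyperparameters in sample_hyperparameters(random_state=42, num=3):
    print(hyperparameters)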
Example #6
Source File: movielens_sequence.py    From spotlight with MIT License
def sample_cnn_hyperparameters(random_state, num):

    space = {
        'n_iter': N_ITER,
        'batch_size': BATCH_SIZE,
        'l2': L2,
        'learning_rate': LEARNING_RATES,
        'loss': LOSSES,
        'embedding_dim': EMBEDDING_DIM,
        'kernel_width': [3, 5, 7],
        'num_layers': list(range(1, 10)),
        'dilation_multiplier': [1, 2],
        'nonlinearity': ['tanh', 'relu'],
        'residual': [True, False]
    }

    sampler = ParameterSampler(space,
                               n_iter=num,
                               random_state=random_state)

    for params in sampler:
        params['dilation'] = list(params['dilation_multiplier'] ** (i % 8)
                                  for i in range(params['num_layers']))

        yield params 
Example #7
Source File: movielens_sequence.py    From spotlight with MIT License
def sample_lstm_hyperparameters(random_state, num):

    space = {
        'n_iter': N_ITER,
        'batch_size': BATCH_SIZE,
        'l2': L2,
        'learning_rate': LEARNING_RATES,
        'loss': LOSSES,
        'embedding_dim': EMBEDDING_DIM,
    }

    sampler = ParameterSampler(space,
                               n_iter=num,
                               random_state=random_state)

    for params in sampler:

        yield params 
Example #8
Source File: movielens_sequence.py    From spotlight with MIT License
def sample_pooling_hyperparameters(random_state, num):

    space = {
        'n_iter': N_ITER,
        'batch_size': BATCH_SIZE,
        'l2': L2,
        'learning_rate': LEARNING_RATES,
        'loss': LOSSES,
        'embedding_dim': EMBEDDING_DIM,
    }

    sampler = ParameterSampler(space,
                               n_iter=num,
                               random_state=random_state)

    for params in sampler:

        yield params 
Example #9
Source File: test_search.py    From twitter-stock-recommendation with MIT License
def test_parameters_sampler_replacement():
    # raise error if n_iter too large
    params = {'first': [0, 1], 'second': ['a', 'b', 'c']}
    sampler = ParameterSampler(params, n_iter=7)
    assert_raises(ValueError, list, sampler)
    # degenerates to GridSearchCV if n_iter the same as grid_size
    sampler = ParameterSampler(params, n_iter=6)
    samples = list(sampler)
    assert_equal(len(samples), 6)
    for values in ParameterGrid(params):
        assert_true(values in samples)

    # test sampling without replacement in a large grid
    params = {'a': range(10), 'b': range(10), 'c': range(10)}
    sampler = ParameterSampler(params, n_iter=99, random_state=42)
    samples = list(sampler)
    assert_equal(len(samples), 99)
    hashable_samples = ["a%db%dc%d" % (p['a'], p['b'], p['c'])
                        for p in samples]
    assert_equal(len(set(hashable_samples)), 99)

    # doesn't go into infinite loops
    params_distribution = {'first': bernoulli(.5), 'second': ['a', 'b', 'c']}
    sampler = ParameterSampler(params_distribution, n_iter=7)
    samples = list(sampler)
    assert_equal(len(samples), 7) 
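A short, hedged demonstration of the behaviour this test checks (independent of the project code): when every entry in the grid is a finite list, ParameterSampler draws parameter settings without replacement.

from sklearn.model_selection import ParameterSampler

params = {'first': [0, 1], 'second': ['a', 'b', 'c']}
samples = list(ParameterSampler(params, n_iter=6, random_state=0))

# With an all-list grid of size 6 and n_iter=6, every combination appears exactly once.
unique = {tuple(sorted(s.items())) for s in samples}
print(len(samples), len(unique))  # 6 6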
Example #10
Source File: test_model_selection.py    From pandas-ml with BSD 3-Clause "New" or "Revised" License
def test_objectmapper_abbr(self):
        df = pdml.ModelFrame([])

        # Splitter Classes
        self.assertIs(df.ms.KFold, ms.KFold)
        self.assertIs(df.ms.GroupKFold, ms.GroupKFold)
        self.assertIs(df.ms.StratifiedKFold, ms.StratifiedKFold)

        self.assertIs(df.ms.LeaveOneGroupOut, ms.LeaveOneGroupOut)
        self.assertIs(df.ms.LeavePGroupsOut, ms.LeavePGroupsOut)
        self.assertIs(df.ms.LeaveOneOut, ms.LeaveOneOut)
        self.assertIs(df.ms.LeavePOut, ms.LeavePOut)

        self.assertIs(df.ms.ShuffleSplit, ms.ShuffleSplit)
        self.assertIs(df.ms.GroupShuffleSplit,
                      ms.GroupShuffleSplit)
        # self.assertIs(df.ms.StratifiedShuffleSplit,
        #               ms.StratifiedShuffleSplit)
        self.assertIs(df.ms.PredefinedSplit, ms.PredefinedSplit)
        self.assertIs(df.ms.TimeSeriesSplit, ms.TimeSeriesSplit)

        # Splitter Functions

        # Hyper-parameter optimizers
        self.assertIs(df.ms.GridSearchCV, ms.GridSearchCV)
        self.assertIs(df.ms.RandomizedSearchCV, ms.RandomizedSearchCV)
        self.assertIs(df.ms.ParameterGrid, ms.ParameterGrid)
        self.assertIs(df.ms.ParameterSampler, ms.ParameterSampler)

        # Model validation 
Example #11
Source File: test_model_selection.py    From pandas-ml with BSD 3-Clause "New" or "Revised" License
def test_objectmapper(self):
        df = pdml.ModelFrame([])

        # Splitter Classes
        self.assertIs(df.model_selection.KFold, ms.KFold)
        self.assertIs(df.model_selection.GroupKFold, ms.GroupKFold)
        self.assertIs(df.model_selection.StratifiedKFold, ms.StratifiedKFold)

        self.assertIs(df.model_selection.LeaveOneGroupOut, ms.LeaveOneGroupOut)
        self.assertIs(df.model_selection.LeavePGroupsOut, ms.LeavePGroupsOut)
        self.assertIs(df.model_selection.LeaveOneOut, ms.LeaveOneOut)
        self.assertIs(df.model_selection.LeavePOut, ms.LeavePOut)

        self.assertIs(df.model_selection.ShuffleSplit, ms.ShuffleSplit)
        self.assertIs(df.model_selection.GroupShuffleSplit,
                      ms.GroupShuffleSplit)
        # self.assertIs(df.model_selection.StratifiedShuffleSplit,
        #               ms.StratifiedShuffleSplit)
        self.assertIs(df.model_selection.PredefinedSplit, ms.PredefinedSplit)
        self.assertIs(df.model_selection.TimeSeriesSplit, ms.TimeSeriesSplit)

        # Splitter Functions

        # Hyper-parameter optimizers
        self.assertIs(df.model_selection.GridSearchCV, ms.GridSearchCV)
        self.assertIs(df.model_selection.RandomizedSearchCV, ms.RandomizedSearchCV)
        self.assertIs(df.model_selection.ParameterGrid, ms.ParameterGrid)
        self.assertIs(df.model_selection.ParameterSampler, ms.ParameterSampler)

        # Model validation 
Example #12
Source File: _incremental.py    From dask-ml with BSD 3-Clause "New" or "Revised" License
def _get_params(self):
        if self.n_initial_parameters == "grid":
            return ParameterGrid(self.parameters)
        else:
            return ParameterSampler(
                self.parameters,
                self.n_initial_parameters,
                random_state=self.random_state,
            ) 
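Both ParameterGrid and ParameterSampler yield plain parameter dictionaries when iterated, which is what lets a method like _get_params above return either one. A small illustrative sketch (the parameter names are placeholders, not dask-ml's):

from sklearn.model_selection import ParameterGrid, ParameterSampler

parameters = {"alpha": [0.1, 1.0, 10.0]}
for source in (ParameterGrid(parameters),
               ParameterSampler(parameters, n_iter=2, random_state=0)):
    for params in source:
        print(type(source).__name__, params)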
Example #13
Source File: _incremental.py    From dask-ml with BSD 3-Clause "New" or "Revised" License
def _get_cv_results(self, history, model_hist):
        best_scores = {k: hist[-1]["score"] for k, hist in model_hist.items()}

        cv_results = {}
        for k, hist in model_hist.items():
            pf_times = list(toolz.pluck("partial_fit_time", hist))
            score_times = list(toolz.pluck("score_time", hist))
            cv_results[k] = {
                "mean_partial_fit_time": np.mean(pf_times),
                "mean_score_time": np.mean(score_times),
                "std_partial_fit_time": np.std(pf_times),
                "std_score_time": np.std(score_times),
                "test_score": best_scores[k],
                "model_id": k,
                "params": hist[0]["params"],
                "partial_fit_calls": hist[-1]["partial_fit_calls"],
            }
        cv_results = list(cv_results.values())  # list of dicts
        cv_results = {k: [res[k] for res in cv_results] for k in cv_results[0]}

        # Every model will have the same params because this class uses either
        # ParameterSampler or ParameterGrid
        params = defaultdict(list)
        for model_params in cv_results["params"]:
            for k, v in model_params.items():
                params[k].append(v)

        for k, v in params.items():
            cv_results["param_" + k] = v

        cv_results = {k: np.array(v) for k, v in cv_results.items()}
        cv_results["rank_test_score"] = scipy.stats.rankdata(
            -cv_results["test_score"], method="min"
        ).astype(int)
        return cv_results 
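The ranking step at the end applies scipy.stats.rankdata to the negated scores so that the best score gets rank 1, matching scikit-learn's rank_test_score convention. A tiny hedged illustration:

import numpy as np
import scipy.stats

test_score = np.array([0.7, 0.9, 0.8])
rank = scipy.stats.rankdata(-test_score, method="min").astype(int)
print(rank)  # [3 1 2] -- higher scores get lower (better) ranks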
Example #14
Source File: _search.py    From dask-ml with BSD 3-Clause "New" or "Revised" License
def _get_param_iterator(self):
        """Return ParameterSampler instance for the given distributions"""
        return model_selection.ParameterSampler(
            self.param_distributions, self.n_iter, random_state=self.random_state
        ) 
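This mirrors the pattern used by scikit-learn's own RandomizedSearchCV, which builds the same ParameterSampler internally from param_distributions. A hedged end-to-end sketch of that public API (dataset and estimator chosen here only for illustration):

from scipy.stats import uniform
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import RandomizedSearchCV

X, y = load_iris(return_X_y=True)
search = RandomizedSearchCV(LogisticRegression(max_iter=1000),
                            param_distributions={"C": uniform(0.1, 10)},
                            n_iter=5, cv=3, random_state=0)
search.fit(X, y)
print(search.best_params_)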
Example #15
Source File: _search.py    From dislib with Apache License 2.0
def _run_search(self, evaluate_candidates):
        """Search n_iter candidates from param_distributions"""
        ps = ParameterSampler(self.param_distributions, self.n_iter,
                              random_state=self.random_state)
        evaluate_candidates(ps) 
Example #16
Source File: v2clean.py    From Landmark2019-1st-and-3rd-Place-Solution with Apache License 2.0
def tuning(mode, n_iter, n_gpu, devices, save_interval, n_blocks, block_id):

    if n_gpu == -1:
        n_gpu = len(devices.split(','))

    space = [
        {
            'loss': ['arcface'],
            # 'verifythresh': [20, 30, 40, 50],
            # 'freqthresh': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
        },
    ]

    if mode == 'grid':
        candidate_list = list(ParameterGrid(space))
    elif mode == 'random':
        candidate_list = list(ParameterSampler(space, n_iter, random_state=params['seed']))
    else:
        raise ValueError

    n_per_block = math.ceil(len(candidate_list) / n_blocks)
    candidate_chunk = candidate_list[block_id * n_per_block: (block_id + 1) * n_per_block]

    utils.launch_tuning(mode=mode, n_iter=n_iter, n_gpu=n_gpu, devices=devices,
                        params=params, root=ROOT, save_interval=save_interval,
                        candidate_list=candidate_chunk) 
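A hedged sketch of the chunking step these tuning functions perform (the search space here is a stand-in): the sampled candidates are split into roughly equal blocks so that each block of workers handles only its own slice.

import math

from sklearn.model_selection import ParameterSampler

space = {'loss': ['arcface', 'cosface'], 'lr': [1e-1, 1e-2, 1e-3]}
candidate_list = list(ParameterSampler(space, n_iter=6, random_state=0))

n_blocks = 2
n_per_block = math.ceil(len(candidate_list) / n_blocks)
blocks = [candidate_list[i * n_per_block:(i + 1) * n_per_block]
          for i in range(n_blocks)]
print([len(b) for b in blocks])  # e.g. [3, 3]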
Example #17
Source File: v1only.py    From Landmark2019-1st-and-3rd-Place-Solution with Apache License 2.0
def tuning(mode, n_iter, n_gpu, devices, save_interval, n_blocks, block_id):

    if n_gpu == -1:
        n_gpu = len(devices.split(','))

    space = [
        {
            # 'loss': ['arcface', 'cosface'],
            'loss': ['arcface', 'cosface', 'softmax'],
            'epochs': [5],
            'augmentation': ['soft'],
        },
    ]

    if mode == 'grid':
        candidate_list = list(ParameterGrid(space))
    elif mode == 'random':
        candidate_list = list(ParameterSampler(space, n_iter, random_state=params['seed']))
    else:
        raise ValueError

    n_per_block = math.ceil(len(candidate_list) / n_blocks)
    candidate_chunk = candidate_list[block_id * n_per_block: (block_id + 1) * n_per_block]

    utils.launch_tuning(mode=mode, n_iter=n_iter, n_gpu=n_gpu, devices=devices,
                        params=params, root=ROOT, save_interval=save_interval,
                        candidate_list=candidate_chunk) 
Example #18
Source File: test_search.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_parameters_sampler_replacement():
    # raise warning if n_iter is bigger than total parameter space
    params = {'first': [0, 1], 'second': ['a', 'b', 'c']}
    sampler = ParameterSampler(params, n_iter=7)
    n_iter = 7
    grid_size = 6
    expected_warning = ('The total space of parameters %d is smaller '
                        'than n_iter=%d. Running %d iterations. For '
                        'exhaustive searches, use GridSearchCV.'
                        % (grid_size, n_iter, grid_size))
    assert_warns_message(UserWarning, expected_warning,
                         list, sampler)

    # degenerates to GridSearchCV if n_iter the same as grid_size
    sampler = ParameterSampler(params, n_iter=6)
    samples = list(sampler)
    assert_equal(len(samples), 6)
    for values in ParameterGrid(params):
        assert values in samples

    # test sampling without replacement in a large grid
    params = {'a': range(10), 'b': range(10), 'c': range(10)}
    sampler = ParameterSampler(params, n_iter=99, random_state=42)
    samples = list(sampler)
    assert_equal(len(samples), 99)
    hashable_samples = ["a%db%dc%d" % (p['a'], p['b'], p['c'])
                        for p in samples]
    assert_equal(len(set(hashable_samples)), 99)

    # doesn't go into infinite loops
    params_distribution = {'first': bernoulli(.5), 'second': ['a', 'b', 'c']}
    sampler = ParameterSampler(params_distribution, n_iter=7)
    samples = list(sampler)
    assert_equal(len(samples), 7) 
Example #19
Source File: regression.py    From HungaBunga with MIT License
def run_one_regressor(x, y, small = True, normalize_x = True, n_jobs=cpu_count()-1, brain=False, test_size=0.2, n_splits=5, upsample=True, scoring=None, verbose=False, grid_search=True):
    all_params = (linear_models_n_params_small if small else linear_models_n_params) + (nn_models_n_params_small if small else nn_models_n_params) + ([] if small else gaussianprocess_models_n_params) + neighbor_models_n_params + (svm_models_n_params_small if small else svm_models_n_params) + (tree_models_n_params_small if small else tree_models_n_params)
    all_params = random.choice(all_params)
    return all_params[0](**(list(ParameterSampler(all_params[1], n_iter=1))[0])) 
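The one-liner above draws a single random configuration and instantiates the chosen model with it. A hedged illustration of the same idiom with a concrete estimator (SVR and the grid are stand-ins, not HungaBunga's model list):

from sklearn.model_selection import ParameterSampler
from sklearn.svm import SVR

model_cls = SVR
grid = {"C": [0.1, 1, 10], "kernel": ["rbf", "linear"]}

# Draw one parameter setting and pass it as keyword arguments.
params = list(ParameterSampler(grid, n_iter=1, random_state=0))[0]
model = model_cls(**params)
print(model)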
Example #20
Source File: classification.py    From HungaBunga with MIT License
def run_one_classifier(x, y, small = True, normalize_x = True, n_jobs=cpu_count()-1, brain=False, test_size=0.2, n_splits=5, upsample=True, scoring=None, verbose=False, grid_search=True):
    all_params = (linear_models_n_params_small if small else linear_models_n_params) +  (nn_models_n_params_small if small else nn_models_n_params) + ([] if small else gaussianprocess_models_n_params) + neighbor_models_n_params + (svm_models_n_params_small if small else svm_models_n_params) + (tree_models_n_params_small if small else tree_models_n_params)
    all_params = random.choice(all_params)
    return all_params[0](**(list(ParameterSampler(all_params[1], n_iter=1))[0])) 
Example #21
Source File: grid_search.py    From skutil with BSD 3-Clause "New" or "Revised" License
def fit(self, frame):
        """Fit the grid search.

        Parameters
        ----------

        frame : H2OFrame, shape=(n_samples, n_features)
            The training frame on which to fit.
        """
        sampled_params = ParameterSampler(self.param_grid,
                                          self.n_iter,
                                          random_state=self.random_state)

        # set our score class
        self.scoring_class_ = GainsStatisticalReport(**self.grsttngs_)

        # we can do this once to avoid many as_data_frame operations
        exp, loss, prem = _val_exp_loss_prem(self.exposure_feature, self.loss_feature, self.premium_feature)
        self.extra_args_ = {
            'expo': _as_numpy(frame[exp]),
            'loss': _as_numpy(frame[loss]),
            'prem': _as_numpy(frame[prem]) if prem is not None else None
        }

        # for validation set
        self.extra_names_ = {
            'expo': exp,
            'loss': loss,
            'prem': prem
        }

        # do fit
        the_fit = self._fit(frame, sampled_params)

        # clear extra_args_, because they might take lots of mem
        # we can do this because a re-fit will re-assign them anyways.
        # don't delete the extra_names_ though, because they're used in
        # scoring the incoming frame.
        del self.extra_args_

        return the_fit 
Example #22
Source File: grid_search.py    From skutil with BSD 3-Clause "New" or "Revised" License
def fit(self, frame):
        """Fit the grid search.

        Parameters
        ----------

        frame : H2OFrame, shape=(n_samples, n_features)
            The training frame on which to fit.
        """
        sampled_params = ParameterSampler(self.param_grid,
                                          self.n_iter,
                                          random_state=self.random_state)

        return self._fit(frame, sampled_params)