Python sklearn.model_selection.RandomizedSearchCV() Examples

The following are 30 code examples showing how to use sklearn.model_selection.RandomizedSearchCV(). The examples are extracted from open source projects; the project, author, source file, and license are noted above each example.

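Before the project examples, here is a minimal, self-contained sketch of the basic pattern they all share (the estimator, the distributions, and the dataset below are illustrative, not taken from any of the projects):

from scipy.stats import randint
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import RandomizedSearchCV

X, y = load_iris(return_X_y=True)

# Draw 10 candidate settings from the distributions below, score each with
# 5-fold cross-validation, and refit the best one on all of the data
# (refit=True is the default).
search = RandomizedSearchCV(
    RandomForestClassifier(random_state=0),
    param_distributions={'n_estimators': randint(50, 300),
                         'max_depth': [None, 5, 10]},
    n_iter=10, cv=5, random_state=0, n_jobs=-1)
search.fit(X, y)
print(search.best_params_, search.best_score_)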

Example 1
Project: healthcareai-py   Author: HealthCatalyst   File: transformers.py   License: MIT License
def getTunedModel(self, baseModel):
        n_estimators = [100, 200, 300, 400, 500]
        max_features = ['auto', 'sqrt']
        max_depth = [5, 10, 20, 30, 40, 50]
        min_samples_split = [2, 5, 10]
        min_samples_leaf = [1, 2, 4]
        bootstrap = [True, False]

        random_grid = {'n_estimators': n_estimators,
                       'max_features': max_features,
                       'max_depth': max_depth,
                       'min_samples_split': min_samples_split,
                       'min_samples_leaf': min_samples_leaf,
                       'bootstrap': bootstrap}
        # print(random_grid)

        model_tuned = RandomizedSearchCV(estimator=baseModel, param_distributions=random_grid,
                                         n_iter=2, cv=2, verbose=0, random_state=100, n_jobs=-1)
        return model_tuned
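
The method above returns an unfitted search object; a hedged usage sketch follows (the trainer instance and the training arrays are assumptions, not part of the project code):

from sklearn.ensemble import RandomForestClassifier

# 'trainer' stands in for an instance of the class defining getTunedModel
tuned = trainer.getTunedModel(RandomForestClassifier())
tuned.fit(X_train, y_train)   # runs the 2-iteration, 2-fold search
print(tuned.best_params_)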
Example 2
Project: skutil   Author: tgsmith61591   File: grid_search.py   License: BSD 3-Clause "New" or "Revised" License
def fit(self, X, y=None, groups=None):
            """Run fit on the estimator with randomly drawn parameters.

            Parameters
            ----------

            X : array-like, shape=(n_samples, n_features)
                Training vector, where n_samples is the number of samples and
                n_features is the number of features.

            y : array-like, shape=(n_samples,) or (n_samples, n_output), optional (default=None)
                Target relative to X for classification or regression;
                None for unsupervised learning.

            groups : array-like, shape=(n_samples,), optional (default=None)
                Group labels for the samples used while splitting the dataset into
                train/test set.
            """
            return super(RandomizedSearchCV, self).fit(X, _as_numpy(y), groups) 
Example 3
Project: aletheia   Author: daniellerch   File: models.py   License: MIT License
def _prepare_classifier(self, params, n_jobs=1):

        X_train, y_train = params

        tuned_parameters = [{
            'kernel': ['rbf'],
            'gamma': [1e-4, 1e-3, 1e-2, 1e-1, 1e+0, 1e+1, 1e+2, 1e+3, 1e+4],
            'C': [1e+0, 1e+1, 1e+2, 1e+3, 1e+4, 1e+5, 1e+6, 1e+7, 1e+8, 1e+9]
        }]

        clf = RandomizedSearchCV(svm.SVC(random_state=self.random_state),
                                 tuned_parameters[0],
                                 n_iter=self.n_randomized_search_iter,
                                 n_jobs=n_jobs, random_state=self.random_state)
        clf.fit(X_train, y_train)

        params = clf.best_params_
        clf = svm.SVC(kernel=params['kernel'], C=params['C'],
                      gamma=params['gamma'], probability=True,
                      random_state=self.random_state)
        clf.fit(X_train, y_train)

        return clf 
Example 4
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_search.py   License: MIT License
def test_empty_cv_iterator_error():
    # Use global X, y

    # create cv
    cv = KFold(n_splits=3).split(X)

    # pop all of it, this should cause the expected ValueError
    [u for u in cv]
    # cv is empty now

    train_size = 100
    ridge = RandomizedSearchCV(Ridge(), {'alpha': [1e-3, 1e-2, 1e-1]},
                               cv=cv, n_jobs=-1)

    # assert that this raises an error
    with pytest.raises(ValueError,
                       match='No fits were performed. '
                             'Was the CV iterator empty\\? '
                             'Were there no candidates\\?'):
        ridge.fit(X[:train_size], y[:train_size]) 
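
For contrast, passing the splitter object itself, rather than an exhausted .split() generator, works because the search asks it for fresh splits inside fit. A minimal sketch reusing the names above:

cv = KFold(n_splits=3)  # the splitter object, not a one-shot generator
ridge = RandomizedSearchCV(Ridge(), {'alpha': [1e-3, 1e-2, 1e-1]},
                           cv=cv, n_iter=3)
ridge.fit(X[:100], y[:100])  # splits are generated fresh inside fit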
Example 5
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_search.py   License: MIT License
def test_random_search_bad_cv():
    # Use global X, y

    class BrokenKFold(KFold):
        def get_n_splits(self, *args, **kw):
            return 1

    # create bad cv
    cv = BrokenKFold(n_splits=3)

    train_size = 100
    ridge = RandomizedSearchCV(Ridge(), {'alpha': [1e-3, 1e-2, 1e-1]},
                               cv=cv, n_jobs=-1)

    # assert that this raises an error
    with pytest.raises(ValueError,
                       match='cv.split and cv.get_n_splits returned '
                             'inconsistent results. Expected \\d+ '
                             'splits, got \\d+'):
        ridge.fit(X[:train_size], y[:train_size]) 
Example 6
Project: pylift   Author: wayfair   File: base.py   License: BSD 2-Clause "Simplified" License
def randomized_search(self, **kwargs):
        """Randomized search using sklearn.model_selection.RandomizedSearchCV.

        Any parameters typically associated with RandomizedSearchCV (see
        sklearn documentation) can be passed as keyword arguments to this
        function.

        The final dictionary used for the randomized search is saved to
        `self.randomized_search_params`. This is updated with any parameters
        that are passed.

        Examples
        --------
        # Passing kwargs.
        self.randomized_search(param_distributions={'max_depth':[2,3,5,10]}, refit=True)

        """
        self.randomized_search_params.update(kwargs)
        self.rand_search_ = RandomizedSearchCV(**self.randomized_search_params)
        self.rand_search_.fit(self.x_train, self.transformed_y_train)
        return self.rand_search_ 
Example 7
Project: pylift   Author: df-foundation   File: base.py   License: BSD 2-Clause "Simplified" License
def randomized_search(self, **kwargs):
        """Randomized search using sklearn.model_selection.RandomizedSearchCV.

        Any parameters typically associated with RandomizedSearchCV (see
        sklearn documentation) can be passed as keyword arguments to this
        function.

        The final dictionary used for the randomized search is saved to
        `self.randomized_search_params`. This is updated with any parameters
        that are passed.

        Examples
        --------
        # Passing kwargs.
        self.randomized_search(param_distributions={'max_depth':[2,3,5,10]}, refit=True)

        """
        self.randomized_search_params.update(kwargs)
        self.rand_search_ = RandomizedSearchCV(**self.randomized_search_params)
        self.rand_search_.fit(self.x_train, self.transformed_y_train)
        return self.rand_search_ 
Example 8
Project: ml-on-gcp   Author: GoogleCloudPlatform   File: gke_parallel.py   License: Apache License 2.0
def fit(self, X, y):
        """Deploys `fit` jobs to each worker in the cluster.
        """
        timestamp = str(int(time.time()))
        self.task_name = self.task_name or '{}.{}.{}'.format(self.cluster_id, self.image_name, timestamp)
        self._done = False
        self._cancelled = False

        X_uri, y_uri, _ = self._upload_data(X, y)

        if type(self.search) == GridSearchCV:
            handler = self._handle_grid_search
        elif type(self.search) == RandomizedSearchCV:
            handler = self._handle_randomized_search
        elif type(self.search) == BayesSearchCV:
            handler = self._handle_bayes_search

        print('Fitting {}'.format(type(self.search)))
        handler(X_uri, y_uri)

        self.persist() 
Example 9
Project: privacy   Author: tensorflow   File: trained_attack_models.py   License: Apache License 2.0
def random_forest(verbose: int = 0, n_jobs: int = 1):
  """Setup a random forest pipeline with cross-validation."""
  rf = ensemble.RandomForestClassifier()

  n_estimators = [100]
  max_features = ['auto', 'sqrt']
  max_depth = [5, 10, 20]
  max_depth.append(None)
  min_samples_split = [2, 5, 10]
  min_samples_leaf = [1, 2, 4]
  random_grid = {'n_estimators': n_estimators,
                 'max_features': max_features,
                 'max_depth': max_depth,
                 'min_samples_split': min_samples_split,
                 'min_samples_leaf': min_samples_leaf}

  pipe = model_selection.RandomizedSearchCV(
      rf, param_distributions=random_grid, n_iter=7, cv=3, n_jobs=n_jobs,
      iid=False, verbose=verbose)
  return pipe 
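
Because refit=True is the RandomizedSearchCV default, the returned pipe can be used directly as a classifier once fitted. A hedged usage sketch (X_train, y_train, and X_test are assumptions):

pipe = random_forest(verbose=1, n_jobs=4)
pipe.fit(X_train, y_train)    # runs the 7-iteration randomized search
preds = pipe.predict(X_test)  # delegates to the refit best estimator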
Example 10
Project: lale   Author: IBM   File: test_core_operators.py   License: Apache License 2.0
def test_with_randomizedsearchcv(self):
        from sklearn.model_selection import RandomizedSearchCV
        from sklearn.datasets import load_iris
        from sklearn.metrics import accuracy_score, make_scorer
        from scipy.stats.distributions import uniform
        import numpy as np
        lr = LogisticRegression()
        parameters = {'solver':('liblinear', 'lbfgs'), 'penalty':['l2']}
        ranges, cat_idx = lr.get_param_ranges()
        min_C, max_C, default_C = ranges['C']
        # specify parameters and distributions to sample from
        # the log-uniform distribution needs to be handled properly
        param_dist = {"solver": ranges['solver'],
                      "C": uniform(min_C, np.log(max_C))}
        # run randomized search
        n_iter_search = 5
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            random_search = RandomizedSearchCV(
                lr, param_distributions=param_dist, n_iter=n_iter_search, cv=5,
                scoring=make_scorer(accuracy_score))
            iris = load_iris()
            random_search.fit(iris.data, iris.target) 
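
Regarding the in-code comment: one way to handle the log-uniform sampling properly is scipy.stats.loguniform, available in scipy >= 1.4. A minimal sketch reusing the names above (this is an assumption about intent, not the project's code):

from scipy.stats import loguniform  # scipy >= 1.4

param_dist = {"solver": ranges['solver'],
              "C": loguniform(min_C, max_C)}  # uniform on a log scale over [min_C, max_C]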
Example 11
Project: causallib   Author: IBM   File: classifier_selection.py   License: Apache License 2.0
def _select_classifier_from_sk_search(estimator, X, A):
    """Return best model from a scikit-learn Search-estimator model.

    Args:
        estimator (GridSearchCV | RandomizedSearchCV): An initialized sklearn SearchCV classifier.
        X (np.ndarray): Covariate matrix size (num_samples, num_features)
        A (np.ndarray): binary labels indicating the source and target populations (num_samples,)

    Returns:
        classifier: model.best_estimator_ - best-performing classifier.
                    See scikit-learn's GridSearchCV and RandomizedSearchCV documentation for details on their return
                    values.
    """
    estimator.fit(X, A)
    best_estimator = clone(estimator.best_estimator_)
    return best_estimator 
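
A hedged sketch of calling the helper above (X and A are assumptions; any initialized SearchCV object works):

from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import RandomizedSearchCV

searcher = RandomizedSearchCV(LogisticRegression(max_iter=1000),
                              {'C': [0.01, 0.1, 1, 10]}, n_iter=4)
best_clf = _select_classifier_from_sk_search(searcher, X, A)  # unfitted clone of the winner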
Example 12
Project: quincy   Author: tbarabosch   File: QuincyLearn.py   License: GNU General Public License v3.0
def learn(self):
        X, y = self.__get_data()
        feature_names = list(X.columns.values)
        if self._undersampling:
            X, y = self.__undersample(feature_names, X, y)

        if self._feature_selection:
            X = self.__select_features(X, y, feature_names)

        if self._scaling:
            logging.info("Scaling...")
            X = preprocessing.scale(X)

        rgs = RandomizedSearchCV(estimator=self._classifier[1], param_distributions=self._classifier[2],
                                 error_score=0, cv=QuincyConfig.CV, n_iter=QuincyConfig.ITERS, refit=True,
                                 n_jobs=-1, scoring=QuincyConfig.METRIC, iid=False)
        rgs.fit(X, y)
        logging.info("Best SCORE: %s" % str(rgs.best_score_))
        logging.info("Best Params: %s" % str(rgs.best_params_))
        self._optimized_model = rgs 
Example 13
Project: twitter-stock-recommendation   Author: alvarobartt   File: test_search.py   License: MIT License
def test_search_cv_timing():
    svc = LinearSVC(random_state=0)

    X = [[1, ], [2, ], [3, ], [4, ]]
    y = [0, 1, 1, 0]

    gs = GridSearchCV(svc, {'C': [0, 1]}, cv=2, error_score=0)
    rs = RandomizedSearchCV(svc, {'C': [0, 1]}, cv=2, error_score=0, n_iter=2)

    for search in (gs, rs):
        search.fit(X, y)
        for key in ['mean_fit_time', 'std_fit_time']:
            # NOTE The precision of time.time on Windows is not high
            # enough for the fit/score times to be non-zero for trivial X and y
            assert_true(np.all(search.cv_results_[key] >= 0))
            assert_true(np.all(search.cv_results_[key] < 1))

        for key in ['mean_score_time', 'std_score_time']:
            assert_true(search.cv_results_[key][1] >= 0)
            assert_true(search.cv_results_[key][0] == 0.0)
            assert_true(np.all(search.cv_results_[key] < 1)) 
Example 14
Project: healthcareai-py   Author: HealthCatalyst   File: randomized_search.py   License: MIT License
def get_algorithm(estimator,
                  scoring_metric,
                  hyperparameter_grid,
                  randomized_search,
                  number_iteration_samples=10,
                  **non_randomized_estimator_kwargs):
    """
    Given an estimator and various params, initialize an algorithm with optional randomized search.

    Args:
        estimator (sklearn.base.BaseEstimator): a scikit-learn estimator (for example: KNeighborsClassifier)
        scoring_metric (str): The scoring metric to optimize for when using random search. See
            http://scikit-learn.org/stable/modules/model_evaluation.html
        hyperparameter_grid (dict): An object containing key value pairs of the specific hyperparameter space to search
            through.
        randomized_search (bool): Whether the method should return a randomized search estimator (as opposed to a
            simple algorithm).
        number_iteration_samples (int): If performing randomized search, this is the number of samples that are run in 
            the hyperparameter space. Higher numbers will be slower, but end up with better results, since it is more
            likely that the true optimal hyperparameter is found.
        **non_randomized_estimator_kwargs: Keyword arguments that you can pass directly to the algorithm. Only used
            when randomized_search is False.

    Returns:
        sklearn.base.BaseEstimator: a scikit-learn algorithm ready to `.fit()`

    """
    if randomized_search:
        algorithm = RandomizedSearchCV(estimator=estimator(),
                                       scoring=scoring_metric,
                                       param_distributions=hyperparameter_grid,
                                       n_iter=number_iteration_samples,
                                       verbose=0,
                                       n_jobs=1)

    else:
        algorithm = estimator(**non_randomized_estimator_kwargs)

    return algorithm 
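
A hedged usage sketch of get_algorithm (the estimator class, metric, grid, and training arrays are illustrative):

from sklearn.neighbors import KNeighborsClassifier

algorithm = get_algorithm(KNeighborsClassifier,
                          scoring_metric='roc_auc',
                          hyperparameter_grid={'n_neighbors': range(1, 30)},
                          randomized_search=True)
algorithm.fit(X_train, y_train)  # X_train/y_train assumed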
Example 15
Project: revrand   Author: NICTA   File: test_models.py   License: Apache License 2.0
def test_randomgridsearch_slm(make_gaus_data):

    X, y, Xs, ys = make_gaus_data

    slm = StandardLinearModel(LinearBasis(onescol=True))

    param_dict = {
        'var': [Parameter(1.0 / v, Positive()) for v in range(1, 6)]
    }
    estimator = RandomizedSearchCV(slm, param_dict, n_jobs=-1, n_iter=2)

    estimator.fit(X, y)
    Ey = estimator.predict(Xs)
    assert len(ys) == len(Ey)  # we just want to make sure this all runs 
Example 16
Project: revrand   Author: NICTA   File: test_models.py   License: Apache License 2.0
def test_randomgridsearch_glm(make_gaus_data):

    X, y, Xs, ys = make_gaus_data

    glm = GeneralizedLinearModel(Gaussian(), LinearBasis(onescol=True),
                                 random_state=1, maxiter=100)

    param_dict = {'batch_size': range(1, 11)}
    estimator = RandomizedSearchCV(glm, param_dict, verbose=1, n_jobs=-1,
                                   n_iter=2)

    estimator.fit(X, y)
    Ey = estimator.predict(Xs)
    assert len(ys) == len(Ey)  # we just want to make sure this all runs 
Example 17
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_search.py   License: MIT License
def test_random_search_with_fit_params():
    check_hyperparameter_searcher_with_fit_params(RandomizedSearchCV, n_iter=1,
                                                  error_score='raise') 
Example 18
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_search.py   License: MIT License
def test_trivial_cv_results_attr():
    # Test search over a "grid" with only one point.
    clf = MockClassifier()
    grid_search = GridSearchCV(clf, {'foo_param': [1]})
    grid_search.fit(X, y)
    assert hasattr(grid_search, "cv_results_")

    random_search = RandomizedSearchCV(clf, {'foo_param': [0]}, n_iter=1)
    random_search.fit(X, y)
    assert hasattr(random_search, "cv_results_")
Example 19
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_search.py   License: MIT License
def test_random_search_cv_results():
    X, y = make_classification(n_samples=50, n_features=4, random_state=42)

    n_splits = 3
    n_search_iter = 30

    params = dict(C=expon(scale=10), gamma=expon(scale=0.1))
    param_keys = ('param_C', 'param_gamma')
    score_keys = ('mean_test_score', 'mean_train_score',
                  'rank_test_score',
                  'split0_test_score', 'split1_test_score',
                  'split2_test_score',
                  'split0_train_score', 'split1_train_score',
                  'split2_train_score',
                  'std_test_score', 'std_train_score',
                  'mean_fit_time', 'std_fit_time',
                  'mean_score_time', 'std_score_time')
    n_cand = n_search_iter

    for iid in (False, True):
        search = RandomizedSearchCV(SVC(gamma='scale'), n_iter=n_search_iter,
                                    cv=n_splits, iid=iid,
                                    param_distributions=params,
                                    return_train_score=True)
        search.fit(X, y)
        assert_equal(iid, search.iid)
        cv_results = search.cv_results_
        # Check results structure
        check_cv_results_array_types(search, param_keys, score_keys)
        check_cv_results_keys(cv_results, param_keys, score_keys, n_cand)
        # For random_search, all the param array vals should be unmasked
        assert not (any(np.ma.getmaskarray(cv_results['param_C'])) or
                    any(np.ma.getmaskarray(cv_results['param_gamma'])))
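
cv_results_ is a dict of parallel arrays keyed as above and loads straight into a pandas DataFrame; a minimal sketch for inspecting a fitted search like the one in this test:

import pandas as pd

results = pd.DataFrame(search.cv_results_)
cols = ['param_C', 'param_gamma', 'mean_test_score', 'rank_test_score']
print(results[cols].sort_values('rank_test_score').head())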
Example 20
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_search.py   License: MIT License
def test_random_search_cv_results_multimetric():
    X, y = make_classification(n_samples=50, n_features=4, random_state=42)

    n_splits = 3
    n_search_iter = 30
    scoring = ('accuracy', 'recall')

    # Scipy 0.12's stats dists do not accept seed, hence we use param grid
    params = dict(C=np.logspace(-10, 1), gamma=np.logspace(-5, 0, base=0.1))
    for iid in (True, False):
        for refit in (True, False):
            random_searches = []
            for scoring in (('accuracy', 'recall'), 'accuracy', 'recall'):
                # If True, for multi-metric pass refit='accuracy'
                if refit:
                    refit = 'accuracy' if isinstance(scoring, tuple) else refit
                clf = SVC(probability=True, random_state=42)
                random_search = RandomizedSearchCV(clf, n_iter=n_search_iter,
                                                   cv=n_splits, iid=iid,
                                                   param_distributions=params,
                                                   scoring=scoring,
                                                   refit=refit, random_state=0)
                random_search.fit(X, y)
                random_searches.append(random_search)

            compare_cv_results_multimetric_with_single(*random_searches,
                                                       iid=iid)
            if refit:
                compare_refit_methods_when_refit_with_acc(
                    random_searches[0], random_searches[1], refit) 
Example 21
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_search.py   License: MIT License
def test_search_cv_results_rank_tie_breaking():
    X, y = make_blobs(n_samples=50, random_state=42)

    # The two C values are close enough to give similar models
    # which would result in a tie of their mean cv-scores
    param_grid = {'C': [1, 1.001, 0.001]}

    grid_search = GridSearchCV(SVC(gamma="scale"), param_grid=param_grid,
                               return_train_score=True)
    random_search = RandomizedSearchCV(SVC(gamma="scale"), n_iter=3,
                                       param_distributions=param_grid,
                                       return_train_score=True)

    for search in (grid_search, random_search):
        search.fit(X, y)
        cv_results = search.cv_results_
        # Check tie breaking strategy -
        # Check that there is a tie in the mean scores between
        # candidates 1 and 2 alone
        assert_almost_equal(cv_results['mean_test_score'][0],
                            cv_results['mean_test_score'][1])
        assert_almost_equal(cv_results['mean_train_score'][0],
                            cv_results['mean_train_score'][1])
        assert not np.allclose(cv_results['mean_test_score'][1],
                               cv_results['mean_test_score'][2])
        assert not np.allclose(cv_results['mean_train_score'][1],
                               cv_results['mean_train_score'][2])
        # 'min' rank should be assigned to the tied candidates
        assert_almost_equal(search.cv_results_['rank_test_score'], [1, 1, 3]) 
Example 22
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_search.py   License: MIT License
def test_pickle():
    # Test that a fit search can be pickled
    clf = MockClassifier()
    grid_search = GridSearchCV(clf, {'foo_param': [1, 2, 3]}, refit=True)
    grid_search.fit(X, y)
    grid_search_pickled = pickle.loads(pickle.dumps(grid_search))
    assert_array_almost_equal(grid_search.predict(X),
                              grid_search_pickled.predict(X))

    random_search = RandomizedSearchCV(clf, {'foo_param': [1, 2, 3]},
                                       refit=True, n_iter=3)
    random_search.fit(X, y)
    random_search_pickled = pickle.loads(pickle.dumps(random_search))
    assert_array_almost_equal(random_search.predict(X),
                              random_search_pickled.predict(X)) 
Example 23
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_sgd.py   License: MIT License
def test_multi_core_gridsearch_and_early_stopping():
    # This is a non-regression test for a bad interaction between
    # early stopping internal attribute and process-based multi-core
    # parallelism.
    param_grid = {
        'alpha': np.logspace(-4, 4, 9),
        'n_iter_no_change': [5, 10, 50],
    }
    clf = SGDClassifier(tol=1e-3, max_iter=1000, early_stopping=True,
                        random_state=0)
    search = RandomizedSearchCV(clf, param_grid, n_iter=10, cv=5, n_jobs=2,
                                random_state=0)
    search.fit(iris.data, iris.target)
    assert search.best_score_ > 0.8 
Example 24
Project: cherry   Author: Windsooon   File: searcher.py   License: MIT License
def __init__(self, model, parameters, language=None, preprocessing=None, categories=None, encoding=None, method=None,
            vectorizer=None, cv=None, n_jobs=None, vectorizer_method=None, clf=None, clf_method=None,
            x_data=None, y_data=None):
        '''
        1. Build pipeline
        2. Run RandomizedSearchCV or GridSearchCV
        3. Display the best score
        '''
        x_data, y_data, vectorizer, clf = load_all(
            model, language=language, preprocessing=preprocessing,
            categories=categories, encoding=encoding, vectorizer=vectorizer,
            vectorizer_method=vectorizer_method, clf=clf,
            clf_method=clf_method, x_data=x_data, y_data=y_data)
        self._search(vectorizer, clf, method, parameters, x_data, y_data, cv, n_jobs) 
Example 25
Project: cherry   Author: Windsooon   File: searcher.py   License: MIT License
def _search(self, vectorizer, clf, method, parameters, x_data, y_data, cv, n_jobs):
        text_clf = Pipeline([
            ('vectorizer', vectorizer),
            ('clf', clf)])
        if method == 'RandomizedSearchCV':
            search_clf = RandomizedSearchCV(text_clf, parameters, cv=cv, n_jobs=n_jobs)
        elif method == 'GridSearchCV':
            search_clf = GridSearchCV(text_clf, parameters, cv=cv, n_jobs=n_jobs)
        else:
            error = 'The {0} method is not supported yet'.format(method)
            raise MethodNotFoundError(error)
        self.best_score(search_clf, parameters, x_data, y_data) 
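
Note that when searching over a Pipeline like the one above, the keys in `parameters` must be prefixed with the step name and a double underscore. A hedged sketch (the specific distributions are illustrative):

parameters = {
    'vectorizer__ngram_range': [(1, 1), (1, 2)],  # targets the 'vectorizer' step
    'clf__alpha': [0.1, 0.5, 1.0],                # targets the 'clf' step
}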
Example 26
def hyper_optimize(self, X, y, cv=KFold(n_splits=5), n_iter_search=20, report=True):
        '''
        Optimize hyperparameters through cross-validation.

        Args:
            X(:obj:'numpy array or pandas dataframe'): The inputs or features
            y(:obj:'numpy array or pandas series'): The target or predictand
            cv(:obj:'sklearn.model_selection._split'): The train/test splitting method
            n_iter_search(:obj:'int'): The number of random hyperparameter samples to use
            report(:obj:'boolean'): Indicator on whether to output a summary report
                                    on optimization results

        Returns:
            (none)
        '''
        from sklearn.model_selection import RandomizedSearchCV

        # Set up the randomized cross-validated search
        self.random_search = RandomizedSearchCV(self.algorithm,
                                                cv=cv,
                                                param_distributions=self.hyper_range,
                                                n_iter=n_iter_search,
                                                scoring=self.my_scorer,
                                                verbose=1,
                                                return_train_score=True)
        # Fit the model to each sampled combination of hyperparameters
        self.random_search.fit(X, y)

        # Assign optimal parameters to object
        self.opt_hyp = self.random_search.best_params_

        # Output results to terminal
        if report:
            self.hyper_report(self.random_search.cv_results_, n_iter_search)
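
A hedged usage sketch of the method above (the enclosing object, here called `model`, with its algorithm, hyper_range, and my_scorer attributes, is assumed):

model.hyper_optimize(X, y, n_iter_search=50, report=False)
print(model.opt_hyp)  # best hyperparameters found by the search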
Example 27
Project: lightfm   Author: lyst   File: test_movielens.py   License: Apache License 2.0
def test_sklearn_cv():

    model = LightFM(loss="warp", random_state=42)

    # Set distributions for hyperparameters
    randint = stats.randint(low=1, high=65)
    randint.random_state = 42
    gamma = stats.gamma(a=1.2, loc=0, scale=0.13)
    gamma.random_state = 42
    distr = {"no_components": randint, "learning_rate": gamma}

    # Custom score function
    def scorer(est, x, y=None):
        return precision_at_k(est, x).mean()

    # Dummy custom CV to ensure shape preservation.
    class CV(KFold):
        def split(self, X, y=None, groups=None):
            idx = np.arange(X.shape[0])
            for _ in range(self.n_splits):
                yield idx, idx

    cv = CV(n_splits=3, random_state=42)
    search = RandomizedSearchCV(
        estimator=model,
        param_distributions=distr,
        n_iter=2,
        scoring=scorer,
        random_state=42,
        cv=cv,
    )
    search.fit(train)
    assert search.best_params_["no_components"] == 58 
Example 28
Project: sktime-dl   Author: sktime   File: _tuned_cnn.py   License: BSD 3-Clause "New" or "Revised" License
def fit(self, X, y, **kwargs):
        if self.search_method == 'grid':
            self.grid = GridSearchCV(estimator=self.base_model,
                                     param_grid=self.param_grid,
                                     cv=self.cv_folds,
                                     n_jobs=self.n_jobs)
        elif self.search_method == 'random':
            self.grid = RandomizedSearchCV(estimator=self.base_model,
                                           param_distributions=self.param_grid,
                                           cv=self.cv_folds,
                                           n_jobs=self.n_jobs)
        else:
            # todo expand, give options etc
            raise Exception('Unrecognised search method provided: {}'.format(self.search_method))

        self.grid_history = self.grid.fit(X, y)  # refit is a constructor option (default True), not a fit parameter
        self.model = self.grid.best_estimator_.model
        self.tuned_params = self.grid.best_params_

        # copying data-wrangling info up
        self.label_encoder = self.grid.best_estimator_.label_encoder  #
        self.classes_ = self.grid.best_estimator_.classes_
        self.nb_classes = self.grid.best_estimator_.nb_classes

        if self.verbose:
            self.print_search_summary()

        return self 
Example 29
Project: finance_ml   Author: jjakimoto   File: hyper.py   License: MIT License
def clf_hyper_fit(feat, label, t1, pipe_clf, search_params, scoring=None,
                  n_splits=3, bagging=[0, None, 1.],
                  rnd_search_iter=0, n_jobs=-1, pct_embargo=0., **fit_params):
    # Set default value for scoring
    if scoring is None:
        if set(label.values) == {0, 1}:
            scoring = 'f1'
        else:
            scoring = 'neg_log_loss'
    # HP search on training data
    inner_cv = PurgedKFold(n_splits=n_splits, t1=t1, pct_embargo=pct_embargo)
    if rnd_search_iter == 0:
        search = GridSearchCV(estimator=pipe_clf, param_grid=search_params,
                              scoring=scoring, cv=inner_cv, n_jobs=n_jobs, iid=False)
    else:
        search = RandomizedSearchCV(estimator=pipe_clf, param_distributions=search_params,
                                    scoring=scoring, cv=inner_cv, n_jobs=n_jobs, iid=False)
    best_pipe = search.fit(feat, label, **fit_params).best_estimator_
    # Fit the validated model on the entirety of the data
    if bagging[0] > 0:
        bag_est = BaggingClassifier(base_estimator=Pipeline(best_pipe.steps),
                                    n_estimators=int(bagging[0]), max_samples=float(bagging[1]),
                                    max_features=float(bagging[2]), n_jobs=n_jobs)
        # fit the bagging ensemble (not the already-fitted pipeline)
        bag_est = bag_est.fit(feat, label,
                              sample_weight=fit_params[bag_est.base_estimator.steps[-1][0] + '__sample_weight'])
        best_pipe = Pipeline([('bag', bag_est)])
    return best_pipe 
Example 30
Project: ml-on-gcp   Author: GoogleCloudPlatform   File: gke_parallel.py   License: Apache License 2.0
def __init__(self, search, project_id, zone, cluster_id, bucket_name, image_name, task_name=None):
        """Wraps around a SearchCV object and handles deploying `fit`
        jobs to a GKE cluster.
        """
        if type(search) not in self.SUPPORTED_SEARCH:
            raise TypeError('Search type {} not supported.  Only supporting {}.'.format(type(search), [s.__name__ for s in self.SUPPORTED_SEARCH]))

        self.search = search
        self.project_id = project_id
        self.cluster_id = cluster_id
        self.bucket_name = bucket_name
        self.image_name = image_name
        self.task_name = task_name
        self.gcs_uri = None

        self.cluster = get_cluster(project_id, zone, cluster_id)
        self.n_nodes = self.cluster['currentNodeCount']

        # For GridSearchCV
        self.param_grids = {}
        # For RandomizedSearchCV
        self.param_distributions = None
        self.n_iter = None
        # For BayesSearchCV
        self.search_spaces = {}

        self.job_names = {}
        self.output_uris = {}
        self.output_without_estimator_uris = {}
        self.dones = {}
        self.results = {}

        self.best_estimator_ = None
        self.best_params_ = None
        self.best_score_ = None
        self.best_search_ = None

        self._cancelled = False
        self._done = False