Python sklearn.model_selection.RandomizedSearchCV() Examples

The following are code examples of sklearn.model_selection.RandomizedSearchCV(), collected from open-source projects. The source file, project, and license are listed above each example. You may also want to check out the other available functions and classes of the sklearn.model_selection module.
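Before the project-specific examples, here is a minimal, self-contained sketch of the typical RandomizedSearchCV workflow: declare parameter distributions (scipy.stats distributions are sampled, plain lists are sampled uniformly), fit the search, and read off the best parameters. The dataset, estimator, and distribution values below are illustrative choices and are not taken from any of the projects listed here.

from scipy.stats import loguniform
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import RandomizedSearchCV

X, y = load_iris(return_X_y=True)

# scipy distributions are sampled; lists are sampled uniformly at random
param_distributions = {
    'C': loguniform(1e-3, 1e3),
    'solver': ['liblinear', 'lbfgs'],
}

search = RandomizedSearchCV(
    LogisticRegression(max_iter=1000),
    param_distributions=param_distributions,
    n_iter=20,  # number of sampled parameter settings
    cv=5,
    random_state=0,
)
search.fit(X, y)
print(search.best_params_, search.best_score_)
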
Example #1
Source File: test_search.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_empty_cv_iterator_error():
    # Use global X, y

    # create cv
    cv = KFold(n_splits=3).split(X)

    # pop all of it, this should cause the expected ValueError
    [u for u in cv]
    # cv is empty now

    train_size = 100
    ridge = RandomizedSearchCV(Ridge(), {'alpha': [1e-3, 1e-2, 1e-1]},
                               cv=cv, n_jobs=-1)

    # assert that this raises an error
    with pytest.raises(ValueError,
                       match='No fits were performed. '
                             'Was the CV iterator empty\\? '
                             'Were there no candidates\\?'):
        ridge.fit(X[:train_size], y[:train_size]) 
Example #2
Source File: transformers.py    From healthcareai-py with MIT License
def getTunedModel(self, baseModel):
        n_estimators = [100, 200, 300, 400, 500]
        max_features = ['auto', 'sqrt']
        max_depth = [5, 10, 20, 30, 40, 50]
        min_samples_split = [2, 5, 10]
        min_samples_leaf = [1, 2, 4]
        bootstrap = [True, False]
        
        random_grid = {'n_estimators': n_estimators,
                       'max_features': max_features,
                       'max_depth': max_depth,
                       'min_samples_split': min_samples_split,
                       'min_samples_leaf': min_samples_leaf,
                       'bootstrap': bootstrap}
        #print(random_grid)
        
        model_tuned = RandomizedSearchCV(estimator=baseModel, param_distributions=random_grid,
                                         n_iter=2, cv=2, verbose=0, random_state=100, n_jobs=-1)
        return model_tuned
Example #3
Source File: trained_attack_models.py    From privacy with Apache License 2.0
def random_forest(verbose: int = 0, n_jobs: int = 1):
  """Setup a random forest pipeline with cross-validation."""
  rf = ensemble.RandomForestClassifier()

  n_estimators = [100]
  max_features = ['auto', 'sqrt']
  max_depth = [5, 10, 20]
  max_depth.append(None)
  min_samples_split = [2, 5, 10]
  min_samples_leaf = [1, 2, 4]
  random_grid = {'n_estimators': n_estimators,
                 'max_features': max_features,
                 'max_depth': max_depth,
                 'min_samples_split': min_samples_split,
                 'min_samples_leaf': min_samples_leaf}

  pipe = model_selection.RandomizedSearchCV(
      rf, param_distributions=random_grid, n_iter=7, cv=3, n_jobs=n_jobs,
      iid=False, verbose=verbose)
  return pipe 
Example #4
Source File: test_core_operators.py    From lale with Apache License 2.0
def test_with_randomizedsearchcv(self):
        from sklearn.model_selection import RandomizedSearchCV
        from sklearn.datasets import load_iris
        from sklearn.metrics import accuracy_score, make_scorer
        from scipy.stats.distributions import uniform
        import numpy as np
        lr = LogisticRegression()
        parameters = {'solver':('liblinear', 'lbfgs'), 'penalty':['l2']}
        ranges, cat_idx = lr.get_param_ranges()
        min_C, max_C, default_C = ranges['C']
        # specify parameters and distributions to sample from
        #the loguniform distribution needs to be taken care of properly
        param_dist = {"solver": ranges['solver'],
                      "C": uniform(min_C, np.log(max_C))}
        # run randomized search
        n_iter_search = 5
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            random_search = RandomizedSearchCV(
                lr, param_distributions=param_dist, n_iter=n_iter_search, cv=5,
                scoring=make_scorer(accuracy_score))
            iris = load_iris()
            random_search.fit(iris.data, iris.target) 
Example #5
Source File: gke_parallel.py    From ml-on-gcp with Apache License 2.0
def fit(self, X, y):
        """Deploys `fit` jobs to each worker in the cluster.
        """
        timestamp = str(int(time.time()))
        self.task_name = self.task_name or '{}.{}.{}'.format(self.cluster_id, self.image_name, timestamp)
        self._done = False
        self._cancelled = False

        X_uri, y_uri, _ = self._upload_data(X, y)

        if type(self.search) == GridSearchCV:
            handler = self._handle_grid_search
        elif type(self.search) == RandomizedSearchCV:
            handler = self._handle_randomized_search
        elif type(self.search) == BayesSearchCV:
            handler = self._handle_bayes_search

        print('Fitting {}'.format(type(self.search)))
        handler(X_uri, y_uri)

        self.persist() 
Example #6
Source File: classifier_selection.py    From causallib with Apache License 2.0
def _select_classifier_from_sk_search(estimator, X, A):
    """Return best model from a scikit-learn Search-estimator model.

    Args:
        estimator (GridSearchCV | RandomizedSearchCV): An initialized sklearn SearchCV classifier.
        X (np.ndarray): Covariate matrix size (num_samples, num_features)
        A (np.ndarray): binary labels indicating the source and target populations (num_samples,)

    Returns:
        classifier: model.best_estimator_ - best-performing classifier.
                    See scikit-learn's GridSearchCV and RandomizedSearchCV documentation for details on their return
                    values.
    """
    estimator.fit(X, A)
    best_estimator = clone(estimator.best_estimator_)
    return best_estimator 
Example #7
Source File: base.py    From pylift with BSD 2-Clause "Simplified" License
def randomized_search(self, **kwargs):
        """Randomized search using sklearn.model_selection.RandomizedSearchCV.

        Any parameters typically associated with RandomizedSearchCV (see
        sklearn documentation) can be passed as keyword arguments to this
        function.

        The final dictionary used for the randomized search is saved to
        `self.randomized_search_params`. This is updated with any parameters
        that are passed.

        Examples
        --------
        # Passing kwargs.
        self.randomized_search(param_distributions={'max_depth':[2,3,5,10]}, refit=True)

        """
        self.randomized_search_params.update(kwargs)
        self.rand_search_ = RandomizedSearchCV(**self.randomized_search_params)
        self.rand_search_.fit(self.x_train, self.transformed_y_train)
        return self.rand_search_ 
Example #8
Source File: QuincyLearn.py    From quincy with GNU General Public License v3.0
def learn(self):
        X, y = self.__get_data()
        feature_names =list(X.columns.values)
        if self._undersampling:
            X, y = self.__undersample(feature_names, X, y)

        if self._feature_selection:
            X = self.__select_features(X, y, feature_names)

        if self._scaling:
            logging.info("Scaling...")
            X = preprocessing.scale(X)

        rgs = RandomizedSearchCV(estimator=self._classifier[1], param_distributions=self._classifier[2],
                                 error_score=0, cv=QuincyConfig.CV, n_iter=QuincyConfig.ITERS, refit=True,
                                 n_jobs=-1, scoring=QuincyConfig.METRIC, iid=False)
        rgs.fit(X, y)
        logging.info("Best SCORE: %s" % str(rgs.best_score_))
        logging.info("Best Params: %s" % str(rgs.best_params_))
        self._optimized_model = rgs 
Example #9
Source File: grid_search.py    From skutil with BSD 3-Clause "New" or "Revised" License
def fit(self, X, y=None, groups=None):
            """Run fit on the estimator with randomly drawn parameters.

            Parameters
            ----------

            X : array-like, shape=(n_samples, n_features)
                Training vector, where n_samples is the number of samples and
                n_features is the number of features.

            y : array-like, shape=(n_samples,) or (n_samples, n_output), optional (default=None)
                Target relative to X for classification or regression;
                None for unsupervised learning.

            groups : array-like, shape=(n_samples,), optional (default=None)
                Group labels for the samples used while splitting the dataset into
                train/test set.
            """
            return super(RandomizedSearchCV, self).fit(X, _as_numpy(y), groups) 
Example #10
Source File: test_search.py    From twitter-stock-recommendation with MIT License
def test_search_cv_timing():
    svc = LinearSVC(random_state=0)

    X = [[1, ], [2, ], [3, ], [4, ]]
    y = [0, 1, 1, 0]

    gs = GridSearchCV(svc, {'C': [0, 1]}, cv=2, error_score=0)
    rs = RandomizedSearchCV(svc, {'C': [0, 1]}, cv=2, error_score=0, n_iter=2)

    for search in (gs, rs):
        search.fit(X, y)
        for key in ['mean_fit_time', 'std_fit_time']:
            # NOTE The precision of time.time in windows is not high
            # enough for the fit/score times to be non-zero for trivial X and y
            assert_true(np.all(search.cv_results_[key] >= 0))
            assert_true(np.all(search.cv_results_[key] < 1))

        for key in ['mean_score_time', 'std_score_time']:
            assert_true(search.cv_results_[key][1] >= 0)
            assert_true(search.cv_results_[key][0] == 0.0)
            assert_true(np.all(search.cv_results_[key] < 1)) 
Example #11
Source File: models.py    From aletheia with MIT License
def _prepare_classifier(self, params, n_jobs=1):

        X_train, y_train = params

        tuned_parameters = [{
            'kernel': ['rbf'], 
            'gamma': [1e-4,1e-3,1e-2,1e-1,1e+0,1e+1,1e+2,1e+3,1e+4],
            'C': [1e+0,1e+1,1e+2,1e+3,1e+4,1e+5,1e+6,1e+7,1e+8,1e+9]
        }]

        clf=RandomizedSearchCV(svm.SVC(random_state=self.random_state), 
                               tuned_parameters[0], 
                               n_iter=self.n_randomized_search_iter, 
                               n_jobs=n_jobs, random_state=self.random_state)
        clf.fit(X_train, y_train)
              
        params=clf.best_params_
        clf=svm.SVC(kernel=params['kernel'], C=params['C'], 
            gamma=params['gamma'], probability=True, 
            random_state=self.random_state)
        clf.fit(X_train, y_train)

        return clf 
Example #12
Source File: randomized_search.py    From healthcareai-py with MIT License
def get_algorithm(estimator,
                  scoring_metric,
                  hyperparameter_grid,
                  randomized_search,
                  number_iteration_samples=10,
                  **non_randomized_estimator_kwargs):
    """
    Given an estimator and various params, initialize an algorithm with optional randomized search.

    Args:
        estimator (sklearn.base.BaseEstimator): a scikit-learn estimator (for example: KNeighborsClassifier)
        scoring_metric (str): The scoring metric to optimize for if using random search. See
            http://scikit-learn.org/stable/modules/model_evaluation.html
        hyperparameter_grid (dict): An object containing key value pairs of the specific hyperparameter space to search
            through.
        randomized_search (bool): Whether the method should return a randomized search estimator (as opposed to a
            simple algorithm).
        number_iteration_samples (int): If performing randomized search, this is the number of samples that are run in 
            the hyperparameter space. Higher numbers will be slower, but end up with better results, since it is more
            likely that the true optimal hyperparameter is found.
        **non_randomized_estimator_kwargs: Keyword arguments that you can pass directly to the algorithm. Only used when
            randomized_search is False

    Returns:
        sklearn.base.BaseEstimator: a scikit learn algorithm ready to `.fit()`

    """
    if randomized_search:
        algorithm = RandomizedSearchCV(estimator=estimator(),
                                       scoring=scoring_metric,
                                       param_distributions=hyperparameter_grid,
                                       n_iter=number_iteration_samples,
                                       verbose=0,
                                       n_jobs=1)

    else:
        algorithm = estimator(**non_randomized_estimator_kwargs)

    return algorithm 
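A hypothetical call to the get_algorithm helper above might look like the following; KNeighborsClassifier and the grid values are illustrative and not taken from healthcareai-py.

from sklearn.neighbors import KNeighborsClassifier

# Hypothetical usage: returns a RandomizedSearchCV wrapping KNeighborsClassifier,
# ready for .fit(X, y)
knn_grid = {'n_neighbors': [1, 3, 5, 7, 9], 'weights': ['uniform', 'distance']}
algorithm = get_algorithm(KNeighborsClassifier,
                          scoring_metric='roc_auc',
                          hyperparameter_grid=knn_grid,
                          randomized_search=True,
                          number_iteration_samples=5)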
Example #13
Source File: test_search.py    From twitter-stock-recommendation with MIT License
def test_return_train_score_warn():
    # Test that warnings are raised. Will be removed in 0.21

    X = np.arange(100).reshape(10, 10)
    y = np.array([0] * 5 + [1] * 5)
    grid = {'C': [1, 2]}

    estimators = [GridSearchCV(LinearSVC(random_state=0), grid),
                  RandomizedSearchCV(LinearSVC(random_state=0), grid,
                                     n_iter=2)]

    result = {}
    for estimator in estimators:
        for val in [True, False, 'warn']:
            estimator.set_params(return_train_score=val)
            result[val] = assert_no_warnings(estimator.fit, X, y).cv_results_

    train_keys = ['split0_train_score', 'split1_train_score',
                  'split2_train_score', 'mean_train_score', 'std_train_score']
    for key in train_keys:
        msg = (
            'You are accessing a training score ({!r}), '
            'which will not be available by default '
            'any more in 0.21. If you need training scores, '
            'please set return_train_score=True').format(key)
        train_score = assert_warns_message(FutureWarning, msg,
                                           result['warn'].get, key)
        assert np.allclose(train_score, result[True][key])
        assert key not in result[False]

    for key in result['warn']:
        if key not in train_keys:
            assert_no_warnings(result['warn'].get, key) 
Example #14
Source File: classification.py    From pyImSegm with BSD 3-Clause "New" or "Revised" License
def create_classif_search(name_clf, clf_pipeline, nb_labels,
                          search_type='random', cross_val=10,
                          eval_metric='f1', nb_iter=250, nb_workers=5):
    """ create sklearn search depending on spec. random or grid

    :param int nb_labels: number of labels
    :param str search_type: hyper-params search type
    :param str eval_metric: evaluation metric
    :param int nb_iter: for random number of tries
    :param str name_clf: name of classif.
    :param obj clf_pipeline: object
    :param obj cross_val: obj specific CV for fix train-test
    :param int nb_workers: number jobs running in parallel
    :return:
    """
    score_weight = 'weighted' if nb_labels > 2 else 'binary'
    scoring = metrics.make_scorer(DICT_SCORING[eval_metric.lower()],
                                  average=score_weight)
    if search_type == 'grid':
        clf_parameters = create_clf_param_search_grid(name_clf)
        logging.info('init Grid search...')
        clf_search = GridSearchCV(
            clf_pipeline, clf_parameters, scoring=scoring, cv=cross_val,
            n_jobs=nb_workers, verbose=1, refit=True)
    else:
        clf_parameters = create_clf_param_search_distrib(name_clf)
        nb_iter = search_params_cut_down_max_nb_iter(clf_parameters, nb_iter)
        logging.info('init Randomized search...')
        clf_search = RandomizedSearchCV(
            clf_pipeline, clf_parameters, scoring=scoring, cv=cross_val,
            n_jobs=nb_workers, n_iter=nb_iter, verbose=1, refit=True)
    return clf_search 
Example #15
Source File: test_search.py    From twitter-stock-recommendation with MIT License
def test_pickle():
    # Test that a fit search can be pickled
    clf = MockClassifier()
    grid_search = GridSearchCV(clf, {'foo_param': [1, 2, 3]}, refit=True)
    grid_search.fit(X, y)
    grid_search_pickled = pickle.loads(pickle.dumps(grid_search))
    assert_array_almost_equal(grid_search.predict(X),
                              grid_search_pickled.predict(X))

    random_search = RandomizedSearchCV(clf, {'foo_param': [1, 2, 3]},
                                       refit=True, n_iter=3)
    random_search.fit(X, y)
    random_search_pickled = pickle.loads(pickle.dumps(random_search))
    assert_array_almost_equal(random_search.predict(X),
                              random_search_pickled.predict(X)) 
Example #16
Source File: train.py    From edge-tpu-servers with MIT License
def find_best_xgb_estimator(X, y, cv, param_comb):
    # Random search over specified parameter values for XGBoost.
    # Exhaustive search takes many more cycles w/o much benefit.
    # Returns optimized XGBoost estimator.
    # Ref: https://www.kaggle.com/tilii7/hyperparameter-grid-search-with-xgboost
    print('\n Finding best XGBoost estimator...')
    param_grid = {
        'min_child_weight': [1, 5, 10],
        'gamma': [0.5, 1, 1.5, 2, 5],
        'subsample': [0.6, 0.8, 1.0],
        'colsample_bytree': [0.6, 0.8, 1.0],
        'max_depth': [3, 4, 5]
        }
    init_est = xgb(learning_rate=0.02, n_estimators=600, objective='multi:softprob',
        verbose=1, nthread=1)
    random_search = RandomizedSearchCV(estimator=init_est, param_distributions=param_grid,
        n_iter=param_comb, n_jobs=4, iid=False, cv=cv,
        verbose=1, random_state=RANDOM_SEED)
    random_search.fit(X, y)
    #print('\n All results:')
    #print(random_search.cv_results_)
    print('\n Best estimator:')
    print(random_search.best_estimator_)
    print('\n Best normalized gini score for %d-fold search with %d parameter combinations:' %
        (FOLDS, PARA_COMB))
    print(random_search.best_score_)
    print('\n Best hyperparameters:')
    print(random_search.best_params_)
    return random_search.best_estimator_

# Split data up into train and test sets. 
Example #17
Source File: train.py    From smart-zoneminder with MIT License
def find_best_xgb_estimator(X, y, cv, param_comb, random_seed):
    # Random search over specified parameter values for XGBoost.
    # Exhaustive search takes many more cycles w/o much benefit.
    # Returns optimized XGBoost estimator.
    # Ref: https://www.kaggle.com/tilii7/hyperparameter-grid-search-with-xgboost
    print('\n Finding best XGBoost estimator...')
    param_grid = {
        'min_child_weight': [1, 5, 10],
        'gamma': [0.5, 1, 1.5, 2, 5],
        'subsample': [0.6, 0.8, 1.0],
        'colsample_bytree': [0.6, 0.8, 1.0],
        'max_depth': [3, 4, 5]
        }
    init_est = xgb(learning_rate=0.02, n_estimators=600, objective='multi:softprob',
        verbose=1, n_jobs=1, random_state=random_seed)
    random_search = RandomizedSearchCV(estimator=init_est,
        param_distributions=param_grid, n_iter=param_comb, n_jobs=4,
        cv=cv, verbose=1, random_state=random_seed)
    random_search.fit(X, y)
    #print('\n All results:')
    #print(random_search.cv_results_)
    print('\n Best estimator:')
    print(random_search.best_estimator_)
    print('\n Best score for {}-fold search with {} parameter combinations:'
        .format(FOLDS, PARA_COMB))
    print(random_search.best_score_)
    print('\n Best hyperparameters:')
    print(random_search.best_params_)
    return random_search.best_estimator_

# Load the known faces and embeddings. 
Example #18
Source File: strategies.py    From sktime with BSD 3-Clause "New" or "Revised" License
def _check_estimator_compatibility(self, estimator):
        """
        Check compatibility of estimator with strategy
        """

        # Determine required estimator type from strategy case
        # TODO replace with strategy - estimator type registry lookup
        if hasattr(self, '_traits'):
            required = self._traits["required_estimator_type"]
            if any(estimator_type not in ESTIMATOR_TYPES for estimator_type in
                   required):
                raise AttributeError("Required estimator type unknown")
        else:
            raise AttributeError("Required estimator type not found")

        # # Check estimator compatibility with required type
        # If pipeline, check compatibility of final estimator
        if isinstance(estimator, Pipeline):
            final_estimator = estimator.steps[-1][1]
            if not isinstance(final_estimator, required):
                raise ValueError(
                    f"Final estimator of passed pipeline estimator must be "
                    f"of type: {required}, "
                    f"but found: {type(final_estimator)}")

        # If tuning meta-estimator, check compatibility of inner estimator
        elif isinstance(estimator, (GridSearchCV, RandomizedSearchCV)):
            estimator = estimator.estimator
            if not isinstance(estimator, required):
                raise ValueError(
                    f"Inner estimator of passed meta-estimator must be of "
                    f"type: {required}, "
                    f"but found: {type(estimator)}")

        # Otherwise check estimator directly
        else:
            if not isinstance(estimator, required):
                raise ValueError(
                    f"Passed estimator has to be of type: {required}, "
                    f"but found: {type(estimator)}") 
Example #19
Source File: classifier_selection.py    From causallib with Apache License 2.0
def select_classifier(model, X, A, n_splits=5, loss_type='01', seed=None):
    """Utility for selecting best classifier using cross-validation.

    Args:
        model: Either one of: scikit-learn classifier, scikit-learn SearchCV model (GridSearchCV, RandomizedSearchCV),
                              list of classifiers.
        X (np.ndarray): Covariate matrix size (num_samples, num_features)
        A (np.ndarray): binary labels indicating the source and target populations (num_samples,)
        n_splits (int): number of splits in cross-validation. relevant only if list of classifiers is passed.
        loss_type (str): name of loss metric to select classifier by. Either '01' for zero-one loss, otherwise
                         cross-entropy is used (and classifiers must implement predict_proba).
                         relevant only if list of classifiers is passed.
        seed (int): random seed for cross-validation split. relevant only if list of classifiers is passed.

    Returns:
        classifier: best performing classifier on validation set.
    """
    if isinstance(model, (GridSearchCV, RandomizedSearchCV)):
        selected_model = _select_classifier_from_sk_search(model, X, A)
    elif isinstance(model, list):
        selected_model = _select_classifier_from_list(candidates=model, X=X, A=A, n_splits=n_splits, seed=seed,
                                                      loss_type=loss_type)
    elif isinstance(model, dict):
        selected_model = _select_classifier_from_grid(X=X, A=A, n_splits=n_splits, seed=seed, **model,
                                                      loss_type=loss_type)
    else:  # A regular classifier was passed
        selected_model = model
    return selected_model 
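A hypothetical call to select_classifier above, passing a SearchCV model so that it routes through _select_classifier_from_sk_search (Example #6); the estimator, parameter grid, and synthetic data are illustrative.

from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import RandomizedSearchCV

# Synthetic covariates X and binary population labels A, shaped as in the docstring
X, A = make_classification(n_samples=200, n_features=5, random_state=0)

search = RandomizedSearchCV(LogisticRegression(max_iter=1000),
                            param_distributions={'C': [0.01, 0.1, 1.0, 10.0]},
                            n_iter=4, cv=3, random_state=0)
best_clf = select_classifier(search, X, A)  # a clone of search.best_estimator_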
Example #20
Source File: test_search.py    From twitter-stock-recommendation with MIT License
def test_random_search_cv_results_multimetric():
    X, y = make_classification(n_samples=50, n_features=4, random_state=42)

    n_splits = 3
    n_search_iter = 30
    scoring = ('accuracy', 'recall')

    # Scipy 0.12's stats dists do not accept seed, hence we use param grid
    params = dict(C=np.logspace(-10, 1), gamma=np.logspace(-5, 0, base=0.1))
    for iid in (True, False):
        for refit in (True, False):
            random_searches = []
            for scoring in (('accuracy', 'recall'), 'accuracy', 'recall'):
                # If True, for multi-metric pass refit='accuracy'
                if refit:
                    refit = 'accuracy' if isinstance(scoring, tuple) else refit
                clf = SVC(probability=True, random_state=42)
                random_search = RandomizedSearchCV(clf, n_iter=n_search_iter,
                                                   cv=n_splits, iid=iid,
                                                   param_distributions=params,
                                                   scoring=scoring,
                                                   refit=refit, random_state=0)
                random_search.fit(X, y)
                random_searches.append(random_search)

            compare_cv_results_multimetric_with_single(*random_searches,
                                                       iid=iid)
            if refit:
                compare_refit_methods_when_refit_with_acc(
                    random_searches[0], random_searches[1], refit) 
Example #21
Source File: _model.py    From scitime with BSD 3-Clause "New" or "Revised" License
def _random_search(self, inputs, outputs, iterations, save_model=False):
        """
        performs a random search on the NN meta algo to find the best params

        :param inputs: pd.DataFrame chosen as input
        :param outputs: pd.DataFrame chosen as output
        :param iterations: Number of parameter settings that are sampled
        :param save_model: boolean set to True if the model needs to be saved
        :return: best meta_algo with parameters
        :rtype: scikit learn RandomizedSearchCV object
        """
        X, y, cols, original_cols = self._transform_data(inputs, outputs)

        if self.meta_algo != 'NN':
            raise KeyError(f'''meta algo {self.meta_algo} not supported for random search''')

        parameter_space = config("random_search_params")
        meta_algo = MLPRegressor(max_iter=200)

        X_train, X_test, y_train, y_test \
            = train_test_split(X, y, test_size=0.20, random_state=42)

        X_train, X_test = self._scale_data(X_train, X_test, save_model)

        meta_algo = RandomizedSearchCV(meta_algo, parameter_space,
                                       n_iter=iterations, n_jobs=2)
        meta_algo.fit(X_train, y_train)

        if self.verbose >= 2:
            self.logger.info(f'''Best parameters found: {meta_algo.best_estimator_}''')

        return meta_algo 
Example #22
Source File: test_sklearn_compatibility.py    From neupy with MIT License
def test_grid_search(self):
        def scorer(network, X, y):
            y = asfloat(y)
            result = asfloat(network.predict(X))
            return self.eval(objectives.rmsle(result[:, 0], y))

        dataset = datasets.load_diabetes()
        x_train, x_test, y_train, y_test = train_test_split(
            dataset.data, dataset.target, test_size=0.3
        )

        grnnet = algorithms.GRNN(std=0.5, verbose=False)
        grnnet.train(x_train, y_train)
        error = scorer(grnnet, x_test, y_test)

        self.assertAlmostEqual(0.513, error, places=3)

        random_search = model_selection.RandomizedSearchCV(
            grnnet,
            param_distributions={'std': np.arange(1e-2, 0.1, 1e-4)},
            n_iter=10,
            scoring=scorer,
            random_state=self.random_seed,
            cv=3,
        )
        random_search.fit(dataset.data, dataset.target)
        scores = random_search.cv_results_

        best_score = min(scores['mean_test_score'])
        self.assertAlmostEqual(0.4266, best_score, places=3) 
Example #23
Source File: test_model_selection.py    From pandas-ml with BSD 3-Clause "New" or "Revised" License
def test_objectmapper(self):
        df = pdml.ModelFrame([])

        # Splitter Classes
        self.assertIs(df.model_selection.KFold, ms.KFold)
        self.assertIs(df.model_selection.GroupKFold, ms.GroupKFold)
        self.assertIs(df.model_selection.StratifiedKFold, ms.StratifiedKFold)

        self.assertIs(df.model_selection.LeaveOneGroupOut, ms.LeaveOneGroupOut)
        self.assertIs(df.model_selection.LeavePGroupsOut, ms.LeavePGroupsOut)
        self.assertIs(df.model_selection.LeaveOneOut, ms.LeaveOneOut)
        self.assertIs(df.model_selection.LeavePOut, ms.LeavePOut)

        self.assertIs(df.model_selection.ShuffleSplit, ms.ShuffleSplit)
        self.assertIs(df.model_selection.GroupShuffleSplit,
                      ms.GroupShuffleSplit)
        # self.assertIs(df.model_selection.StratifiedShuffleSplit,
        #               ms.StratifiedShuffleSplit)
        self.assertIs(df.model_selection.PredefinedSplit, ms.PredefinedSplit)
        self.assertIs(df.model_selection.TimeSeriesSplit, ms.TimeSeriesSplit)

        # Splitter Functions

        # Hyper-parameter optimizers
        self.assertIs(df.model_selection.GridSearchCV, ms.GridSearchCV)
        self.assertIs(df.model_selection.RandomizedSearchCV, ms.RandomizedSearchCV)
        self.assertIs(df.model_selection.ParameterGrid, ms.ParameterGrid)
        self.assertIs(df.model_selection.ParameterSampler, ms.ParameterSampler)

        # Model validation 
Example #24
Source File: test_model_selection.py    From pandas-ml with BSD 3-Clause "New" or "Revised" License
def test_objectmapper_abbr(self):
        df = pdml.ModelFrame([])

        # Splitter Classes
        self.assertIs(df.ms.KFold, ms.KFold)
        self.assertIs(df.ms.GroupKFold, ms.GroupKFold)
        self.assertIs(df.ms.StratifiedKFold, ms.StratifiedKFold)

        self.assertIs(df.ms.LeaveOneGroupOut, ms.LeaveOneGroupOut)
        self.assertIs(df.ms.LeavePGroupsOut, ms.LeavePGroupsOut)
        self.assertIs(df.ms.LeaveOneOut, ms.LeaveOneOut)
        self.assertIs(df.ms.LeavePOut, ms.LeavePOut)

        self.assertIs(df.ms.ShuffleSplit, ms.ShuffleSplit)
        self.assertIs(df.ms.GroupShuffleSplit,
                      ms.GroupShuffleSplit)
        # self.assertIs(df.ms.StratifiedShuffleSplit,
        #               ms.StratifiedShuffleSplit)
        self.assertIs(df.ms.PredefinedSplit, ms.PredefinedSplit)
        self.assertIs(df.ms.TimeSeriesSplit, ms.TimeSeriesSplit)

        # Splitter Functions

        # Hyper-parameter optimizers
        self.assertIs(df.ms.GridSearchCV, ms.GridSearchCV)
        self.assertIs(df.ms.RandomizedSearchCV, ms.RandomizedSearchCV)
        self.assertIs(df.ms.ParameterGrid, ms.ParameterGrid)
        self.assertIs(df.ms.ParameterSampler, ms.ParameterSampler)

        # Model validation 
Example #25
Source File: test_search.py    From twitter-stock-recommendation with MIT License
def test_random_search_cv_results():
    X, y = make_classification(n_samples=50, n_features=4, random_state=42)

    n_splits = 3
    n_search_iter = 30

    params = dict(C=expon(scale=10), gamma=expon(scale=0.1))
    param_keys = ('param_C', 'param_gamma')
    score_keys = ('mean_test_score', 'mean_train_score',
                  'rank_test_score',
                  'split0_test_score', 'split1_test_score',
                  'split2_test_score',
                  'split0_train_score', 'split1_train_score',
                  'split2_train_score',
                  'std_test_score', 'std_train_score',
                  'mean_fit_time', 'std_fit_time',
                  'mean_score_time', 'std_score_time')
    n_cand = n_search_iter

    for iid in (False, True):
        search = RandomizedSearchCV(SVC(), n_iter=n_search_iter, cv=n_splits,
                                    iid=iid, param_distributions=params)
        search.fit(X, y)
        assert_equal(iid, search.iid)
        cv_results = search.cv_results_
        # Check results structure
        check_cv_results_array_types(search, param_keys, score_keys)
        check_cv_results_keys(cv_results, param_keys, score_keys, n_cand)
        # For random_search, all the param array vals should be unmasked
        assert_false(any(cv_results['param_C'].mask) or
                     any(cv_results['param_gamma'].mask))
        check_cv_results_grid_scores_consistency(search) 
Example #26
Source File: hyper_opt.py    From MAST-ML with MIT License
def fit(self, X, y, savepath=None, refit=True):
        rst = dict()
        param_dict = self._get_randomized_param_dict()

        if savepath is None:
            savepath = os.getcwd()

        estimator_name = self._estimator_name

        if self.cv is None:
            self.cv = ms.RepeatedKFold()

        model = RandomizedSearchCV(self.estimator, param_dict, n_iter=self.n_iter, scoring=self.scoring, cv=self.cv,
                                   refit=refit, n_jobs=self.n_jobs, verbose=2)

        try:
            rst[estimator_name] = model.fit(X, y)
        except:
            log.error('Hyperparameter optimization failed, likely due to inappropriate domain of values to optimize'
                               ' one or more parameters over. Please check your input file and the sklearn docs for the model'
                               ' you are optimizing for the domain of correct values')
            exit()

        best_estimator = rst[estimator_name].best_estimator_

        self._save_output(savepath, rst)
        return best_estimator 
Example #27
Source File: mnist_random_search.py    From Deep-Learning-Quick-Reference with MIT License
def main():
    data = load_mnist()
    model = KerasClassifier(build_fn=build_network, verbose=0)
    hyperparameters = create_hyperparameters()
    search = RandomizedSearchCV(estimator=model, param_distributions=hyperparameters, n_iter=10, n_jobs=1, cv=3,
                              verbose=1)
    search.fit(data["train_X"], data["train_y"])

    print(search.best_params_) 
Example #28
Source File: test_search.py    From twitter-stock-recommendation with MIT License
def test_trivial_cv_results_attr():
    # Test search over a "grid" with only one point.
    # Non-regression test: grid_scores_ wouldn't be set by GridSearchCV.
    clf = MockClassifier()
    grid_search = GridSearchCV(clf, {'foo_param': [1]})
    grid_search.fit(X, y)
    assert_true(hasattr(grid_search, "cv_results_"))

    random_search = RandomizedSearchCV(clf, {'foo_param': [0]}, n_iter=1)
    random_search.fit(X, y)
    assert_true(hasattr(random_search, "cv_results_")) 
Example #29
Source File: test_search.py    From twitter-stock-recommendation with MIT License
def test_random_search_with_fit_params():
    check_hyperparameter_searcher_with_fit_params(RandomizedSearchCV, n_iter=1)