Python sklearn.grid_search.RandomizedSearchCV() Examples

The following are 12 code examples of sklearn.grid_search.RandomizedSearchCV(), drawn from open-source projects; the source file and originating project are noted above each example.
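Note that the sklearn.grid_search module was deprecated in scikit-learn 0.18 and removed in 0.20; the same class now lives in sklearn.model_selection. Before the project-specific examples, here is a minimal, self-contained sketch of typical usage (modern import path; the RandomForest and iris data are chosen purely for illustration):

# Minimal sketch of RandomizedSearchCV usage; assumes scikit-learn >= 0.18,
# where the class lives in sklearn.model_selection instead of sklearn.grid_search.
from scipy.stats import randint
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import RandomizedSearchCV

X, y = load_iris(return_X_y=True)
param_dist = {"n_estimators": randint(10, 100),  # integers sampled from [10, 100)
              "max_depth": randint(2, 8)}
search = RandomizedSearchCV(RandomForestClassifier(random_state=0),
                            param_distributions=param_dist,
                            n_iter=10, cv=3, random_state=0)
search.fit(X, y)
print(search.best_params_, search.best_score_)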
Example #1
Source File: Model_Parameters_CV.py    From ProFET with GNU General Public License v3.0
def GridParamSearch(param_dist, clf, X, y, n_iter_search=15):
    '''
    Searches for the best model parameters using randomized CV search;
    different parameters are searched depending on the model type.
    http://nbviewer.ipython.org/github/treycausey/thespread/blob/master/notebooks/basic_random_forest_wp_model.ipynb?create=1
    @param clf: estimator/predictor used.
    @param param_dist: grid of parameter ranges to tune for the predictor,
    using randomized CV search.
    '''
    print("Starting grid parameter search")
    random_search = RandomizedSearchCV(clf, param_distributions=param_dist,
                                       n_iter=n_iter_search, n_jobs=-1)
    start = time()
    # random_search.fit(features, target)
    random_search.fit(X, y)
    print("RandomizedSearchCV took %.2f seconds for %d candidates"
          " parameter settings." % ((time() - start), n_iter_search))
    report(random_search.grid_scores_) 
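The report helper called above is not part of this excerpt; in the classic scikit-learn randomized-search example that this code follows, it is defined roughly as below (a sketch, assuming the pre-0.18 grid_scores_ result tuples):

from operator import itemgetter
import numpy as np

def report(grid_scores, n_top=3):
    # Print the n_top best parameter settings found by the search.
    # Each entry in grid_scores is a (parameters, mean_validation_score,
    # cv_validation_scores) tuple, so itemgetter(1) sorts by mean score.
    top_scores = sorted(grid_scores, key=itemgetter(1), reverse=True)[:n_top]
    for i, score in enumerate(top_scores):
        print("Model with rank: {0}".format(i + 1))
        print("Mean validation score: {0:.3f} (std: {1:.3f})".format(
            score.mean_validation_score, np.std(score.cv_validation_scores)))
        print("Parameters: {0}\n".format(score.parameters))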
Example #2
Source File: test_search_2.py    From spark-sklearn with Apache License 2.0
def test_example_randomized_search(self):
        # The classic example from the sklearn documentation
        iris = datasets.load_iris()
        parameters = {'kernel': ('linear', 'rbf'), 'C': range(1, 10)}
        svr = svm.SVC()
        clf = grid_search.RandomizedSearchCV(svr, parameters, random_state=4)
        clf.fit(iris.data, iris.target)

        clf2 = RandomizedSearchCV(self.sc, svr, parameters, random_state=4)
        clf2.fit(iris.data, iris.target)

        b1 = clf.estimator
        b2 = clf2.estimator
        self.assertEqual(b1.get_params(), b2.get_params()) 
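The only difference from the plain scikit-learn call is the extra first argument, a SparkContext, which spark-sklearn uses to distribute the candidate fits across the cluster. A hedged standalone sketch of the same comparison (the import path is an assumption based on the project name):

from pyspark import SparkContext
from spark_sklearn import RandomizedSearchCV  # assumed import path
from sklearn import datasets, svm

sc = SparkContext.getOrCreate()
iris = datasets.load_iris()
parameters = {'kernel': ('linear', 'rbf'), 'C': range(1, 10)}
search = RandomizedSearchCV(sc, svm.SVC(), parameters, random_state=4)
search.fit(iris.data, iris.target)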
Example #3
Source File: winfault.py    From wt-fdd with GNU General Public License v3.0
def svm_class_and_score(
    X_train, y_train, X_test, y_test, labels, search_type=RandomizedSearchCV,
    parameter_space={
        'kernel': ['linear', 'rbf', 'poly'], 'gamma': ['auto', 1e-3, 1e-4],
        'C': [0.01, .1, 1, 10, 100, 1000],
        'class_weight': [
            {0: 0.01}, {1: 1}, {1: 2}, {1: 10}, {1: 50}, 'balanced']},
        score='recall_weighted', iid=True, bagged=False, svm_results=True):
    """Build an SVM and return its scoring metrics
    """
    print("# Tuning hyper-parameters for %s" % score)
    print()

    # Find the Hyperparameters
    clf = search_type(SVC(C=1), parameter_space, cv=10,
                      scoring=score, iid=iid)

    # Build the SVM
    clf.fit(X_train, y_train)
    print("Hyperparameters found:")
    print(clf.best_params_)

    # Make the predictions
    y_pred = clf.predict(X_test)
    print()
    print()
    print("Results for basic SVM")
    clf_scoring(y_test, y_pred, labels)

    if bagged is True:
        bgg = BaggingClassifier(base_estimator=clf)
        bgg.fit(X_train, y_train)
        y_pred = bgg.predict(X_test)
        print()
        print()
        print("Results for bagging:")
        clf_scoring(y_test, y_pred, labels)
        return clf, bgg
    else:
        return clf 
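A hedged usage sketch for the helper above; the data arrays and label list are hypothetical, and the search_type argument lets the caller swap the randomized search for an exhaustive grid search:

# Hypothetical call; X_train, X_test, y_train, y_test are assumed to exist.
from sklearn.grid_search import GridSearchCV  # sklearn.model_selection in >= 0.18

clf = svm_class_and_score(X_train, y_train, X_test, y_test,
                          labels=['healthy', 'faulty'],  # hypothetical labels
                          search_type=GridSearchCV,
                          score='recall_weighted')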
Example #4
Source File: tunemodels.py    From Supply-demand-forecasting with MIT License
def runGridSearch(self, model):
        logging.debug("run grid search on model: {}".format(model.__class__.__name__))
        logging.debug("cross validation strategy: {}".format(model.holdout_split))
        logging.debug("used features: {}".format(model.usedFeatures))
        logging.debug("tuned parameters: {}".format(model.getTunedParamterOptions()))
        
        features,labels,cv = model.getFeaturesLabel()
        # do grid search
        if self.do_random_gridsearch:
            estimator = RandomizedSearchCV(model.clf, model.getTunedParamterOptions(), cv=cv, n_jobs=self.n_jobs,
                       scoring=mean_absolute_percentage_error_scoring, verbose = 500, n_iter=self.n_iter_randomsearch)
        else:
            estimator = GridSearchCV(model.clf, model.getTunedParamterOptions(), cv=cv,n_jobs=-self.n_jobs, 
                                     fit_params=model.get_fit_params(),
                       scoring=mean_absolute_percentage_error_scoring, verbose = 500)
        estimator.fit(features, labels)
        model.clf = estimator.best_estimator_
        model.save_final_model = True
        model.save_model()
        
#         model.dispFeatureImportance()
        logging.debug('estimator parameters: {}'.format(estimator.get_params()))
        logging.debug('Best parameters: {}'.format(estimator.best_params_))
        logging.debug('Best Scores: {}'.format(-estimator.best_score_))
        logging.debug('Score grid: {}'.format(estimator.grid_scores_ ))
        for i in estimator.grid_scores_ :
            logging.debug('parameters: {}'.format(i.parameters ))
            logging.debug('mean_validation_score: {}'.format(np.absolute(i.mean_validation_score)))
            logging.debug('cv_validation_scores: {}'.format(np.absolute(i.cv_validation_scores) ))

        
        
        return 
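The grid_scores_ attribute iterated over above exists only in pre-0.18 scikit-learn; with newer versions the equivalent information lives in cv_results_, roughly as in this sketch (reusing the estimator, logging and np names from the excerpt):

# Sketch of the cv_results_ equivalent of the grid_scores_ loop above.
for params, mean_score, std_score in zip(estimator.cv_results_['params'],
                                         estimator.cv_results_['mean_test_score'],
                                         estimator.cv_results_['std_test_score']):
    logging.debug('parameters: {}'.format(params))
    logging.debug('mean_test_score: {}'.format(np.absolute(mean_score)))
    logging.debug('std_test_score: {}'.format(std_score))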
Example #5
Source File: classification.py    From pyImSegm with BSD 3-Clause "New" or "Revised" License
def create_classif_search(name_clf, clf_pipeline, nb_labels,
                          search_type='random', cross_val=10,
                          eval_metric='f1', nb_iter=250, nb_workers=5):
    """ create sklearn search depending on spec. random or grid

    :param int nb_labels: number of labels
    :param str search_type: hyper-params search type
    :param str eval_metric: evaluation metric
    :param int nb_iter: for random number of tries
    :param str name_clf: name of classif.
    :param obj clf_pipeline: object
    :param obj cross_val: obj specific CV for fix train-test
    :param int nb_workers: number jobs running in parallel
    :return:
    """
    score_weight = 'weighted' if nb_labels > 2 else 'binary'
    scoring = metrics.make_scorer(DICT_SCORING[eval_metric.lower()],
                                  average=score_weight)
    if search_type == 'grid':
        clf_parameters = create_clf_param_search_grid(name_clf)
        logging.info('init Grid search...')
        clf_search = GridSearchCV(
            clf_pipeline, clf_parameters, scoring=scoring, cv=cross_val,
            n_jobs=nb_workers, verbose=1, refit=True)
    else:
        clf_parameters = create_clf_param_search_distrib(name_clf)
        nb_iter = search_params_cut_down_max_nb_iter(clf_parameters, nb_iter)
        logging.info('init Randomized search...')
        clf_search = RandomizedSearchCV(
            clf_pipeline, clf_parameters, scoring=scoring, cv=cross_val,
            n_jobs=nb_workers, n_iter=nb_iter, verbose=1, refit=True)
    return clf_search 
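DICT_SCORING and the create_clf_param_search_* helpers are defined elsewhere in pyImSegm; as a hypothetical stand-in only, the scoring lookup could look like the sketch below, restricted to metrics that accept the average argument passed to make_scorer (the real mapping in the project may differ):

from sklearn import metrics

# Hypothetical stand-in for pyImSegm's DICT_SCORING; the real mapping may differ.
DICT_SCORING = {
    'f1': metrics.f1_score,
    'precision': metrics.precision_score,
    'recall': metrics.recall_score,
}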
Example #6
Source File: test_sklearn.py    From scikit-neuralnetwork with BSD 3-Clause "New" or "Revised" License
def test_RandomGlobalParams(self):
        clf = RandomizedSearchCV(
                    self.__estimator__(layers=[L("Sigmoid")], n_iter=1),
                    param_distributions={'learning_rate': uniform(0.001, 0.01)},
                    n_iter=2)
        clf.fit(self.a_in, self.a_out) 
Example #7
Source File: test_sklearn.py    From scikit-neuralnetwork with BSD 3-Clause "New" or "Revised" License
def test_RandomLayerParams(self):
        clf = RandomizedSearchCV(
                    self.__estimator__(layers=[L("Rectifier", units=12), L(self.__output__)], n_iter=1),
                    param_distributions={'hidden0__units': randint(4, 12)},
                    n_iter=2)
        clf.fit(self.a_in, self.a_out) 
Example #8
Source File: test_sklearn.py    From scikit-neuralnetwork with BSD 3-Clause "New" or "Revised" License
def test_RandomMultipleJobs(self):
        clf = RandomizedSearchCV(
                    self.__estimator__(layers=[L("Sigmoid", units=12), L(self.__output__)], n_iter=1),
                    param_distributions={'hidden0__units': randint(4, 12)},
                    n_iter=4, n_jobs=4)
        clf.fit(self.a_in, self.a_out) 
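The uniform and randint objects used in these three tests are the scipy.stats distributions; RandomizedSearchCV accepts any object exposing an rvs() method and samples parameter values from it:

from scipy.stats import randint, uniform

param_distributions = {
    'hidden0__units': randint(4, 12),       # integers drawn from [4, 12)
    'learning_rate': uniform(0.001, 0.01),  # floats drawn from [0.001, 0.011)
}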
Example #9
Source File: test_search_2.py    From spark-sklearn with Apache License 2.0
def test_cv_linreg(self):
        pipeline = SKL_Pipeline([
            ('lasso', SKL_Lasso(max_iter=1))
        ])
        parameters = {
            'lasso__alpha': np.linspace(0.001, 0.01, 1000)
        }
        n_iter = 10
        grid_search = RandomizedSearchCV(self.sc, pipeline, parameters, n_iter=n_iter)
        X = scipy.sparse.vstack(map(lambda x: self.list2csr([x, x+1.0]), range(0, 100)))
        y = np.array(list(range(0, 100))).reshape((100, 1))
        skl_gs = grid_search.fit(X, y)
        assert len(skl_gs.cv_results_['params']) == n_iter 
Example #10
Source File: ml.py    From EDeN with MIT License
def fit_estimator(estimator,
                  positive_data_matrix=None,
                  negative_data_matrix=None,
                  target=None,
                  cv=10,
                  n_jobs=-1,
                  n_iter_search=40,
                  random_state=1):
    """fit_estimator."""
    # hyperparameter optimization
    param_dist = {"n_iter": randint(5, 100),
                  "power_t": uniform(0.1),
                  "alpha": uniform(1e-08, 1e-03),
                  "eta0": uniform(1e-03, 1),
                  "penalty": ["l1", "l2", "elasticnet"],
                  "learning_rate": ["invscaling", "constant", "optimal"]}
    scoring = 'roc_auc'
    random_search = RandomizedSearchCV(estimator,
                                       param_distributions=param_dist,
                                       n_iter=n_iter_search,
                                       cv=cv,
                                       scoring=scoring,
                                       n_jobs=n_jobs,
                                       random_state=random_state,
                                       refit=True)
    X, y = make_data_matrix(positive_data_matrix=positive_data_matrix,
                            negative_data_matrix=negative_data_matrix,
                            target=target)
    random_search.fit(X, y)

    logger.debug('\nClassifier:')
    logger.debug('%s' % random_search.best_estimator_)
    logger.debug('\nPredictive performance:')
    # assess the generalization capacity of the model via a 10-fold cross
    # validation
    scoring_strings = ['accuracy', 'precision', 'recall', 'f1',
                       'average_precision', 'roc_auc']
    for scoring in scoring_strings:
        scores = cross_validation.cross_val_score(
            random_search.best_estimator_,
            X,
            y,
            cv=cv,
            scoring=scoring,
            n_jobs=n_jobs)
        logger.debug('%20s: %.3f +- %.3f' %
                     (scoring, np.mean(scores), np.std(scores)))

    return random_search.best_estimator_ 
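The parameter distribution above (n_iter, power_t, eta0, penalty, learning_rate) matches scikit-learn's SGDClassifier, which is presumably the kind of estimator being tuned; a hedged usage sketch with hypothetical data matrices:

from sklearn.linear_model import SGDClassifier

# X_pos and X_neg are hypothetical positive/negative feature matrices.
best_clf = fit_estimator(SGDClassifier(),
                         positive_data_matrix=X_pos,
                         negative_data_matrix=X_neg,
                         n_iter_search=20)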
Example #11
Source File: test_big.py    From skutil with BSD 3-Clause "New" or "Revised" License
def test_large_grid():
        """In this test, we purposely overfit a RandomForest to completely random data
        in order to assert that the test error will far supercede the train error.
        """

        if not SK18:
            custom_cv = KFold(n=y_train.shape[0], n_folds=3, shuffle=True, random_state=42)
        else:
            custom_cv = KFold(n_splits=3, shuffle=True, random_state=42)

        # define the pipe
        pipe = Pipeline([
            ('scaler', SelectiveScaler()),
            ('pca', SelectivePCA(weight=True)),
            ('rf', RandomForestClassifier(random_state=42))
        ])

        # define hyper parameters
        hp = {
            'scaler__scaler': [StandardScaler(), RobustScaler(), MinMaxScaler()],
            'pca__whiten': [True, False],
            'pca__weight': [True, False],
            'pca__n_components': uniform(0.75, 0.15),
            'rf__n_estimators': randint(5, 10),
            'rf__max_depth': randint(5, 15)
        }

        # define the grid
        grid = RandomizedSearchCV(pipe, hp, n_iter=2, scoring='accuracy', n_jobs=1, cv=custom_cv, random_state=42)

        # this will fail because we haven't fit yet
        assert_fails(grid.score, (ValueError, AttributeError), X_train, y_train)

        # fit the grid
        grid.fit(X_train, y_train)

        # score for coverage -- this might warn...
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            grid.score(X_train, y_train)

        # coverage:
        assert grid._estimator_type == 'classifier'

        # get predictions
        tr_pred, te_pred = grid.predict(X_train), grid.predict(X_test)

        # evaluate score (SHOULD be better than random...)
        accuracy_score(y_train, tr_pred), accuracy_score(y_test, te_pred)

        # grid score reports:
        # assert fails for bad percentile
        assert_fails(report_grid_score_detail, ValueError, **{'random_search': grid, 'percentile': 0.0})
        assert_fails(report_grid_score_detail, ValueError, **{'random_search': grid, 'percentile': 1.0})

        # assert fails for bad y_axis
        assert_fails(report_grid_score_detail, ValueError, **{'random_search': grid, 'y_axis': 'bad_axis'})

        # assert passes otherwise
        report_grid_score_detail(grid, charts=True, percentile=0.95)  # just ensure percentile works 
Example #12
Source File: mnist_parallel.py    From mHTM with MIT License
def main_local(log_dir, ntrain=800, ntest=200, niter=5, nsplits=3,
	global_inhibition=True, ncores=4, seed=None):
	"""
	Perform CV on a subset of the MNIST dataset. Performs parallelizations on
	a local machine.
	
	@param log_dir: The directory to store the results in.
	
	@param ntrain: The number of training samples to use.
	
	@param ntest: The number of testing samples to use.
	
	@param niter: The number of parameter iterations to use.
	
	@param nsplits: The number of splits of the data to use.
	
	@param global_inhibition: If True use global inhibition; otherwise, use
	local inhibition.
	
	@param ncores: The number of cores to use.
	
	@param seed: The seed for the random number generators.
	"""
	
	# Run the initialization
	x, y, kargs, params, cv = main(log_dir, ntrain, ntest, niter, nsplits,
		seed)
	
	# Build the classifier for doing CV
	clf = RandomizedSearchCV(
		estimator=SPRegion(**kargs),
		param_distributions=params,
		n_iter=niter, # Total runs
		n_jobs=ncores, # Number of cores to use
		pre_dispatch=1 * ncores, # Dispatch one job per core
		iid=True, # Data is iid across folds
		cv=cv, # The CV split for the data
		refit=False, # Disable fitting best estimator on full dataset
		random_state=seed # Force same SP across runs
	)
	
	# Fit the models
	clf.fit(x, y)
	
	# Extract the CV results
	parameter_names = sorted(clf.grid_scores_[0].parameters.keys())
	parameter_names.pop(parameter_names.index('log_dir'))
	parameter_values = np.zeros((niter, len(parameter_names)))
	results = np.zeros((niter, nsplits))
	for i, score in enumerate(clf.grid_scores_):
		parameter_values[i] = np.array([score.parameters[k] for k in
			parameter_names])
		results[i] = score.cv_validation_scores
	
	# Save the CV results
	with open(os.path.join(log_dir, 'cv_results.pkl'), 'wb') as f:
		cPickle.dump((parameter_names, parameter_values, results), f,
			cPickle.HIGHEST_PROTOCOL)
	with open(os.path.join(log_dir, 'cv_clf.pkl'), 'wb') as f:
		cPickle.dump((clf.grid_scores_, clf.best_score_, clf.best_params_), f,
			cPickle.HIGHEST_PROTOCOL)