Python sklearn.grid_search.GridSearchCV() Examples

The following are 30 code examples of sklearn.grid_search.GridSearchCV(). You can go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module sklearn.grid_search, or try the search function.
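
Note that the sklearn.grid_search module was deprecated in scikit-learn 0.18 and removed in 0.20; the same GridSearchCV class now lives in sklearn.model_selection. For orientation before the project examples, here is a minimal, self-contained sketch of typical usage (the dataset, parameter grid, and import fallback are illustrative and not taken from any of the projects below):

try:
    # Legacy location used throughout the examples on this page (scikit-learn < 0.20)
    from sklearn.grid_search import GridSearchCV
except ImportError:
    # Current location (scikit-learn >= 0.18)
    from sklearn.model_selection import GridSearchCV

from sklearn import datasets, svm

# Exhaustive search over every combination in the parameter grid,
# each evaluated with 5-fold cross-validation.
iris = datasets.load_iris()
param_grid = {'kernel': ['linear', 'rbf'], 'C': [0.1, 1, 10]}

clf = GridSearchCV(svm.SVC(), param_grid, cv=5)
clf.fit(iris.data, iris.target)

print(clf.best_params_)   # parameter combination with the best mean CV score
print(clf.best_score_)    # the corresponding mean cross-validated score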
Example #1
Source File: test_grid_search.py    From sparkit-learn with Apache License 2.0
def test_same_result(self):
        X, y, Z = self.make_classification(2, 40000, nonnegative=True)

        parameters = {'alpha': [0.1, 1, 10]}
        fit_params = {'classes': np.unique(y)}

        local_estimator = MultinomialNB()
        local_grid = GridSearchCV(estimator=local_estimator,
                                  param_grid=parameters)

        estimator = SparkMultinomialNB()
        grid = SparkGridSearchCV(estimator=estimator,
                                 param_grid=parameters,
                                 fit_params=fit_params)

        local_grid.fit(X, y)
        grid.fit(Z)

        locscores = [r.mean_validation_score for r in local_grid.grid_scores_]
        scores = [r.mean_validation_score for r in grid.grid_scores_]

        assert_array_almost_equal(locscores, scores, decimal=2) 
Example #2
Source File: RegressionDecisionTree.py    From AirTicketPredicting with MIT License
def parameterChoosing(self):
        # Set the parameters by cross-validation
        tuned_parameters = [{'max_features': ['sqrt', 'log2', None],
                             'max_depth': range(2,1000),
                             }
                            ]


        reg = GridSearchCV(DecisionTreeRegressor(), tuned_parameters, cv=5, scoring='mean_squared_error')
        reg.fit(self.X_train, self.y_train)

        print "Best parameters set found on development set:\n"
        print reg.best_params_

        print "Grid scores on development set:\n"
        for params, mean_score, scores in reg.grid_scores_:
            print "%0.3f (+/-%0.03f) for %r\n" % (mean_score, scores.std() * 2, params)

        print "MSE for test data set:\n"
        y_true, y_pred = self.y_test, reg.predict(self.X_test)
        print mean_squared_error(y_true, y_pred) 
Example #3
Source File: RegressionRidgeReg.py    From AirTicketPredicting with MIT License
def parameterChoosing(self):
        # Set the parameters by cross-validation
        tuned_parameters = [{'alpha': np.logspace(-5,5)
                             }
                            ]


        reg = GridSearchCV(linear_model.Ridge(alpha = 0.5), tuned_parameters, cv=5, scoring='mean_squared_error')
        reg.fit(self.X_train, self.y_train)

        print "Best parameters set found on development set:\n"
        print reg.best_params_

        print "Grid scores on development set:\n"
        for params, mean_score, scores in reg.grid_scores_:
            print "%0.3f (+/-%0.03f) for %r\n" % (mean_score, scores.std() * 2, params)

        print reg.scorer_

        print "MSE for test data set:"
        y_true, y_pred = self.y_test, reg.predict(self.X_test)
        print mean_squared_error(y_pred, y_true) 
Example #4
Source File: RegressionKNN.py    From AirTicketPredicting with MIT License
def parameterChoosing(self):
        # Set the parameters by cross-validation
        tuned_parameters = [{'weights': ['uniform', 'distance'],
                             'n_neighbors': range(2,100)
                             }
                            ]


        reg = GridSearchCV(neighbors.KNeighborsRegressor(), tuned_parameters, cv=5, scoring='mean_squared_error')
        reg.fit(self.X_train, self.y_train)

        print "Best parameters set found on development set:\n"
        print reg.best_params_

        print "Grid scores on development set:\n"
        for params, mean_score, scores in reg.grid_scores_:
            print "%0.3f (+/-%0.03f) for %r\n" % (mean_score, scores.std() * 2, params)

        print reg.scorer_

        print "MSE for test data set:"
        y_true, y_pred = self.y_test, reg.predict(self.X_test)
        print mean_squared_error(y_pred, y_true) 
Example #5
Source File: RegressionRandomForest.py    From AirTicketPredicting with MIT License
def parameterChoosing(self):
        #Set the parameters by cross-validation
        tuned_parameters = [{'max_depth': range(20,60),
                             'n_estimators': range(10,40),
                             'max_features': ['sqrt', 'log2', None]
                             }
                            ]

        clf = GridSearchCV(RandomForestRegressor(n_estimators=30), tuned_parameters, cv=5, scoring='mean_squared_error')
        clf.fit(self.X_train, self.y_train.ravel())

        print "Best parameters set found on development set:\n"
        print clf.best_params_

        print "Grid scores on development set:\n"
        for params, mean_score, scores in clf.grid_scores_:
            print "%0.3f (+/-%0.03f) for %r\n" % (mean_score, scores.std() * 2, params)

        print "MSE for test data set:\n"
        y_true, y_pred = self.y_test, clf.predict(self.X_test)
        print mean_squared_error(y_true, y_pred) 
Example #6
Source File: ClassificationSVM.py    From AirTicketPredicting with MIT License
def parameterChoosing(self):
        # Set the parameters by cross-validation
        tuned_parameters = [{'kernel': ['rbf'],
                             'gamma': np.logspace(-4, 3, 30),
                             'C': [1e-3, 1e-2, 1e-1, 1, 10, 100, 1000]},
                             {'kernel': ['poly'],
                              'degree': [1, 2, 3, 4],
                              'C': [1e-3, 1e-2, 1e-1, 1, 10, 100, 1000],
                              'coef0': np.logspace(-4, 3, 30)},
                            {'kernel': ['linear'],
                             'C': [1e-3, 1e-2, 1e-1, 1, 10, 100, 1000]}]

        clf = GridSearchCV(svm.SVC(C=1), tuned_parameters, cv=5, scoring='precision_weighted')
        clf.fit(self.X_train, self.y_train.ravel())

        print "Best parameters set found on development set:\n"
        print clf.best_params_

        print "Grid scores on development set:\n"
        for params, mean_score, scores in clf.grid_scores_:
            print "%0.3f (+/-%0.03f) for %r\n" % (mean_score, scores.std() * 2, params)

        print "Detailed classification report:\n"
        y_true, y_pred = self.y_test, clf.predict(self.X_test)
        print classification_report(y_true, y_pred) 
Example #7
Source File: ClassificationLogReg.py    From AirTicketPredicting with MIT License
def parameterChoosing(self):
        # Set the parameters by cross-validation
        tuned_parameters = [{'penalty': ['l1'],
                             'C': np.logspace(-5,5)},
                             {'penalty': ['l2'],
                              'C': np.logspace(-5,5)}]

        clf = GridSearchCV(linear_model.LogisticRegression(tol=1e-6), tuned_parameters, cv=5, scoring='precision_weighted')
        clf.fit(self.X_train, self.y_train.ravel())

        print "Best parameters set found on development set:\n"
        print clf.best_params_

        print "Grid scores on development set:\n"
        for params, mean_score, scores in clf.grid_scores_:
            print "%0.3f (+/-%0.03f) for %r\n" % (mean_score, scores.std() * 2, params)

        print "Detailed classification report:\n"
        y_true, y_pred = self.y_test, clf.predict(self.X_test)
        print classification_report(y_true, y_pred) 
Example #8
Source File: ClassificationKNN.py    From AirTicketPredicting with MIT License
def parameterChoosing(self):
        # Set the parameters by cross-validation
        tuned_parameters = [{'weights': ['uniform', 'distance'],
                             'n_neighbors': range(2,60)
                             }
                            ]


        clf = GridSearchCV(neighbors.KNeighborsClassifier(), tuned_parameters, cv=5, scoring='precision_weighted')
        clf.fit(self.X_train, self.y_train.ravel())

        print "Best parameters set found on development set:\n"
        print clf.best_params_

        print "Grid scores on development set:\n"
        for params, mean_score, scores in clf.grid_scores_:
            print "%0.3f (+/-%0.03f) for %r\n" % (mean_score, scores.std() * 2, params)

        print "Detailed classification report:\n"
        y_true, y_pred = self.y_test, clf.predict(self.X_test)
        print classification_report(y_true, y_pred) 
Example #9
Source File: ClassificationDecisionTree.py    From AirTicketPredicting with MIT License
def parameterChoosing(self):
        # Set the parameters by cross-validation
        tuned_parameters = [{'max_depth': range(2,60),
                             'max_features': ['sqrt', 'log2', None]
                             }
                            ]

        clf = GridSearchCV(DecisionTreeClassifier(max_depth=5), tuned_parameters, cv=5, scoring='precision_weighted')
        clf.fit(self.X_train, self.y_train.ravel())

        print "Best parameters set found on development set:\n"
        print clf.best_params_

        print "Grid scores on development set:\n"
        for params, mean_score, scores in clf.grid_scores_:
            print "%0.3f (+/-%0.03f) for %r\n" % (mean_score, scores.std() * 2, params)

        print "Detailed classification report:\n"
        y_true, y_pred = self.y_test, clf.predict(self.X_test)
        print classification_report(y_true, y_pred) 
Example #10
Source File: GC_script.py    From ClimateVegetationDynamics_GrangerCausality with GNU General Public License v3.0
def nestedCrossValidation(X, y, cvFolds, estimator):  
    kf = KFold(len(X), n_folds=cvFolds, shuffle=True, random_state = 30)
    cv_j=0
    param_grid = {'alpha': [0.0000001,0.000001,0.00001,0.0001,0.001,0.01,0.1,1,10,100,1000,10000,100000, 1000000, 10000000,1000000000]}
    r2 = np.zeros((cvFolds,1))   
    for train_index, test_index in kf:
        train_X = X[train_index,:]
        test_X = X[test_index,:]
        train_y = y[train_index]
        test_y = y[test_index]
        grid = GridSearchCV(estimator, param_grid=param_grid, verbose=0, cv=cvFolds, scoring='mean_squared_error')
        grid.fit(train_X,train_y)
        y_true, y_pred = test_y,grid.best_estimator_.predict(test_X)
        r2[cv_j] = r2_score(y_true, y_pred) 
        cv_j = cv_j + 1 
    return r2
    
#%% main script 
Example #11
Source File: classif_and_ktst.py    From jstsp2015 with MIT License
def compute_svm_score_nestedCV(K, y, n_folds,
                               scoring=balanced_accuracy_scoring,
                               random_state=None,
                               param_grid=[{'C': np.logspace(-5, 5, 25)}]):
    """Compute cross-validated score of SVM using precomputed kernel.
    """
    cv = StratifiedKFold(y, n_folds=n_folds, shuffle=True,
                         random_state=random_state)
    scores = np.zeros(n_folds)
    for i, (train, test) in enumerate(cv):
        cvclf = SVC(kernel='precomputed')
        y_train = y[train]
        cvcv = StratifiedKFold(y_train, n_folds=n_folds,
                               shuffle=True,
                               random_state=random_state)
        clf = GridSearchCV(cvclf, param_grid=param_grid, scoring=scoring,
                           cv=cvcv, n_jobs=1)
        clf.fit(K[train, :][:, train], y_train)
        # print clf.best_params_
        scores[i] = clf.score(K[test, :][:, train], y[test])

    return scores.mean() 
Example #12
Source File: test_search_2.py    From spark-sklearn with Apache License 2.0
def test_cv_pipeline(self):
        pipeline = SKL_Pipeline([
            ('vect', SKL_HashingVectorizer(n_features=20)),
            ('tfidf', SKL_TfidfTransformer(use_idf=False)),
            ('lasso', SKL_Lasso())
        ])
        parameters = {
            'lasso__alpha': (0.001, 0.005, 0.01)
        }
        grid_search = GridSearchCV(self.sc, pipeline, parameters)
        data = [('hi there', 0.0),
                ('what is up', 1.0),
                ('huh', 1.0),
                ('now is the time', 5.0),
                ('for what', 0.0),
                ('the spark was there', 5.0),
                ('and so', 3.0),
                ('were many socks', 0.0),
                ('really', 1.0),
                ('too cool', 2.0)]
        df = self.sql.createDataFrame(data, ["review", "rating"]).toPandas()
        skl_gs = grid_search.fit(df.review.values, df.rating.values)
        assert len(skl_gs.cv_results_['params']) == len(parameters['lasso__alpha']) 
Example #13
Source File: 04_sent.py    From Building-Machine-Learning-Systems-With-Python-Second-Edition with MIT License
def __grid_search_model(clf_factory, X, Y):
    cv = ShuffleSplit(
        n=len(X), n_iter=10, test_size=0.3, indices=True, random_state=0)

    param_grid = dict(vect__ngram_range=[(1, 1), (1, 2), (1, 3)],
                      vect__min_df=[1, 2],
                      vect__smooth_idf=[False, True],
                      vect__use_idf=[False, True],
                      vect__sublinear_tf=[False, True],
                      vect__binary=[False, True],
                      clf__alpha=[0, 0.01, 0.05, 0.1, 0.5, 1],
                      )

    grid_search = GridSearchCV(clf_factory(),
                               param_grid=param_grid,
                               cv=cv,
                               score_func=f1_score,
                               verbose=10)
    grid_search.fit(X, Y)
    clf = grid_search.best_estimator_
    print clf

    return clf 
Example #14
Source File: 02_tuning.py    From Building-Machine-Learning-Systems-With-Python-Second-Edition with MIT License
def grid_search_model(clf_factory, X, Y):
    cv = ShuffleSplit(
        n=len(X), n_iter=10, test_size=0.3, indices=True, random_state=0)

    param_grid = dict(vect__ngram_range=[(1, 1), (1, 2), (1, 3)],
                      vect__min_df=[1, 2],
                      vect__stop_words=[None, "english"],
                      vect__smooth_idf=[False, True],
                      vect__use_idf=[False, True],
                      vect__sublinear_tf=[False, True],
                      vect__binary=[False, True],
                      clf__alpha=[0, 0.01, 0.05, 0.1, 0.5, 1],
                      )

    grid_search = GridSearchCV(clf_factory(),
                               param_grid=param_grid,
                               cv=cv,
                               score_func=f1_score,
                               verbose=10)
    grid_search.fit(X, Y)
    clf = grid_search.best_estimator_
    print clf

    return clf 
Example #15
Source File: test_sklearn.py    From scikit-neuralnetwork with BSD 3-Clause "New" or "Revised" License
def test_GridLayerParams(self):
        clf = GridSearchCV(
                    self.__estimator__(layers=[L("Rectifier", units=12), L(self.__output__)], n_iter=1),
                    param_grid={'hidden0__units': [4, 8, 12]})
        clf.fit(self.a_in, self.a_out) 
Example #16
Source File: GetMLPara.py    From dr_droid with Apache License 2.0
def selection_parameters_for_classfier(X,y):

    from sklearn import grid_search

    #paras={ 'n_neighbors':[1,10], 'weights':['uniform', 'distance'], 'algorithm':['auto', 'ball_tree','kd_tree', 'brute'], 'leaf_size':[20,50]}
    #knn = KNeighborsClassifier()

    #naive_bayes
    #nbg = GaussianNB()
    #nbm = MultinomialNB()
    #nbb = BernoulliNB()

    #decision tree
    #paras={ 'criterion':['gini','entropy'], 'splitter':['random', 'best'], 'max_features':[None, 'auto','sqrt', 'log2'], 'min_samples_split':[1,10]}
    #dtree = DecisionTreeClassifier()

    #random forest
    #rforest = RandomForestClassifier()
    #paras={ 'n_estimators':[2,15], 'criterion':['gini','entropy'], 'max_features': ['auto','sqrt', 'log2'], 'min_samples_split':[1,10]}

    #svm
    svmm = svm.SVC()
    paras={'kernel':['rbf','linear','poly']}


    clt = grid_search.GridSearchCV(svmm, paras, cv=5)
    clt.fit(X,y)
    print (clt)
    #print (clt.get_params())
    print (clt.set_params())
    print (clt.score(X,y))

    #scores = cross_val_score(clt,X,y,cv=10)
    #print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))


#this is to get score using cross_validation 
Example #17
Source File: model_selection.py    From StrepHit with GNU General Public License v3.0
def fit(self, training_sets):
        """ Searches for the best estimator and its arguments as well as the best
            training set amongst those specified.

            :param generator training_sets: Training set to use. Should be a sequence
             of tuples (x, y, metadata) where x is the training set, y is the
             correct answer for each chunk and metadata contains additional data that will
             be returned back
            :return: the metadata of the training set which yielded the best score,
             the best score obtained by the model, parameters of the model and
             fitted model itself
            :rtype: tuple
        """
        best_training, best_score, best_params, best_model = None, None, None, None
        for i, (metadata, extractor) in enumerate(training_sets):
            for model, grid in self.models:
                assert isclass(model)

                x, y = extractor.get_features(refit=True)

                grid['model_cls'] = [model]
                grid['selector_column'] = [None, extractor.lu_column()]

                search = GridSearchCV(
                    FeatureSelectedClassifier(model), param_grid=grid, **self.kwargs
                )
                search.fit(x, y)

                score, params, model = search.best_score_, search.best_params_, search.best_estimator_
                logger.debug('%s with parameters %s and training meta %s has score %s',
                             type(model), params, metadata, score)
                if best_score is None or score > best_score:
                    best_training, best_score, best_params, best_model = (x, y, metadata), score, params, model

        return best_training, best_score, best_params, best_model


# needs to be pickleable and callable 
Example #18
Source File: model_selection.py    From StrepHit with GNU General Public License v3.0
def __init__(self, *models, **kwargs):
        """ Initializes the grid search

            :param list models: List of models to use. Each one should be a tuple
             with a model instance or class and a dictionary for the search space.
            :param kwargs: addition initialization arguments
             for `sklearn.grid_search.GridSearchCV`
        """
        self.models = filter(None, models)
        kwargs['refit'] = True
        self.kwargs = kwargs 
Example #19
Source File: test_sklearn.py    From scikit-neuralnetwork with BSD 3-Clause "New" or "Revised" License
def test_GridGlobalParams(self):
        clf = GridSearchCV(
                    self.__estimator__(layers=[L(self.__output__)], n_iter=1),
                    param_grid={'learning_rate': [0.01, 0.001]})
        clf.fit(self.a_in, self.a_out) 
Example #20
Source File: model.py    From crypto_predictor with MIT License
def get_best_model(model, parameters, X_train, y_train):
    clf = GridSearchCV(model, parameters, cv=4, n_jobs=-1)
    clf.fit(X_train, y_train)
    # print(clf.best_params_)
    return clf.best_estimator_ 
Example #21
Source File: test_search_2.py    From spark-sklearn with Apache License 2.0
def test_example(self):
        # The classic example from the sklearn documentation
        iris = datasets.load_iris()
        parameters = {'kernel': ('linear', 'rbf'), 'C': [1, 10]}
        svr = svm.SVC(gamma='auto')
        clf = grid_search.GridSearchCV(svr, parameters)
        clf.fit(iris.data, iris.target)

        clf2 = GridSearchCV(self.sc, svr, parameters)
        clf2.fit(iris.data, iris.target)

        b1 = clf.estimator
        b2 = clf2.estimator
        self.assertEqual(b1.get_params(), b2.get_params()) 
Example #22
Source File: test_search_2.py    From spark-sklearn with Apache License 2.0
def test_cv_linreg(self):
        pipeline = SKL_Pipeline([
            ('lasso', SKL_Lasso())
        ])
        parameters = {
            'lasso__alpha': (0.001, 0.005, 0.01)
        }
        grid_search = GridSearchCV(self.sc, pipeline, parameters)
        X = scipy.sparse.vstack(map(lambda x: self.list2csr([x, x+1.0]), range(0, 100)))
        y = np.array(list(range(0, 100))).reshape((100, 1))
        skl_gs = grid_search.fit(X, y)
        assert len(skl_gs.cv_results_['params']) == len(parameters['lasso__alpha']) 
Example #23
Source File: test_search_2.py    From spark-sklearn with Apache License 2.0
def test_cv_lasso_with_mllib_featurization(self):
        data = [('hi there', 0.0),
                ('what is up', 1.0),
                ('huh', 1.0),
                ('now is the time', 5.0),
                ('for what', 0.0),
                ('the spark was there', 5.0),
                ('and so', 3.0),
                ('were many socks', 0.0),
                ('really', 1.0),
                ('too cool', 2.0)]
        data = self.sql.createDataFrame(data, ["review", "rating"])

        # Feature extraction using MLlib
        tokenizer = Tokenizer(inputCol="review", outputCol="words")
        hashingTF = HashingTF(inputCol="words", outputCol="features", numFeatures=20000)
        pipeline = Pipeline(stages=[tokenizer, hashingTF])
        data = pipeline.fit(data).transform(data)

        df = self.converter.toPandas(data.select(data.features.alias("review"), "rating"))

        pipeline = SKL_Pipeline([
            ('lasso', SKL_Lasso())
        ])
        parameters = {
            'lasso__alpha': (0.001, 0.005, 0.01)
        }

        grid_search = GridSearchCV(self.sc, pipeline, parameters)
        skl_gs = grid_search.fit(df.review.values, df.rating.values)
        assert len(skl_gs.cv_results_['params']) == len(parameters['lasso__alpha']) 
Example #24
Source File: classifier.py    From TextDetector with GNU General Public License v3.0
def __init__(self, mode='adaboost'):

        if mode == 'adaboost':
            clf = GradientBoostingRegressor(
                learning_rate=1,
                n_estimators=1000,
                max_depth=3,
                random_state=0)
        elif mode == 'randomforest':
            clf = RandomForestRegressor(
                n_estimators=10,
                max_depth=None,
                n_jobs=-1)
        elif mode == 'SVM':
            clf = SVC(C=10.0,
                      kernel='linear')
        elif mode == 'vjcascade':
            clf = vjcascade(n_stage=30,
                            n_esti=1,
                            l_rate=1)
        elif mode == 'gridSearch':
            param_grid = [
                {'max_depth': [1, 2, 3], 'loss': ['ls', 'lad']},
            ]
            gbr = GradientBoostingRegressor()
            clf = grid_search.GridSearchCV(gbr, param_grid, n_jobs=-1)
        else:
            raise Exception('no mode named: ' + mode + ' found!')

        self.classifier = clf
        self.mode = mode 
Example #25
Source File: functions.py    From topicModelling with GNU General Public License v3.0
def perform_class(X, y, iterations=1):
    scores = []
    for i in range(iterations):
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42+iterations)
        parameters = {'C':[0.01, 0.1, 1, 10, 100]}
        clf_acc = GridSearchCV(svm.LinearSVC(), parameters, n_jobs=3, cv=3, refit=True, scoring = 'accuracy')
        clf_acc.fit(X_train, y_train)
        scores.append([metrics.accuracy_score(y_test, clf_acc.predict(X_test)), metrics.f1_score(y_test, clf_acc.predict(X_test),average='micro')])
    acc = np.mean([x[0] for x in scores]), np.std([x[0] for x in scores])
    mif = np.mean([x[1] for x in scores]), np.std([x[1] for x in scores])
    return acc, mif 
Example #26
Source File: functions.py    From topicModelling with GNU General Public License v3.0
def perform_class(X, y, iterations=1):
    scores = []
    for i in range(iterations):
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42+iterations)
        parameters = {'C':[0.01, 0.1, 1, 10, 100]}
        clf_acc = GridSearchCV(svm.LinearSVC(), parameters, n_jobs=3, cv=3, refit=True, scoring = 'accuracy')
        clf_acc.fit(X_train, y_train)
        scores.append([metrics.accuracy_score(y_test, clf_acc.predict(X_test)), metrics.f1_score(y_test, clf_acc.predict(X_test),average='micro')])
    acc = np.mean([x[0] for x in scores]), np.std([x[0] for x in scores])
    mif = np.mean([x[1] for x in scores]), np.std([x[1] for x in scores])
    return acc, mif 
Example #27
Source File: classification.py    From pyImSegm with BSD 3-Clause "New" or "Revised" License
def create_classif_search(name_clf, clf_pipeline, nb_labels,
                          search_type='random', cross_val=10,
                          eval_metric='f1', nb_iter=250, nb_workers=5):
    """ create sklearn search depending on spec. random or grid

    :param int nb_labels: number of labels
    :param str search_type: hyper-params search type
    :param str eval_metric: evaluation metric
    :param int nb_iter: for random number of tries
    :param str name_clf: name of classif.
    :param obj clf_pipeline: object
    :param obj cross_val: obj specific CV for fix train-test
    :param int nb_workers: number jobs running in parallel
    :return:
    """
    score_weight = 'weighted' if nb_labels > 2 else 'binary'
    scoring = metrics.make_scorer(DICT_SCORING[eval_metric.lower()],
                                  average=score_weight)
    if search_type == 'grid':
        clf_parameters = create_clf_param_search_grid(name_clf)
        logging.info('init Grid search...')
        clf_search = GridSearchCV(
            clf_pipeline, clf_parameters, scoring=scoring, cv=cross_val,
            n_jobs=nb_workers, verbose=1, refit=True)
    else:
        clf_parameters = create_clf_param_search_distrib(name_clf)
        nb_iter = search_params_cut_down_max_nb_iter(clf_parameters, nb_iter)
        logging.info('init Randomized search...')
        clf_search = RandomizedSearchCV(
            clf_pipeline, clf_parameters, scoring=scoring, cv=cross_val,
            n_jobs=nb_workers, n_iter=nb_iter, verbose=1, refit=True)
    return clf_search 
Example #28
Source File: reduction.py    From aggregation with Apache License 2.0
def shrunk_cov_score(X):
    shrinkages = np.logspace(-2, 0, 30)
    cv = GridSearchCV(ShrunkCovariance(), {'shrinkage': shrinkages})
    return np.mean(cross_val_score(cv.fit(X).best_estimator_, X)) 
Example #29
Source File: ensembles.py    From Azimuth with BSD 3-Clause "New" or "Revised" License
def SVM_stacking(y_train, X_train, X_test):
    parameters = {'kernel': ('linear', 'rbf'), 'C': np.linspace(1, 10, 10), 'gamma': np.linspace(1e-3, 1., 10)}
    svr = svm.SVR()
    clf = GridSearchCV(svr, parameters, n_jobs=3, verbose=1, cv=10, scoring=spearman_scoring)
    clf.fit(X_train, y_train.flatten())
    return clf.predict(X_test) 
Example #30
Source File: ensembles.py    From Azimuth with BSD 3-Clause "New" or "Revised" License
def GBR_stacking(y_train, X_train, X_test):
    param_grid = {'learning_rate': [0.1, 0.05, 0.01],
                  'max_depth': [2, 3, 4, 5],  # [2, 3, 4, 6],
                  'min_samples_leaf': [1, 2, 3],  # ,5, 7],
                  'max_features': [1.0, 0.5, 0.3, 0.1]}

    est = en.GradientBoostingRegressor(loss='ls', n_estimators=100)
    clf = GridSearchCV(est, param_grid, n_jobs=3, verbose=1, cv=20, scoring=spearman_scoring).fit(X_train, y_train.flatten())
    # clf.fit(X_train, y_train.flatten())
    return clf.predict(X_test)