Python sklearn.grid_search.GridSearchCV() Examples

The following are 30 code examples of sklearn.grid_search.GridSearchCV(). You can go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module sklearn.grid_search, or try the search function.
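
Note that the sklearn.grid_search module was deprecated in scikit-learn 0.18 and removed in 0.20; the same GridSearchCV class now lives in sklearn.model_selection. For orientation before the project examples, here is a minimal, self-contained sketch of typical usage (the dataset, parameter grid, and import fallback are illustrative and not taken from any of the projects below):

try:
    # Legacy location used throughout the examples on this page (scikit-learn < 0.20)
    from sklearn.grid_search import GridSearchCV
except ImportError:
    # Current location (scikit-learn >= 0.18)
    from sklearn.model_selection import GridSearchCV

from sklearn import datasets, svm

# Exhaustive search over every combination in the parameter grid,
# each evaluated with 5-fold cross-validation.
iris = datasets.load_iris()
param_grid = {'kernel': ['linear', 'rbf'], 'C': [0.1, 1, 10]}

clf = GridSearchCV(svm.SVC(), param_grid, cv=5)
clf.fit(iris.data, iris.target)

print(clf.best_params_)   # parameter combination with the best mean CV score
print(clf.best_score_)    # the corresponding mean cross-validated score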
Example #1
Source File: test_grid_search.py    From sparkit-learn with Apache License 2.0
def test_same_result(self):
        X, y, Z = self.make_classification(2, 40000, nonnegative=True)

        parameters = {'alpha': [0.1, 1, 10]}
        fit_params = {'classes': np.unique(y)}

        local_estimator = MultinomialNB()
        local_grid = GridSearchCV(estimator=local_estimator,
                                  param_grid=parameters)

        estimator = SparkMultinomialNB()
        grid = SparkGridSearchCV(estimator=estimator,
                                 param_grid=parameters,
                                 fit_params=fit_params)

        local_grid.fit(X, y)
        grid.fit(Z)

        locscores = [r.mean_validation_score for r in local_grid.grid_scores_]
        scores = [r.mean_validation_score for r in grid.grid_scores_]

        assert_array_almost_equal(locscores, scores, decimal=2) 
Example #2
Source File: RegressionDecisionTree.py    From AirTicketPredicting with MIT License
def parameterChoosing(self):
        # Set the parameters by cross-validation
        tuned_parameters = [{'max_features': ['sqrt', 'log2', None],
                             'max_depth': range(2,1000),
                             }
                            ]


        reg = GridSearchCV(DecisionTreeRegressor(), tuned_parameters, cv=5, scoring='mean_squared_error')
        reg.fit(self.X_train, self.y_train)

        print "Best parameters set found on development set:\n"
        print reg.best_params_

        print "Grid scores on development set:\n"
        for params, mean_score, scores in reg.grid_scores_:
            print "%0.3f (+/-%0.03f) for %r\n" % (mean_score, scores.std() * 2, params)

        print "MSE for test data set:\n"
        y_true, y_pred = self.y_test, reg.predict(self.X_test)
        print mean_squared_error(y_true, y_pred) 
Example #3
Source File: RegressionRidgeReg.py    From AirTicketPredicting with MIT License
def parameterChoosing(self):
        # Set the parameters by cross-validation
        tuned_parameters = [{'alpha': np.logspace(-5,5)
                             }
                            ]


        reg = GridSearchCV(linear_model.Ridge(alpha = 0.5), tuned_parameters, cv=5, scoring='mean_squared_error')
        reg.fit(self.X_train, self.y_train)

        print "Best parameters set found on development set:\n"
        print reg.best_params_

        print "Grid scores on development set:\n"
        for params, mean_score, scores in reg.grid_scores_:
            print "%0.3f (+/-%0.03f) for %r\n" % (mean_score, scores.std() * 2, params)

        print reg.scorer_

        print "MSE for test data set:"
        y_true, y_pred = self.y_test, reg.predict(self.X_test)
        print mean_squared_error(y_pred, y_true) 
Example #4
Source File: RegressionKNN.py    From AirTicketPredicting with MIT License
def parameterChoosing(self):
        # Set the parameters by cross-validation
        tuned_parameters = [{'weights': ['uniform', 'distance'],
                             'n_neighbors': range(2,100)
                             }
                            ]


        reg = GridSearchCV(neighbors.KNeighborsRegressor(), tuned_parameters, cv=5, scoring='mean_squared_error')
        reg.fit(self.X_train, self.y_train)

        print "Best parameters set found on development set:\n"
        print reg.best_params_

        print "Grid scores on development set:\n"
        for params, mean_score, scores in reg.grid_scores_:
            print "%0.3f (+/-%0.03f) for %r\n" % (mean_score, scores.std() * 2, params)

        print reg.scorer_

        print "MSE for test data set:"
        y_true, y_pred = self.y_test, reg.predict(self.X_test)
        print mean_squared_error(y_pred, y_true) 
Example #5
Source File: RegressionRandomForest.py    From AirTicketPredicting with MIT License
def parameterChoosing(self):
        #Set the parameters by cross-validation
        tuned_parameters = [{'max_depth': range(20,60),
                             'n_estimators': range(10,40),
                             'max_features': ['sqrt', 'log2', None]
                             }
                            ]

        clf = GridSearchCV(RandomForestRegressor(n_estimators=30), tuned_parameters, cv=5, scoring='mean_squared_error')
        clf.fit(self.X_train, self.y_train.ravel())

        print "Best parameters set found on development set:\n"
        print clf.best_params_

        print "Grid scores on development set:\n"
        for params, mean_score, scores in clf.grid_scores_:
            print "%0.3f (+/-%0.03f) for %r\n" % (mean_score, scores.std() * 2, params)

        print "MSE for test data set:\n"
        y_true, y_pred = self.y_test, clf.predict(self.X_test)
        print mean_squared_error(y_true, y_pred) 
Example #6
Source File: ClassificationSVM.py    From AirTicketPredicting with MIT License
def parameterChoosing(self):
        # Set the parameters by cross-validation
        tuned_parameters = [{'kernel': ['rbf'],
                             'gamma': np.logspace(-4, 3, 30),
                             'C': [1e-3, 1e-2, 1e-1, 1, 10, 100, 1000]},
                             {'kernel': ['poly'],
                              'degree': [1, 2, 3, 4],
                              'C': [1e-3, 1e-2, 1e-1, 1, 10, 100, 1000],
                              'coef0': np.logspace(-4, 3, 30)},
                            {'kernel': ['linear'],
                             'C': [1e-3, 1e-2, 1e-1, 1, 10, 100, 1000]}]

        clf = GridSearchCV(svm.SVC(C=1), tuned_parameters, cv=5, scoring='precision_weighted')
        clf.fit(self.X_train, self.y_train.ravel())

        print "Best parameters set found on development set:\n"
        print clf.best_params_

        print "Grid scores on development set:\n"
        for params, mean_score, scores in clf.grid_scores_:
            print "%0.3f (+/-%0.03f) for %r\n" % (mean_score, scores.std() * 2, params)

        print "Detailed classification report:\n"
        y_true, y_pred = self.y_test, clf.predict(self.X_test)
        print classification_report(y_true, y_pred) 
Example #7
Source File: ClassificationLogReg.py    From AirTicketPredicting with MIT License
def parameterChoosing(self):
        # Set the parameters by cross-validation
        tuned_parameters = [{'penalty': ['l1'],
                             'C': np.logspace(-5,5)},
                             {'penalty': ['l2'],
                              'C': np.logspace(-5,5)}]

        clf = GridSearchCV(linear_model.LogisticRegression(tol=1e-6), tuned_parameters, cv=5, scoring='precision_weighted')
        clf.fit(self.X_train, self.y_train.ravel())

        print "Best parameters set found on development set:\n"
        print clf.best_params_

        print "Grid scores on development set:\n"
        for params, mean_score, scores in clf.grid_scores_:
            print "%0.3f (+/-%0.03f) for %r\n" % (mean_score, scores.std() * 2, params)

        print "Detailed classification report:\n"
        y_true, y_pred = self.y_test, clf.predict(self.X_test)
        print classification_report(y_true, y_pred) 
Example #8
Source File: ClassificationKNN.py    From AirTicketPredicting with MIT License
def parameterChoosing(self):
        # Set the parameters by cross-validation
        tuned_parameters = [{'weights': ['uniform', 'distance'],
                             'n_neighbors': range(2,60)
                             }
                            ]


        clf = GridSearchCV(neighbors.KNeighborsClassifier(), tuned_parameters, cv=5, scoring='precision_weighted')
        clf.fit(self.X_train, self.y_train.ravel())

        print "Best parameters set found on development set:\n"
        print clf.best_params_

        print "Grid scores on development set:\n"
        for params, mean_score, scores in clf.grid_scores_:
            print "%0.3f (+/-%0.03f) for %r\n" % (mean_score, scores.std() * 2, params)

        print "Detailed classification report:\n"
        y_true, y_pred = self.y_test, clf.predict(self.X_test)
        print classification_report(y_true, y_pred) 
Example #9
Source File: ClassificationDecisionTree.py    From AirTicketPredicting with MIT License
def parameterChoosing(self):
        # Set the parameters by cross-validation
        tuned_parameters = [{'max_depth': range(2,60),
                             'max_features': ['sqrt', 'log2', None]
                             }
                            ]

        clf = GridSearchCV(DecisionTreeClassifier(max_depth=5), tuned_parameters, cv=5, scoring='precision_weighted')
        clf.fit(self.X_train, self.y_train.ravel())

        print "Best parameters set found on development set:\n"
        print clf.best_params_

        print "Grid scores on development set:\n"
        for params, mean_score, scores in clf.grid_scores_:
            print "%0.3f (+/-%0.03f) for %r\n" % (mean_score, scores.std() * 2, params)

        print "Detailed classification report:\n"
        y_true, y_pred = self.y_test, clf.predict(self.X_test)
        print classification_report(y_true, y_pred) 
Example #10
Source File: GC_script.py    From ClimateVegetationDynamics_GrangerCausality with GNU General Public License v3.0
def nestedCrossValidation(X, y, cvFolds, estimator):  
    kf = KFold(len(X), n_folds=cvFolds, shuffle=True, random_state = 30)
    cv_j=0
    param_grid = {'alpha': [0.0000001,0.000001,0.00001,0.0001,0.001,0.01,0.1,1,10,100,1000,10000,100000, 1000000, 10000000,1000000000]}
    r2 = np.zeros((cvFolds,1))   
    for train_index, test_index in kf:
        train_X = X[train_index,:]
        test_X = X[test_index,:]
        train_y = y[train_index]
        test_y = y[test_index]
        grid = GridSearchCV(estimator, param_grid=param_grid, verbose=0, cv=cvFolds, scoring='mean_squared_error')
        grid.fit(train_X,train_y)
        y_true, y_pred = test_y,grid.best_estimator_.predict(test_X)
        r2[cv_j] = r2_score(y_true, y_pred) 
        cv_j = cv_j + 1 
    return r2
    
#%% main script 
Example #11
Source File: classif_and_ktst.py    From jstsp2015 with MIT License
def compute_svm_score_nestedCV(K, y, n_folds,
                               scoring=balanced_accuracy_scoring,
                               random_state=None,
                               param_grid=[{'C': np.logspace(-5, 5, 25)}]):
    """Compute cross-validated score of SVM using precomputed kernel.
    """
    cv = StratifiedKFold(y, n_folds=n_folds, shuffle=True,
                         random_state=random_state)
    scores = np.zeros(n_folds)
    for i, (train, test) in enumerate(cv):
        cvclf = SVC(kernel='precomputed')
        y_train = y[train]
        cvcv = StratifiedKFold(y_train, n_folds=n_folds,
                               shuffle=True,
                               random_state=random_state)
        clf = GridSearchCV(cvclf, param_grid=param_grid, scoring=scoring,
                           cv=cvcv, n_jobs=1)
        clf.fit(K[train, :][:, train], y_train)
        # print clf.best_params_
        scores[i] = clf.score(K[test, :][:, train], y[test])

    return scores.mean() 
Example #12
Source File: test_search_2.py    From spark-sklearn with Apache License 2.0
def test_cv_pipeline(self):
        pipeline = SKL_Pipeline([
            ('vect', SKL_HashingVectorizer(n_features=20)),
            ('tfidf', SKL_TfidfTransformer(use_idf=False)),
            ('lasso', SKL_Lasso())
        ])
        parameters = {
            'lasso__alpha': (0.001, 0.005, 0.01)
        }
        grid_search = GridSearchCV(self.sc, pipeline, parameters)
        data = [('hi there', 0.0),
                ('what is up', 1.0),
                ('huh', 1.0),
                ('now is the time', 5.0),
                ('for what', 0.0),
                ('the spark was there', 5.0),
                ('and so', 3.0),
                ('were many socks', 0.0),
                ('really', 1.0),
                ('too cool', 2.0)]
        df = self.sql.createDataFrame(data, ["review", "rating"]).toPandas()
        skl_gs = grid_search.fit(df.review.values, df.rating.values)
        assert len(skl_gs.cv_results_['params']) == len(parameters['lasso__alpha']) 
Example #13
Source File: 04_sent.py    From Building-Machine-Learning-Systems-With-Python-Second-Edition with MIT License
def __grid_search_model(clf_factory, X, Y):
    cv = ShuffleSplit(
        n=len(X), n_iter=10, test_size=0.3, indices=True, random_state=0)

    param_grid = dict(vect__ngram_range=[(1, 1), (1, 2), (1, 3)],
                      vect__min_df=[1, 2],
                      vect__smooth_idf=[False, True],
                      vect__use_idf=[False, True],
                      vect__sublinear_tf=[False, True],
                      vect__binary=[False, True],
                      clf__alpha=[0, 0.01, 0.05, 0.1, 0.5, 1],
                      )

    grid_search = GridSearchCV(clf_factory(),
                               param_grid=param_grid,
                               cv=cv,
                               score_func=f1_score,
                               verbose=10)
    grid_search.fit(X, Y)
    clf = grid_search.best_estimator_
    print clf

    return clf 
Example #14
Source File: 02_tuning.py    From Building-Machine-Learning-Systems-With-Python-Second-Edition with MIT License
def grid_search_model(clf_factory, X, Y):
    cv = ShuffleSplit(
        n=len(X), n_iter=10, test_size=0.3, indices=True, random_state=0)

    param_grid = dict(vect__ngram_range=[(1, 1), (1, 2), (1, 3)],
                      vect__min_df=[1, 2],
                      vect__stop_words=[None, "english"],
                      vect__smooth_idf=[False, True],
                      vect__use_idf=[False, True],
                      vect__sublinear_tf=[False, True],
                      vect__binary=[False, True],
                      clf__alpha=[0, 0.01, 0.05, 0.1, 0.5, 1],
                      )

    grid_search = GridSearchCV(clf_factory(),
                               param_grid=param_grid,
                               cv=cv,
                               score_func=f1_score,
                               verbose=10)
    grid_search.fit(X, Y)
    clf = grid_search.best_estimator_
    print clf

    return clf 
Example #15
Source File: test_sklearn.py    From scikit-neuralnetwork with BSD 3-Clause "New" or "Revised" License
def test_GridLayerParams(self):
        clf = GridSearchCV(
                    self.__estimator__(layers=[L("Rectifier", units=12), L(self.__output__)], n_iter=1),
                    param_grid={'hidden0__units': [4, 8, 12]})
        clf.fit(self.a_in, self.a_out) 
Example #16
Source File: GetMLPara.py    From dr_droid with Apache License 2.0
def selection_parameters_for_classfier(X,y):

    from sklearn import grid_search

    #paras={ 'n_neighbors':[1,10], 'weights':['uniform', 'distance'], 'algorithm':['auto', 'ball_tree','kd_tree', 'brute'], 'leaf_size':[20,50]}
    #knn = KNeighborsClassifier()

    #naive_bayes
    #nbg = GaussianNB()
    #nbm = MultinomialNB()
    #nbb = BernoulliNB()

    #decision tree
    #paras={ 'criterion':['gini','entropy'], 'splitter':['random', 'best'], 'max_features':[None, 'auto','sqrt', 'log2'], 'min_samples_split':[1,10]}
    #dtree = DecisionTreeClassifier()

    #random forest
    #rforest = RandomForestClassifier()
    #paras={ 'n_estimators':[2,15], 'criterion':['gini','entropy'], 'max_features': ['auto','sqrt', 'log2'], 'min_samples_split':[1,10]}

    #svm
    svmm = svm.SVC()
    paras={'kernel':['rbf','linear','poly']}


    clt = grid_search.GridSearchCV(svmm, paras, cv=5)
    clt.fit(X,y)
    print (clt)
    #print (clt.get_params())
    print (clt.set_params())
    print (clt.score(X,y))

    #scores = cross_val_score(clt,X,y,cv=10)
    #print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))


#this is to get score using cross_validation 
Example #17
Source File: model_selection.py    From StrepHit with GNU General Public License v3.0
def fit(self, training_sets):
        """ Searches for the best estimator and its arguments as well as the best
            training set amongst those specified.

            :param generator training_sets: Training set to use. Should be a sequence
             of tuples (x, y, metadata) where x is the training set, y is the
             correct answer for each chunk and metadata contains additional data that will
             be returned back
            :return: the metadata of the training set which yielded the best score,
             the best score obtained by the model, parameters of the model and
             fitted model itself
            :rtype: tuple
        """
        best_training, best_score, best_params, best_model = None, None, None, None
        for i, (metadata, extractor) in enumerate(training_sets):
            for model, grid in self.models:
                assert isclass(model)

                x, y = extractor.get_features(refit=True)

                grid['model_cls'] = [model]
                grid['selector_column'] = [None, extractor.lu_column()]

                search = GridSearchCV(
                    FeatureSelectedClassifier(model), param_grid=grid, **self.kwargs
                )
                search.fit(x, y)

                score, params, model = search.best_score_, search.best_params_, search.best_estimator_
                logger.debug('%s with parameters %s and training meta %s has score %s',
                             type(model), params, metadata, score)
                if best_score is None or score > best_score:
                    best_training, best_score, best_params, best_model = (x, y, metadata), score, params, model

        return best_training, best_score, best_params, best_model


# needs to be pickleable and callable 
Example #18
Source File: model_selection.py    From StrepHit with GNU General Public License v3.0
def __init__(self, *models, **kwargs):
        """ Initializes the grid search

            :param list models: List of models to use. Each one should be a tuple
             with a model instance or class and a dictionary for the search space.
            :param kwargs: addition initialization arguments
             for `sklearn.grid_search.GridSearchCV`
        """
        self.models = filter(None, models)
        kwargs['refit'] = True
        self.kwargs = kwargs 
Example #19
Source File: test_sklearn.py    From scikit-neuralnetwork with BSD 3-Clause "New" or "Revised" License
def test_GridGlobalParams(self):
        clf = GridSearchCV(
                    self.__estimator__(layers=[L(self.__output__)], n_iter=1),
                    param_grid={'learning_rate': [0.01, 0.001]})
        clf.fit(self.a_in, self.a_out) 
Example #20
Source File: model.py    From crypto_predictor with MIT License
def get_best_model(model, parameters, X_train, y_train):
    clf = GridSearchCV(model, parameters, cv=4, n_jobs=-1)
    clf.fit(X_train, y_train)
    # print(clf.best_params_)
    return clf.best_estimator_ 
Example #21
Source File: test_search_2.py    From spark-sklearn with Apache License 2.0
def test_example(self):
        # The classic example from the sklearn documentation
        iris = datasets.load_iris()
        parameters = {'kernel': ('linear', 'rbf'), 'C': [1, 10]}
        svr = svm.SVC(gamma='auto')
        clf = grid_search.GridSearchCV(svr, parameters)
        clf.fit(iris.data, iris.target)

        clf2 = GridSearchCV(self.sc, svr, parameters)
        clf2.fit(iris.data, iris.target)

        b1 = clf.estimator
        b2 = clf2.estimator
        self.assertEqual(b1.get_params(), b2.get_params()) 
Example #22
Source File: test_search_2.py    From spark-sklearn with Apache License 2.0
def test_cv_linreg(self):
        pipeline = SKL_Pipeline([
            ('lasso', SKL_Lasso())
        ])
        parameters = {
            'lasso__alpha': (0.001, 0.005, 0.01)
        }
        grid_search = GridSearchCV(self.sc, pipeline, parameters)
        X = scipy.sparse.vstack(map(lambda x: self.list2csr([x, x+1.0]), range(0, 100)))
        y = np.array(list(range(0, 100))).reshape((100, 1))
        skl_gs = grid_search.fit(X, y)
        assert len(skl_gs.cv_results_['params']) == len(parameters['lasso__alpha']) 
Example #23
Source File: test_search_2.py    From spark-sklearn with Apache License 2.0
def test_cv_lasso_with_mllib_featurization(self):
        data = [('hi there', 0.0),
                ('what is up', 1.0),
                ('huh', 1.0),
                ('now is the time', 5.0),
                ('for what', 0.0),
                ('the spark was there', 5.0),
                ('and so', 3.0),
                ('were many socks', 0.0),
                ('really', 1.0),
                ('too cool', 2.0)]
        data = self.sql.createDataFrame(data, ["review", "rating"])

        # Feature extraction using MLlib
        tokenizer = Tokenizer(inputCol="review", outputCol="words")
        hashingTF = HashingTF(inputCol="words", outputCol="features", numFeatures=20000)
        pipeline = Pipeline(stages=[tokenizer, hashingTF])
        data = pipeline.fit(data).transform(data)

        df = self.converter.toPandas(data.select(data.features.alias("review"), "rating"))

        pipeline = SKL_Pipeline([
            ('lasso', SKL_Lasso())
        ])
        parameters = {
            'lasso__alpha': (0.001, 0.005, 0.01)
        }

        grid_search = GridSearchCV(self.sc, pipeline, parameters)
        skl_gs = grid_search.fit(df.review.values, df.rating.values)
        assert len(skl_gs.cv_results_['params']) == len(parameters['lasso__alpha']) 
Example #24
Source File: classifier.py    From TextDetector with GNU General Public License v3.0
def __init__(self, mode='adaboost'):

        if mode == 'adaboost':
            clf = GradientBoostingRegressor(
                learning_rate=1,
                n_estimators=1000,
                max_depth=3,
                random_state=0)
        elif mode == 'randomforest':
            clf = RandomForestRegressor(
                n_estimators=10,
                max_depth=None,
                n_jobs=-1)
        elif mode == 'SVM':
            clf = SVC(C=10.0,
                      kernel='linear')
        elif mode == 'vjcascade':
            clf = vjcascade(n_stage=30,
                            n_esti=1,
                            l_rate=1)
        elif mode == 'gridSearch':
            param_grid = [
                {'max_depth': [1, 2, 3], 'loss': ['ls', 'lad']},
            ]
            gbr = GradientBoostingRegressor()
            clf = grid_search.GridSearchCV(gbr, param_grid, n_jobs=-1)
        else:
            raise Exception('no mode named: ' + mode + ' found!')

        self.classifier = clf
        self.mode = mode 
Example #25
Source File: functions.py    From topicModelling with GNU General Public License v3.0
def perform_class(X, y, iterations=1):
    scores = []
    for i in range(iterations):
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42+iterations)
        parameters = {'C':[0.01, 0.1, 1, 10, 100]}
        clf_acc = GridSearchCV(svm.LinearSVC(), parameters, n_jobs=3, cv=3, refit=True, scoring = 'accuracy')
        clf_acc.fit(X_train, y_train)
        scores.append([metrics.accuracy_score(y_test, clf_acc.predict(X_test)), metrics.f1_score(y_test, clf_acc.predict(X_test),average='micro')])
    acc = np.mean([x[0] for x in scores]), np.std([x[0] for x in scores])
    mif = np.mean([x[1] for x in scores]), np.std([x[1] for x in scores])
    return acc, mif 
Example #26
Source File: functions.py    From topicModelling with GNU General Public License v3.0
def perform_class(X, y, iterations=1):
    scores = []
    for i in range(iterations):
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42+iterations)
        parameters = {'C':[0.01, 0.1, 1, 10, 100]}
        clf_acc = GridSearchCV(svm.LinearSVC(), parameters, n_jobs=3, cv=3, refit=True, scoring = 'accuracy')
        clf_acc.fit(X_train, y_train)
        scores.append([metrics.accuracy_score(y_test, clf_acc.predict(X_test)), metrics.f1_score(y_test, clf_acc.predict(X_test),average='micro')])
    acc = np.mean([x[0] for x in scores]), np.std([x[0] for x in scores])
    mif = np.mean([x[1] for x in scores]), np.std([x[1] for x in scores])
    return acc, mif 
Example #27
Source File: classification.py    From pyImSegm with BSD 3-Clause "New" or "Revised" License
def create_classif_search(name_clf, clf_pipeline, nb_labels,
                          search_type='random', cross_val=10,
                          eval_metric='f1', nb_iter=250, nb_workers=5):
    """ create sklearn search depending on spec. random or grid

    :param int nb_labels: number of labels
    :param str search_type: hyper-params search type
    :param str eval_metric: evaluation metric
    :param int nb_iter: for random number of tries
    :param str name_clf: name of classif.
    :param obj clf_pipeline: object
    :param obj cross_val: obj specific CV for fix train-test
    :param int nb_workers: number jobs running in parallel
    :return:
    """
    score_weight = 'weighted' if nb_labels > 2 else 'binary'
    scoring = metrics.make_scorer(DICT_SCORING[eval_metric.lower()],
                                  average=score_weight)
    if search_type == 'grid':
        clf_parameters = create_clf_param_search_grid(name_clf)
        logging.info('init Grid search...')
        clf_search = GridSearchCV(
            clf_pipeline, clf_parameters, scoring=scoring, cv=cross_val,
            n_jobs=nb_workers, verbose=1, refit=True)
    else:
        clf_parameters = create_clf_param_search_distrib(name_clf)
        nb_iter = search_params_cut_down_max_nb_iter(clf_parameters, nb_iter)
        logging.info('init Randomized search...')
        clf_search = RandomizedSearchCV(
            clf_pipeline, clf_parameters, scoring=scoring, cv=cross_val,
            n_jobs=nb_workers, n_iter=nb_iter, verbose=1, refit=True)
    return clf_search 
Example #28
Source File: reduction.py    From aggregation with Apache License 2.0
def shrunk_cov_score(X):
    shrinkages = np.logspace(-2, 0, 30)
    cv = GridSearchCV(ShrunkCovariance(), {'shrinkage': shrinkages})
    return np.mean(cross_val_score(cv.fit(X).best_estimator_, X)) 
Example #29
Source File: ensembles.py    From Azimuth with BSD 3-Clause "New" or "Revised" License
def SVM_stacking(y_train, X_train, X_test):
    parameters = {'kernel': ('linear', 'rbf'), 'C': np.linspace(1, 10, 10), 'gamma': np.linspace(1e-3, 1., 10)}
    svr = svm.SVR()
    clf = GridSearchCV(svr, parameters, n_jobs=3, verbose=1, cv=10, scoring=spearman_scoring)
    clf.fit(X_train, y_train.flatten())
    return clf.predict(X_test) 
Example #30
Source File: ensembles.py    From Azimuth with BSD 3-Clause "New" or "Revised" License
def GBR_stacking(y_train, X_train, X_test):
    param_grid = {'learning_rate': [0.1, 0.05, 0.01],
                  'max_depth': [2, 3, 4, 5],  # [2, 3, 4, 6],
                  'min_samples_leaf': [1, 2, 3],  # ,5, 7],
                  'max_features': [1.0, 0.5, 0.3, 0.1]}

    est = en.GradientBoostingRegressor(loss='ls', n_estimators=100)
    clf = GridSearchCV(est, param_grid, n_jobs=3, verbose=1, cv=20, scoring=spearman_scoring).fit(X_train, y_train.flatten())
    # clf.fit(X_train, y_train.flatten())
    return clf.predict(X_test)