Python sklearn.linear_model.LassoCV() Examples

The following are 29 code examples of sklearn.linear_model.LassoCV(), collected from open-source projects. Each example lists its source file, the project it comes from, and that project's license. You may also want to check out the other available functions and classes of the sklearn.linear_model module.
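Before the project examples, here is a minimal, self-contained sketch of fitting LassoCV on synthetic data; the dataset and parameter values are illustrative only and are not taken from any of the examples below.

from sklearn.datasets import make_regression
from sklearn.linear_model import LassoCV

# Illustrative synthetic regression problem.
X, y = make_regression(n_samples=200, n_features=20, n_informative=5,
                       noise=1.0, random_state=0)

# LassoCV picks the regularization strength alpha_ by cross-validation
# over an automatically generated grid of candidate alphas.
reg = LassoCV(cv=5, random_state=0).fit(X, y)
print(reg.alpha_)        # selected regularization strength
print(reg.coef_)         # coefficient vector, with many entries driven to zero
print(reg.score(X, y))   # R^2 on the training data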
Example #1
Source File: test_coordinate_descent.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_lasso_cv():
    X, y, X_test, y_test = build_dataset()
    max_iter = 150
    clf = LassoCV(n_alphas=10, eps=1e-3, max_iter=max_iter).fit(X, y)
    assert_almost_equal(clf.alpha_, 0.056, 2)

    clf = LassoCV(n_alphas=10, eps=1e-3, max_iter=max_iter, precompute=True)
    clf.fit(X, y)
    assert_almost_equal(clf.alpha_, 0.056, 2)

    # Check that the lars and the coordinate descent implementation
    # select a similar alpha
    lars = LassoLarsCV(normalize=False, max_iter=30).fit(X, y)
    # for this we check that they don't fall in the grid of
    # clf.alphas further than 1
    assert np.abs(np.searchsorted(clf.alphas_[::-1], lars.alpha_) -
                  np.searchsorted(clf.alphas_[::-1], clf.alpha_)) <= 1
    # check that they also give a similar MSE
    mse_lars = interpolate.interp1d(lars.cv_alphas_, lars.mse_path_.T)
    np.testing.assert_approx_equal(mse_lars(clf.alphas_[5]).mean(),
                                   clf.mse_path_[5].mean(), significant=2)

    # test set
    assert_greater(clf.score(X_test, y_test), 0.99) 
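The build_dataset() helper used above is defined elsewhere in scikit-learn's test module and is not shown on this page. A rough stand-in (the shapes, sparsity, and random seed here are assumptions, not the exact helper) looks like this:

import numpy as np

def build_dataset(n_samples=50, n_features=200, n_informative_features=10):
    # Random sparse linear model with separate train and test draws,
    # approximating the helper used by these coordinate-descent tests.
    rng = np.random.RandomState(0)
    w = rng.randn(n_features)
    w[n_informative_features:] = 0.0   # only the first few features carry signal
    X = rng.randn(n_samples, n_features)
    y = X @ w
    X_test = rng.randn(n_samples, n_features)
    y_test = X_test @ w
    return X, y, X_test, y_test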
Example #2
Source File: test_coordinate_descent.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_lasso_cv_with_some_model_selection():
    from sklearn.pipeline import make_pipeline
    from sklearn.preprocessing import StandardScaler
    from sklearn.model_selection import StratifiedKFold
    from sklearn import datasets
    from sklearn.linear_model import LassoCV

    diabetes = datasets.load_diabetes()
    X = diabetes.data
    y = diabetes.target

    pipe = make_pipeline(
        StandardScaler(),
        LassoCV(cv=StratifiedKFold(n_splits=5))
    )
    pipe.fit(X, y) 
Example #3
Source File: ewa.py    From pycobra with MIT License
def load_default(self, machine_list=['lasso', 'tree', 'ridge', 'random_forest', 'svm']):
        """
        Loads 5 different scikit-learn regressors by default (lasso, tree, ridge, random forest and SVM).

        Parameters
        ----------
        machine_list: optional, list of strings
            List of default machine names to be loaded.

        """
        for machine in machine_list:
            try:
                if machine == 'lasso':
                    self.estimators_['lasso'] = linear_model.LassoCV(random_state=self.random_state).fit(self.X_k_, self.y_k_)
                if machine == 'tree':
                    self.estimators_['tree'] = DecisionTreeRegressor(random_state=self.random_state).fit(self.X_k_, self.y_k_)
                if machine == 'ridge':
                    self.estimators_['ridge'] = linear_model.RidgeCV().fit(self.X_k_, self.y_k_)
                if machine == 'random_forest':
                    self.estimators_['random_forest'] = RandomForestRegressor(random_state=self.random_state).fit(self.X_k_, self.y_k_)
                if machine == 'svm':
                    self.estimators_['svm'] = SVR().fit(self.X_k_, self.y_k_)
            except ValueError:
                continue 
Example #4
Source File: gd_poisoners.py    From manip-ml with MIT License
def learn_model(self, x, y, clf, lam = None):
        if (lam is None and self.initlam != -1): # hack for first training
            lam = self.initlam
        if clf is None:
            if lam is None:
                clf = linear_model.LassoCV(max_iter=10000)
                clf.fit(x, y)
                lam = clf.alpha_
            clf = linear_model.Lasso(alpha = lam, \
                                 max_iter = 10000, \
                                 warm_start = True)
        clf.fit(x, y)
        return clf, lam


############################################################################################
# Implements GD Poisoning for Ridge Linear Regression
############################################################################################ 
Example #5
Source File: test_coordinate_descent.py    From twitter-stock-recommendation with MIT License
def test_lasso_cv_with_some_model_selection():
    from sklearn.pipeline import make_pipeline
    from sklearn.preprocessing import StandardScaler
    from sklearn.model_selection import StratifiedKFold
    from sklearn import datasets
    from sklearn.linear_model import LassoCV

    diabetes = datasets.load_diabetes()
    X = diabetes.data
    y = diabetes.target

    pipe = make_pipeline(
        StandardScaler(),
        LassoCV(cv=StratifiedKFold(n_splits=5))
    )
    pipe.fit(X, y) 
Example #6
Source File: test_coordinate_descent.py    From twitter-stock-recommendation with MIT License
def test_lasso_cv():
    X, y, X_test, y_test = build_dataset()
    max_iter = 150
    clf = LassoCV(n_alphas=10, eps=1e-3, max_iter=max_iter).fit(X, y)
    assert_almost_equal(clf.alpha_, 0.056, 2)

    clf = LassoCV(n_alphas=10, eps=1e-3, max_iter=max_iter, precompute=True)
    clf.fit(X, y)
    assert_almost_equal(clf.alpha_, 0.056, 2)

    # Check that the lars and the coordinate descent implementation
    # select a similar alpha
    lars = LassoLarsCV(normalize=False, max_iter=30).fit(X, y)
    # for this we check that they don't fall in the grid of
    # clf.alphas further than 1
    assert_true(np.abs(
        np.searchsorted(clf.alphas_[::-1], lars.alpha_) -
        np.searchsorted(clf.alphas_[::-1], clf.alpha_)) <= 1)
    # check that they also give a similar MSE
    mse_lars = interpolate.interp1d(lars.cv_alphas_, lars.mse_path_.T)
    np.testing.assert_approx_equal(mse_lars(clf.alphas_[5]).mean(),
                                   clf.mse_path_[5].mean(), significant=2)

    # test set
    assert_greater(clf.score(X_test, y_test), 0.99) 
Example #7
Source File: feature_selection.py    From CatLearn with GNU General Public License v3.0
def feature_inspection(self, lower=0, upper=1, interval=10**2,
                           alpha_list=None):
        """Generate interval used to search for the alpha.

        Parameters
        ----------
        lower : int
            Lower bound for the interval search.
        upper : int
            Upper bound for the interval search.
        interval : int
            Number of alphas in the inspected interval.
        alpha_list : list of float, optional
            Explicit alphas to inspect; if given, it overrides the linearly
            spaced grid between `upper` and `lower`.
        """
        feat_vec, alpha_vec = [], []
        if alpha_list is None:
            alpha_list = np.linspace(float(upper), float(lower), int(interval))
        for alpha in alpha_list:
            model = LassoCV(alphas=[alpha], cv=3).fit(X=self.train_features,
                                                      y=self.train_targets)
            feat_vec.append(np.shape(np.nonzero(model.coef_))[1])
            alpha_vec.append(alpha)
        return feat_vec, alpha_vec, np.nonzero(model.coef_) 
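The same feature-counting idea works outside the class; a standalone sketch (the data and the alpha grid are illustrative) is:

import numpy as np
from sklearn.datasets import make_regression
from sklearn.linear_model import LassoCV

X, y = make_regression(n_samples=100, n_features=30, n_informative=5, random_state=0)

feat_vec, alpha_vec = [], []
for alpha in np.linspace(1.0, 0.05, 20):   # shrink alpha and watch features enter
    model = LassoCV(alphas=[alpha], cv=3).fit(X, y)
    feat_vec.append(int(np.count_nonzero(model.coef_)))
    alpha_vec.append(alpha)
print(list(zip(alpha_vec, feat_vec)))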
Example #8
Source File: FeatureSelector.py    From CDSS with GNU General Public License v3.0
def _eliminate_recursively(self, k=None):
        if self._problem == FeatureSelector.CLASSIFICATION:
            estimator = RandomForestClassifier(random_state=self._random_state)
        else:
            estimator = LassoCV(random_state=self._random_state)
        # If k is not specified, then use RFECV to automatically decide on
        # optimal number of features. If specified, then use RFE.
        if k is None:
            self._selector = RFECV(estimator)
        else:
            self._selector = RFE(estimator, n_features_to_select=k, step=0.05) 
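A standalone sketch of the regression branch above, i.e. recursive feature elimination with a LassoCV base estimator (the data and parameter values are illustrative):

from sklearn.datasets import make_regression
from sklearn.feature_selection import RFE
from sklearn.linear_model import LassoCV

X, y = make_regression(n_samples=100, n_features=20, n_informative=5, random_state=0)

# Keep 5 features, discarding 5% of the remaining features at each iteration.
selector = RFE(LassoCV(), n_features_to_select=5, step=0.05).fit(X, y)
print(selector.support_)   # boolean mask of the retained columns
print(selector.ranking_)   # 1 for retained features; larger means eliminated earlier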
Example #9
Source File: test_coordinate_descent.py    From twitter-stock-recommendation with MIT License
def test_precompute_invalid_argument():
    X, y, _, _ = build_dataset()
    for clf in [ElasticNetCV(precompute="invalid"),
                LassoCV(precompute="invalid")]:
        assert_raises_regex(ValueError, ".*should be.*True.*False.*auto.*"
                            "array-like.*Got 'invalid'", clf.fit, X, y)

    # Precompute = 'auto' is not supported for ElasticNet
    assert_raises_regex(ValueError, ".*should be.*True.*False.*array-like.*"
                        "Got 'auto'", ElasticNet(precompute='auto').fit, X, y) 
Example #10
Source File: Regressor.py    From CDSS with GNU General Public License v3.0
def _train_lasso(self, X, y):
        self._model = LassoCV()
        self._model.fit(X, y) 
Example #11
Source File: test_sklearn_glm_regressor_converter.py    From sklearn-onnx with MIT License
def test_model_lasso_cv(self):
        model, X = fit_regression_model(linear_model.LassoCV())
        model_onnx = convert_sklearn(
            model, "lasso cv",
            [("input", FloatTensorType([None, X.shape[1]]))])
        self.assertIsNotNone(model_onnx)
        dump_data_and_model(
            X,
            model,
            model_onnx,
            basename="SklearnLassoCV-Dec4",
            allow_failure="StrictVersion("
            "onnxruntime.__version__)"
            "<= StrictVersion('0.2.1')",
        ) 
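Once converted, the ONNX graph can be scored with onnxruntime. A hedged, self-contained sketch (assuming skl2onnx and onnxruntime are installed; the data is synthetic and not taken from the test above):

import numpy as np
import onnxruntime as rt
from skl2onnx import convert_sklearn
from skl2onnx.common.data_types import FloatTensorType
from sklearn.datasets import make_regression
from sklearn.linear_model import LassoCV

X, y = make_regression(n_samples=100, n_features=4, random_state=0)
model = LassoCV(cv=3).fit(X, y)

onx = convert_sklearn(model, "lasso cv",
                      [("input", FloatTensorType([None, X.shape[1]]))])
sess = rt.InferenceSession(onx.SerializeToString(),
                           providers=["CPUExecutionProvider"])
pred_onnx = sess.run(None, {"input": X.astype(np.float32)})[0]
print(pred_onnx[:3].ravel())   # ONNX predictions (float32)
print(model.predict(X)[:3])    # scikit-learn predictions for comparison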
Example #12
Source File: test_linear_model.py    From pandas-ml with BSD 3-Clause "New" or "Revised" License
def test_objectmapper(self):
        df = pdml.ModelFrame([])
        self.assertIs(df.linear_model.ARDRegression, lm.ARDRegression)
        self.assertIs(df.linear_model.BayesianRidge, lm.BayesianRidge)
        self.assertIs(df.linear_model.ElasticNet, lm.ElasticNet)
        self.assertIs(df.linear_model.ElasticNetCV, lm.ElasticNetCV)

        self.assertIs(df.linear_model.HuberRegressor, lm.HuberRegressor)

        self.assertIs(df.linear_model.Lars, lm.Lars)
        self.assertIs(df.linear_model.LarsCV, lm.LarsCV)
        self.assertIs(df.linear_model.Lasso, lm.Lasso)
        self.assertIs(df.linear_model.LassoCV, lm.LassoCV)
        self.assertIs(df.linear_model.LassoLars, lm.LassoLars)
        self.assertIs(df.linear_model.LassoLarsCV, lm.LassoLarsCV)
        self.assertIs(df.linear_model.LassoLarsIC, lm.LassoLarsIC)

        self.assertIs(df.linear_model.LinearRegression, lm.LinearRegression)
        self.assertIs(df.linear_model.LogisticRegression, lm.LogisticRegression)
        self.assertIs(df.linear_model.LogisticRegressionCV, lm.LogisticRegressionCV)
        self.assertIs(df.linear_model.MultiTaskLasso, lm.MultiTaskLasso)
        self.assertIs(df.linear_model.MultiTaskElasticNet, lm.MultiTaskElasticNet)
        self.assertIs(df.linear_model.MultiTaskLassoCV, lm.MultiTaskLassoCV)
        self.assertIs(df.linear_model.MultiTaskElasticNetCV, lm.MultiTaskElasticNetCV)

        self.assertIs(df.linear_model.OrthogonalMatchingPursuit, lm.OrthogonalMatchingPursuit)
        self.assertIs(df.linear_model.OrthogonalMatchingPursuitCV, lm.OrthogonalMatchingPursuitCV)
        self.assertIs(df.linear_model.PassiveAggressiveClassifier, lm.PassiveAggressiveClassifier)
        self.assertIs(df.linear_model.PassiveAggressiveRegressor, lm.PassiveAggressiveRegressor)

        self.assertIs(df.linear_model.Perceptron, lm.Perceptron)
        self.assertIs(df.linear_model.RandomizedLasso, lm.RandomizedLasso)
        self.assertIs(df.linear_model.RandomizedLogisticRegression, lm.RandomizedLogisticRegression)
        self.assertIs(df.linear_model.RANSACRegressor, lm.RANSACRegressor)
        self.assertIs(df.linear_model.Ridge, lm.Ridge)
        self.assertIs(df.linear_model.RidgeClassifier, lm.RidgeClassifier)
        self.assertIs(df.linear_model.RidgeClassifierCV, lm.RidgeClassifierCV)
        self.assertIs(df.linear_model.RidgeCV, lm.RidgeCV)
        self.assertIs(df.linear_model.SGDClassifier, lm.SGDClassifier)
        self.assertIs(df.linear_model.SGDRegressor, lm.SGDRegressor)
        self.assertIs(df.linear_model.TheilSenRegressor, lm.TheilSenRegressor) 
Example #13
Source File: beamformers_electrodes_tweak.py    From mmvt with GNU General Public License v3.0
def calc_optimization_features(optimization_method, freqs_bins, cond, meg_data_dic, elec_data, electrodes, from_t, to_t, optimization_params={}):
    # scorer = make_scorer(rol_corr, False)
    cv_parameters = []
    if optimization_method in ['Ridge', 'RidgeCV', 'Lasso', 'LassoCV', 'ElasticNet', 'ElasticNetCV']:
        # vstack all meg data, such that X.shape = T*n X F, where n is the electrodes num
        # Y is T*n * 1
        X = np.hstack([meg_data_dic[electrode][:, from_t:to_t] for electrode in electrodes])
        Y = np.hstack([elec_data[electrode][cond][from_t:to_t] for electrode in electrodes])
        funcs_dic = {'Ridge': Ridge(alpha=0.1), 'RidgeCV':RidgeCV(np.logspace(0, -10, 11)), # scoring=scorer
            'Lasso': Lasso(alpha=1.0/X.shape[0]), 'LassoCV':LassoCV(alphas=np.logspace(0, -10, 11), max_iter=1000),
            'ElasticNetCV': ElasticNetCV(alphas= np.logspace(0, -10, 11), l1_ratio=np.linspace(0, 1, 11))}
        clf = funcs_dic[optimization_method]
        clf.fit(X.T, Y)
        p = clf.coef_
        if len(p) != len(freqs_bins):
            raise Exception('{} (len(clf.coef)) != {} (len(freqs_bin))!!!'.format(len(p), len(freqs_bins)))
        if optimization_method in ['RidgeCV', 'LassoCV']:
            cv_parameters = clf.alpha_
        elif optimization_method == 'ElasticNetCV':
            cv_parameters = [clf.alpha_, clf.l1_ratio_]
        args = [(meg_pred(p, meg_data_dic[electrode][:, from_t:to_t]), elec_data[electrode][cond][from_t:to_t]) for electrode in electrodes]
        p0 = leastsq(post_ridge_err_func, [1], args=args, maxfev=0)[0]
        p = np.hstack((p0, p))
    elif optimization_method in ['leastsq', 'dtw', 'minmax', 'diff_rms', 'rol_corr']:
        args = ([(meg_data_dic[electrode][:, from_t:to_t], elec_data[electrode][cond][from_t:to_t]) for electrode in electrodes], optimization_params)
        p0 = np.ones((1, len(freqs_bins)+1))
        funcs_dic = {'leastsq': partial(leastsq, func=err_func, x0=p0, args=args),
                     'dtw': partial(minimize, fun=dtw_err_func, x0=p0, args=args),
                     'minmax': partial(minimize, fun=minmax_err_func, x0=p0, args=args),
                     'diff_rms': partial(minimize, fun=min_diff_rms_err_func, x0=p0, args=args),
                     'rol_corr': partial(minimize, fun=max_rol_corr, x0=p0, args=args)}
        res = funcs_dic[optimization_method]()
        p = res[0] if optimization_method=='leastsq' else res.x
        cv_parameters = optimization_params
    else:
        raise Exception('Unknown optimization_method! {}'.format(optimization_method))
    return p, cv_parameters 
Example #14
Source File: test_coordinate_descent.py    From twitter-stock-recommendation with MIT License
def test_lasso_cv_positive_constraint():
    X, y, X_test, y_test = build_dataset()
    max_iter = 500

    # Ensure the unconstrained fit has a negative coefficient
    clf_unconstrained = LassoCV(n_alphas=3, eps=1e-1, max_iter=max_iter, cv=2,
                                n_jobs=1)
    clf_unconstrained.fit(X, y)
    assert_true(min(clf_unconstrained.coef_) < 0)

    # On same data, constrained fit has non-negative coefficients
    clf_constrained = LassoCV(n_alphas=3, eps=1e-1, max_iter=max_iter,
                              positive=True, cv=2, n_jobs=1)
    clf_constrained.fit(X, y)
    assert_true(min(clf_constrained.coef_) >= 0) 
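The effect of positive=True can also be seen on a toy problem with a known negative coefficient (an illustration, not part of the test suite):

import numpy as np
from sklearn.linear_model import LassoCV

rng = np.random.RandomState(0)
X = rng.randn(100, 5)
y = X @ np.array([1.0, -2.0, 0.5, 0.0, 0.0]) + 0.1 * rng.randn(100)

print(LassoCV(cv=3).fit(X, y).coef_)                  # second coefficient comes out negative
print(LassoCV(cv=3, positive=True).fit(X, y).coef_)   # all coefficients are >= 0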
Example #15
Source File: test_coordinate_descent.py    From twitter-stock-recommendation with MIT License
def test_uniform_targets():
    enet = ElasticNetCV(fit_intercept=True, n_alphas=3)
    m_enet = MultiTaskElasticNetCV(fit_intercept=True, n_alphas=3)
    lasso = LassoCV(fit_intercept=True, n_alphas=3)
    m_lasso = MultiTaskLassoCV(fit_intercept=True, n_alphas=3)

    models_single_task = (enet, lasso)
    models_multi_task = (m_enet, m_lasso)

    rng = np.random.RandomState(0)

    X_train = rng.random_sample(size=(10, 3))
    X_test = rng.random_sample(size=(10, 3))

    y1 = np.empty(10)
    y2 = np.empty((10, 2))

    for model in models_single_task:
        for y_values in (0, 5):
            y1.fill(y_values)
            assert_array_equal(model.fit(X_train, y1).predict(X_test), y1)
            assert_array_equal(model.alphas_, [np.finfo(float).resolution]*3)

    for model in models_multi_task:
        for y_values in (0, 5):
            y2[:, 0].fill(y_values)
            y2[:, 1].fill(2 * y_values)
            assert_array_equal(model.fit(X_train, y2).predict(X_test), y2)
            assert_array_equal(model.alphas_, [np.finfo(float).resolution]*3) 
Example #16
Source File: test_coordinate_descent.py    From twitter-stock-recommendation with MIT License
def test_1d_multioutput_lasso_and_multitask_lasso_cv():
    X, y, _, _ = build_dataset(n_features=10)
    y = y[:, np.newaxis]
    clf = LassoCV(n_alphas=5, eps=2e-3)
    clf.fit(X, y[:, 0])
    clf1 = MultiTaskLassoCV(n_alphas=5, eps=2e-3)
    clf1.fit(X, y)
    assert_almost_equal(clf.alpha_, clf1.alpha_)
    assert_almost_equal(clf.coef_, clf1.coef_[0])
    assert_almost_equal(clf.intercept_, clf1.intercept_[0]) 
Example #17
Source File: TermDocMatrix.py    From scattertext with Apache License 2.0
def get_logistic_regression_coefs_l1(self, category,
                                         clf=LassoCV(alphas=[0.1, 0.001],
                                                     max_iter=10000,
                                                     n_jobs=-1)):
        ''' Computes l1-penalized logistic regression score.
        Parameters
        ----------
        category : str
            category name to score

        Returns
        -------
            (coefficient array, accuracy, majority class baseline accuracy)
        '''
        try:
            from sklearn.cross_validation import cross_val_predict
        except:
            from sklearn.model_selection import cross_val_predict
        y = self._get_mask_from_category(category)
        y_continuous = self._get_continuous_version_boolean_y(y)
        # X = TfidfTransformer().fit_transform(self._X)
        X = self._X

        clf.fit(X, y_continuous)
        y_hat = (cross_val_predict(clf, X, y_continuous) > 0)
        acc, baseline = self._get_accuracy_and_baseline_accuracy(y, y_hat)
        clf.fit(X, y_continuous)
        return clf.coef_, acc, baseline 
Example #18
Source File: kernelcobra.py    From pycobra with MIT License
def load_default(self, machine_list='basic'):
        """
        Loads 4 different scikit-learn regressors by default. The advanced list adds more machines. 
        Parameters
        ----------
        machine_list: optional, list of strings
            List of default machine names to be loaded. 
            Default is 'basic'.
        Returns
        -------
        self : returns an instance of self.
        """
        if machine_list == 'basic':
            machine_list = ['tree', 'ridge', 'random_forest', 'svm']
        if machine_list == 'advanced':
            machine_list=['lasso', 'tree', 'ridge', 'random_forest', 'svm', 'bayesian_ridge', 'sgd']

        self.estimators_ = {}
        for machine in machine_list:
            try:
                if machine == 'lasso':
                    self.estimators_['lasso'] = linear_model.LassoCV(random_state=self.random_state).fit(self.X_k_, self.y_k_)
                if machine == 'tree':
                    self.estimators_['tree'] = DecisionTreeRegressor(random_state=self.random_state).fit(self.X_k_, self.y_k_)
                if machine == 'ridge':
                    self.estimators_['ridge'] = linear_model.RidgeCV().fit(self.X_k_, self.y_k_)
                if machine == 'random_forest':
                    self.estimators_['random_forest'] = RandomForestRegressor(random_state=self.random_state).fit(self.X_k_, self.y_k_)
                if machine == 'svm':
                    self.estimators_['svm'] = SVR().fit(self.X_k_, self.y_k_)
                if machine == 'sgd':
                    self.estimators_['sgd'] = linear_model.SGDRegressor(random_state=self.random_state).fit(self.X_k_, self.y_k_)
                if machine == 'bayesian_ridge':
                    self.estimators_['bayesian_ridge'] = linear_model.BayesianRidge().fit(self.X_k_, self.y_k_)
            except ValueError:
                continue
        return self 
Example #19
Source File: cobra.py    From pycobra with MIT License
def load_default(self, machine_list='basic'):
        """
        Loads 4 different scikit-learn regressors by default. The advanced list adds more machines. 

        Parameters
        ----------
        machine_list: optional, list of strings
            List of default machine names to be loaded.
        Returns
        -------
        self : returns an instance of self.
        """

        if machine_list == 'basic':
            machine_list = ['tree', 'ridge', 'random_forest', 'svm']
        if machine_list == 'advanced':
            machine_list=['lasso', 'tree', 'ridge', 'random_forest', 'svm', 'bayesian_ridge', 'sgd']

        self.estimators_ = {}
        for machine in machine_list:
            try:
                if machine == 'lasso':
                    self.estimators_['lasso'] = linear_model.LassoCV(random_state=self.random_state).fit(self.X_k_, self.y_k_)
                if machine == 'tree':
                    self.estimators_['tree'] = DecisionTreeRegressor(random_state=self.random_state).fit(self.X_k_, self.y_k_)
                if machine == 'ridge':
                    self.estimators_['ridge'] = linear_model.RidgeCV().fit(self.X_k_, self.y_k_)
                if machine == 'random_forest':
                    self.estimators_['random_forest'] = RandomForestRegressor(random_state=self.random_state).fit(self.X_k_, self.y_k_)
                if machine == 'svm':
                    self.estimators_['svm'] = LinearSVR(random_state=self.random_state).fit(self.X_k_, self.y_k_)
                if machine == 'sgd':
                    self.estimators_['sgd'] = linear_model.SGDRegressor(random_state=self.random_state).fit(self.X_k_, self.y_k_)
                if machine == 'bayesian_ridge':
                    self.estimators_['bayesian_ridge'] = linear_model.BayesianRidge().fit(self.X_k_, self.y_k_)
            except ValueError:
                continue
        return self 
Example #20
Source File: scikit_wrapper.py    From CatLearn with GNU General Public License v3.0
def _lasso(self):
        """Order features according to their corresponding coefficients."""
        if self.line_search:
            pred = None
            try:
                alpha_list = np.geomspace(self.max_alpha, self.min_alpha,
                                          self.steps)
            except AttributeError:
                alpha_list = np.exp(np.linspace(np.log(self.max_alpha),
                                                np.log(self.min_alpha),
                                                self.steps))
            for alpha in alpha_list:
                regr = Lasso(alpha=alpha, max_iter=self.iter,
                             fit_intercept=True, normalize=True,
                             selection='random')
                model = regr.fit(self.train_matrix, self.train_target)
                nz = len(model.coef_) - (model.coef_ == 0.).sum()
                if nz >= self.size:
                    coeff = model.coef_
                    break
        else:
            regr = LassoCV(fit_intercept=True, normalize=True,
                           n_alphas=self.steps, max_iter=self.iter,
                           eps=self.eps, cv=None)
            model = regr.fit(X=self.train_matrix, y=self.train_target)
            coeff = model.coef_

            # Make the linear prediction.
            pred = None
            if self.predict:
                data = model.predict(self.test_matrix)
                pred = get_error(prediction=data,
                                 target=self.test_target)['average']

        return coeff, pred 
Example #21
Source File: Booster.py    From Fast-and-Accurate-Least-Mean-Squares-Solvers with MIT License
def get_new_clf(solver, folds=3, alphas=100):
    kf=KFold(n_splits=folds,shuffle=False)
    if "linear" == solver:
        clf = linear_model.LinearRegression(fit_intercept=False)
    if "ridge" == solver:
        alphas =  np.arange(1/alphas, 10+ 1/alphas, 10/alphas)
        clf = linear_model.RidgeCV(alphas=alphas, fit_intercept=False, cv=kf)
    elif "lasso" == solver:
        clf=linear_model.LassoCV(n_alphas=alphas, fit_intercept=False, cv=kf)
    elif "elastic" == solver:
        clf = linear_model.ElasticNetCV(n_alphas=alphas, fit_intercept=False, cv=kf)
    return clf 
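A possible way to call this factory (the data is made up; KFold and linear_model are assumed to be imported as in the original file):

import numpy as np

rng = np.random.RandomState(0)
X = rng.randn(200, 10)
y = X @ np.arange(1, 11, dtype=float) + 0.1 * rng.randn(200)

clf = get_new_clf("lasso", folds=5, alphas=50)   # LassoCV over 50 alphas, 5-fold CV
clf.fit(X, y)
print(clf.alpha_)
print(clf.coef_)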
Example #22
Source File: test_coordinate_descent.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_precompute_invalid_argument():
    X, y, _, _ = build_dataset()
    for clf in [ElasticNetCV(precompute="invalid"),
                LassoCV(precompute="invalid")]:
        assert_raises_regex(ValueError, ".*should be.*True.*False.*auto.*"
                            "array-like.*Got 'invalid'", clf.fit, X, y)

    # Precompute = 'auto' is not supported for ElasticNet
    assert_raises_regex(ValueError, ".*should be.*True.*False.*array-like.*"
                        "Got 'auto'", ElasticNet(precompute='auto').fit, X, y) 
Example #23
Source File: test_coordinate_descent.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_1d_multioutput_lasso_and_multitask_lasso_cv():
    X, y, _, _ = build_dataset(n_features=10)
    y = y[:, np.newaxis]
    clf = LassoCV(n_alphas=5, eps=2e-3)
    clf.fit(X, y[:, 0])
    clf1 = MultiTaskLassoCV(n_alphas=5, eps=2e-3)
    clf1.fit(X, y)
    assert_almost_equal(clf.alpha_, clf1.alpha_)
    assert_almost_equal(clf.coef_, clf1.coef_[0])
    assert_almost_equal(clf.intercept_, clf1.intercept_[0]) 
Example #24
Source File: test_coordinate_descent.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_uniform_targets():
    enet = ElasticNetCV(fit_intercept=True, n_alphas=3)
    m_enet = MultiTaskElasticNetCV(fit_intercept=True, n_alphas=3)
    lasso = LassoCV(fit_intercept=True, n_alphas=3)
    m_lasso = MultiTaskLassoCV(fit_intercept=True, n_alphas=3)

    models_single_task = (enet, lasso)
    models_multi_task = (m_enet, m_lasso)

    rng = np.random.RandomState(0)

    X_train = rng.random_sample(size=(10, 3))
    X_test = rng.random_sample(size=(10, 3))

    y1 = np.empty(10)
    y2 = np.empty((10, 2))

    for model in models_single_task:
        for y_values in (0, 5):
            y1.fill(y_values)
            assert_array_equal(model.fit(X_train, y1).predict(X_test), y1)
            assert_array_equal(model.alphas_, [np.finfo(float).resolution]*3)

    for model in models_multi_task:
        for y_values in (0, 5):
            y2[:, 0].fill(y_values)
            y2[:, 1].fill(2 * y_values)
            assert_array_equal(model.fit(X_train, y2).predict(X_test), y2)
            assert_array_equal(model.alphas_, [np.finfo(float).resolution]*3) 
Example #25
Source File: test_coordinate_descent.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_lasso_cv_positive_constraint():
    X, y, X_test, y_test = build_dataset()
    max_iter = 500

    # Ensure the unconstrained fit has a negative coefficient
    clf_unconstrained = LassoCV(n_alphas=3, eps=1e-1, max_iter=max_iter, cv=2,
                                n_jobs=1)
    clf_unconstrained.fit(X, y)
    assert min(clf_unconstrained.coef_) < 0

    # On same data, constrained fit has non-negative coefficients
    clf_constrained = LassoCV(n_alphas=3, eps=1e-1, max_iter=max_iter,
                              positive=True, cv=2, n_jobs=1)
    clf_constrained.fit(X, y)
    assert min(clf_constrained.coef_) >= 0 
Example #26
Source File: match_space.py    From SparseSC with MIT License
def _D_LassoCV_MatchSpace(
    X, Y, X_full, D_full, v_pens=None, n_v_cv=5, sample_frac=1, y_V_share=0.5, **kwargs
):  # pylint: disable=missing-param-doc, unused-argument
    if sample_frac < 1:
        N_y = X.shape[0]
        sample_y = np.random.choice(N_y, int(sample_frac * N_y), replace=False)
        X = X[sample_y, :]
        Y = Y[sample_y, :]
        N_d = D_full.shape[0]
        sample_d = np.random.choice(N_d, int(sample_frac * N_d), replace=False)
        X_full = X_full[sample_d, :]
        D_full = D_full[sample_d]
    y_varselectorfit = MultiTaskLassoCV(normalize=True, cv=n_v_cv, alphas=v_pens).fit(
        X, Y
    )
    y_V = np.sqrt(
        np.sum(np.square(y_varselectorfit.coef_), axis=0)
    )  # n_tasks x n_features -> n_feature
    best_y_v_pen = y_varselectorfit.alpha_

    d_varselectorfit = LassoCV(normalize=True, cv=n_v_cv, alphas=v_pens).fit(
        X_full, D_full
    )
    d_V = np.abs(d_varselectorfit.coef_)
    best_d_v_pen = d_varselectorfit.alpha_

    m_sel = (y_V + d_V) != 0
    transformer = SelMatchSpace(m_sel)
    if y_V.sum() == 0:
        V = d_V
    elif d_V.sum() == 0:
        V = y_V
    else:
        V = y_V_share * y_V / (y_V.sum()) + (1 - y_V_share) * d_V / (2 * d_V.sum())
    return transformer, V[m_sel], (best_y_v_pen, best_d_v_pen), V 
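Note that the normalize parameter used for LassoCV and MultiTaskLassoCV above was deprecated in scikit-learn 1.0 and removed in 1.2. On newer versions, a roughly equivalent (but not numerically identical) formulation scales the features in a pipeline, for example:

from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LassoCV

# Rough replacement for LassoCV(normalize=True, cv=n_v_cv, alphas=v_pens)
# on scikit-learn >= 1.2; standard scaling is not identical to the old
# normalize=True behaviour, so the selected alpha_ may differ.
d_varselector = make_pipeline(
    StandardScaler(),
    LassoCV(cv=5, alphas=None),   # cv and alphas shown as placeholders
)
# After fitting, the coefficients live on the last pipeline step:
# d_varselector.fit(X_full, D_full); d_V = np.abs(d_varselector[-1].coef_)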
Example #27
Source File: feature_selection.py    From ecg-classification with GNU General Public License v3.0
def run_feature_selection(features, labels, feature_selection, best_features):
    
    if feature_selection == 'select_K_Best':
        # feature extraction
        selector = SelectKBest(score_func=f_classif, k=4) # score_func=chi2 : only for non-negative features
        selector.fit(features, labels)
        # summarize scores
        scores = selector.scores_
        features_index_sorted = np.argsort(-scores)
        features_selected = features[:, features_index_sorted[0:best_features]]

    # SelectFromModel and LassoCV
    
    # We use the base estimator LassoCV since the L1 norm promotes sparsity of features.
    if feature_selection == 'LassoCV':
        clf = LassoCV()

        # Keep only features whose Lasso coefficient exceeds the 0.95 threshold
        sfm = SelectFromModel(clf, threshold=0.95)
        sfm.fit(features, labels)
        features_selected = sfm.transform(features)

        """
        # Reset the threshold till the number of features equals two.
        # Note that the attribute can be set directly instead of repeatedly
        # fitting the metatransformer.
        while n_features > 2:
            sfm.threshold += 0.1
            X_transform = sfm.transform(X)
            n_features = X_transform.shape[1]
        """

    # Univariate feature selection
    # Univariate feature selection works by selecting the best features based on univariate statistical tests. 
    # It can be seen as a preprocessing step to an estimator. 
    # Scikit-learn exposes feature selection routines as objects that implement the transform method:
    #   - SelectKBest removes all but the k highest scoring features
    #   - SelectPercentile removes all but a user-specified highest scoring percentage of features
    #       common univariate statistical tests for each feature: false positive rate SelectFpr, false discovery rate SelectFdr, or family wise error SelectFwe.
    #   - GenericUnivariateSelect allows to perform univariate feature selection with a configurable strategy. This allows to select the best univariate selection strategy with hyper-parameter search estimator.

    if feature_selection == 'slct_percentile':
        selector = SelectPercentile(f_classif, percentile=10)
        selector.fit(features, labels)
        # The percentile not affect. 
        # Just select in order the top features by number or threshold

        # Keep best 8 values?
        scores = selector.scores_
        features_index_sorted = np.argsort(-scores)
        # scores = selector.scores_

        # scores = -np.log10(selector.pvalues_)
        # scores /= scores.max()

        features_selected = features[:, features_index_sorted[0:best_features]]

    print("Selected only " + str(features_selected.shape) + " features ")

    return features_selected, features_index_sorted 
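For the LassoCV/SelectFromModel branch, the identity of the kept columns can be recovered from the fitted selector via get_support(). A self-contained sketch on synthetic data (regression targets are used purely for illustration):

import numpy as np
from sklearn.datasets import make_regression
from sklearn.feature_selection import SelectFromModel
from sklearn.linear_model import LassoCV

features, labels = make_regression(n_samples=200, n_features=15,
                                   n_informative=4, random_state=0)
sfm = SelectFromModel(LassoCV(), threshold=0.95).fit(features, labels)

selected_mask = sfm.get_support()              # boolean mask over the original columns
selected_idx = np.flatnonzero(selected_mask)   # integer indices of the kept features
features_selected = features[:, selected_idx]  # same result as sfm.transform(features)
print(features_selected.shape)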
Example #28
Source File: modeler.py    From rsmtool with Apache License 2.0
def train_positive_lasso_cv(self, df_train, feature_columns):
        """
        Train `PositiveLassoCV` (formerly lassoWtLassoBest) -
        Feature selection using lasso regression optimized for log likelihood
        using cross validation.


        Parameters
        ----------
        df_train : pd.DataFrame
            Data frame containing the features on which
            to train the model.
        feature_columns : list
            A list of feature columns to use in training the model.

        Returns
        -------
        learner : skll.Learner
            The SKLL learner object
        fit : statsmodels.RegressionResults
            A statsmodels regression results object or None.
        df_coef : pd.DataFrame
            The model coefficients in a data_frame
        used_features : list
            A list of features used in the final model.
        """
        # train a LassoCV outside of SKLL since it's not exposed there
        X = df_train[feature_columns].values
        y = df_train['sc1'].values
        clf = LassoCV(cv=10, positive=True, random_state=1234567890)
        model = clf.fit(X, y)

        # save the non-zero model coefficients and intercept to a data frame
        non_zero_features, non_zero_feature_values = [], []
        for feature, coefficient in zip(feature_columns, model.coef_):
            if coefficient != 0:
                non_zero_features.append(feature)
                non_zero_feature_values.append(coefficient)

        # initialize the coefficient data frame with just the intercept
        df_coef = pd.DataFrame([('Intercept', model.intercept_)])
        df_coef = df_coef.append(list(zip(non_zero_features,
                                          non_zero_feature_values)), ignore_index=True)
        df_coef.columns = ['feature', 'coefficient']

        # create a fake SKLL learner with these non-zero weights
        learner = self.create_fake_skll_learner(df_coef)

        # there's no OLS fit object in this case
        fit = None

        # we used only the non-zero features
        used_features = non_zero_features

        return learner, fit, df_coef, used_features 
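Note that DataFrame.append, used to build df_coef above, was deprecated in pandas 1.4 and removed in 2.0. An equivalent construction with pd.concat (reusing the names from the method above) would be:

import pandas as pd

df_coef = pd.concat(
    [pd.DataFrame([('Intercept', model.intercept_)]),
     pd.DataFrame(list(zip(non_zero_features, non_zero_feature_values)))],
    ignore_index=True,
)
df_coef.columns = ['feature', 'coefficient']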
Example #29
Source File: modeler.py    From rsmtool with Apache License 2.0
def train_positive_lasso_cv_then_lr(self, df_train, feature_columns):
        """
        Train `PositiveLassoCVThenLR` (formerly empWtLassoBest) -
        First do feature selection using lasso regression optimized
        for log likelihood using cross validation and then use only
        those features to train a second linear regression.

        Parameters
        ----------
        df_train : pd.DataFrame
            Data frame containing the features on which
            to train the model.
        feature_columns : list
            A list of feature columns to use in training the model.

        Returns
        -------
        learner : skll.Learner
            The SKLL learner object
        fit : statsmodels.RegressionResults
            A statsmodels regression results object.
        df_coef : pd.DataFrame
            The model coefficients in a data_frame
        used_features : list
            A list of features used in the final model.
        """
        # train a LassoCV outside of SKLL since it's not exposed there
        X = df_train[feature_columns].values
        y = df_train['sc1'].values
        clf = LassoCV(cv=10, positive=True, random_state=1234567890)
        model = clf.fit(X, y)

        # get the non-zero features from this model
        non_zero_features = []
        for feature, coefficient in zip(feature_columns, model.coef_):
            if coefficient != 0:
                non_zero_features.append(feature)

        # now train a new linear regression with just these non-zero features
        X = df_train[non_zero_features]
        X = sm.add_constant(X)
        fit = sm.OLS(df_train['sc1'], X).fit()

        # convert the model parameters into a data frame
        df_coef = self.ols_coefficients_to_dataframe(fit.params)

        # create fake SKLL learner with these coefficients
        learner = self.create_fake_skll_learner(df_coef)

        # we used only the non-zero features
        used_features = non_zero_features

        return learner, fit, df_coef, used_features