Python sklearn.linear_model.Lasso() Examples

The following are 30 code examples of sklearn.linear_model.Lasso(). Each example is drawn from an open-source project; the source file, project, and license are noted above it. You may also want to check out all available functions and classes of the module sklearn.linear_model.
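For orientation, here is a minimal, self-contained sketch of the basic Lasso API (fit, predict, coef_) on synthetic data; the alpha value is arbitrary and chosen only for illustration.

import numpy as np
from sklearn.datasets import make_regression
from sklearn.linear_model import Lasso

# Synthetic regression problem with a handful of informative features.
X, y = make_regression(n_samples=200, n_features=20, n_informative=5,
                       noise=1.0, random_state=0)

# alpha controls the strength of the L1 penalty; larger values zero out more coefficients.
lasso = Lasso(alpha=0.1)
lasso.fit(X, y)

print("non-zero coefficients:", np.sum(lasso.coef_ != 0))
print("first predictions:", lasso.predict(X[:3]))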
Example #1
Source File: test_multioutput.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_multi_target_regression_partial_fit():
    X, y = datasets.make_regression(n_targets=3)
    X_train, y_train = X[:50], y[:50]
    X_test, y_test = X[50:], y[50:]

    references = np.zeros_like(y_test)
    half_index = 25
    for n in range(3):
        sgr = SGDRegressor(random_state=0, max_iter=5)
        sgr.partial_fit(X_train[:half_index], y_train[:half_index, n])
        sgr.partial_fit(X_train[half_index:], y_train[half_index:, n])
        references[:, n] = sgr.predict(X_test)

    sgr = MultiOutputRegressor(SGDRegressor(random_state=0, max_iter=5))

    sgr.partial_fit(X_train[:half_index], y_train[:half_index])
    sgr.partial_fit(X_train[half_index:], y_train[half_index:])

    y_pred = sgr.predict(X_test)
    assert_almost_equal(references, y_pred)
    assert not hasattr(MultiOutputRegressor(Lasso), 'partial_fit') 
Example #2
Source File: friedman_scores.py    From mlens with MIT License
def build_ensemble(**kwargs):
    """Generate ensemble."""

    ens = SuperLearner(**kwargs)
    prep = {'Standard Scaling': [StandardScaler()],
            'Min Max Scaling': [MinMaxScaler()],
            'No Preprocessing': []}

    est = {'Standard Scaling':
               [ElasticNet(), Lasso(), KNeighborsRegressor()],
           'Min Max Scaling':
               [SVR()],
           'No Preprocessing':
               [RandomForestRegressor(random_state=SEED),
                GradientBoostingRegressor()]}

    ens.add(est, prep)

    ens.add(GradientBoostingRegressor(), meta=True)

    return ens 
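A minimal usage sketch for the ensemble builder above. SEED and the Friedman data are placeholders standing in for what the mlens benchmark script defines; any SuperLearner keyword arguments would be forwarded through build_ensemble unchanged.

from sklearn.datasets import make_friedman1

SEED = 2017                          # placeholder; the benchmark defines its own seed
X, y = make_friedman1(n_samples=500, random_state=SEED)

ens = build_ensemble()               # SuperLearner kwargs (e.g. folds, scorer) could go here
ens.fit(X, y)
predictions = ens.predict(X)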
Example #3
Source File: test_stability_selection.py    From stability-selection with BSD 3-Clause "New" or "Revised" License
def test_stability_selection_regression():
    n, p, k = 500, 1000, 5

    X, y, important_betas = _generate_dummy_regression_data(n=n, k=k)

    base_estimator = Pipeline([
        ('scaler', StandardScaler()),
        ('model', Lasso())
    ])

    lambdas_grid = np.logspace(-1, 1, num=10)

    selector = StabilitySelection(base_estimator=base_estimator,
                                  lambda_name='model__alpha',
                                  lambda_grid=lambdas_grid)
    selector.fit(X, y)

    chosen_betas = selector.get_support(indices=True)

    assert_almost_equal(important_betas, chosen_betas) 
Example #4
Source File: test_stability_selection.py    From stability-selection with BSD 3-Clause "New" or "Revised" License
def test_with_complementary_pairs_bootstrap():
    n, p, k = 500, 1000, 5

    X, y, important_betas = _generate_dummy_regression_data(n=n, k=k)

    base_estimator = Pipeline([
        ('scaler', StandardScaler()),
        ('model', Lasso())
    ])

    lambdas_grid = np.logspace(-1, 1, num=10)

    selector = StabilitySelection(base_estimator=base_estimator,
                                  lambda_name='model__alpha',
                                  lambda_grid=lambdas_grid,
                                  bootstrap_func='complementary_pairs')
    selector.fit(X, y)

    chosen_betas = selector.get_support(indices=True)

    assert_almost_equal(important_betas, chosen_betas) 
Example #5
Source File: test_stability_selection.py    From stability-selection with BSD 3-Clause "New" or "Revised" License
def test_different_shape():
    n, p, k = 100, 200, 5

    X, y, important_betas = _generate_dummy_regression_data(n=n, k=k)

    base_estimator = Pipeline([
        ('scaler', StandardScaler()),
        ('model', Lasso())
    ])

    lambdas_grid = np.logspace(-1, 1, num=10)

    selector = StabilitySelection(base_estimator=base_estimator,
                                  lambda_name='model__alpha',
                                  lambda_grid=lambdas_grid)
    selector.fit(X, y)
    selector.transform(X[:, :-2]) 
Example #6
Source File: test_stability_selection.py    From stability-selection with BSD 3-Clause "New" or "Revised" License
def test_no_features():
    n, p, k = 100, 200, 0

    X, y, important_betas = _generate_dummy_regression_data(n=n, k=k)

    base_estimator = Pipeline([
        ('scaler', StandardScaler()),
        ('model', Lasso())
    ])

    lambdas_grid = np.logspace(-1, 1, num=10)

    selector = StabilitySelection(base_estimator=base_estimator,
                                  lambda_name='model__alpha',
                                  lambda_grid=lambdas_grid)
    selector.fit(X, y)

    assert_almost_equal(selector.transform(X),
                        np.empty(0).reshape((X.shape[0], 0))) 
Example #7
Source File: test_stability_selection.py    From stability-selection with BSD 3-Clause "New" or "Revised" License
def test_stability_plot():
    n, p, k = 500, 200, 5

    X, y, important_betas = _generate_dummy_regression_data(n=n, k=k)

    base_estimator = Pipeline([
        ('scaler', StandardScaler()),
        ('model', Lasso())
    ])

    lambdas_grid = np.logspace(-1, 1, num=10)

    selector = StabilitySelection(base_estimator=base_estimator,
                                  lambda_name='model__alpha',
                                  lambda_grid=lambdas_grid)
    selector.fit(X, y)

    plot_stability_path(selector, threshold_highlight=0.5) 
Example #8
Source File: test_target.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_transform_target_regressor_error():
    X, y = friedman
    # provide a transformer and functions at the same time
    regr = TransformedTargetRegressor(regressor=LinearRegression(),
                                      transformer=StandardScaler(),
                                      func=np.exp, inverse_func=np.log)
    assert_raises_regex(ValueError, "'transformer' and functions"
                        " 'func'/'inverse_func' cannot both be set.",
                        regr.fit, X, y)
    # fit with sample_weight with a regressor which does not support it
    sample_weight = np.ones((y.shape[0],))
    regr = TransformedTargetRegressor(regressor=Lasso(),
                                      transformer=StandardScaler())
    assert_raises_regex(TypeError, r"fit\(\) got an unexpected keyword "
                        "argument 'sample_weight'", regr.fit, X, y,
                        sample_weight=sample_weight)
    # func is given but inverse_func is not
    regr = TransformedTargetRegressor(func=np.exp)
    assert_raises_regex(ValueError, "When 'func' is provided, 'inverse_func'"
                        " must also be provided", regr.fit, X, y) 
Example #9
Source File: test_least_angle.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_rank_deficient_design():
    # consistency test that checks that LARS Lasso is handling rank
    # deficient input data (with n_features < rank) in the same way
    # as coordinate descent Lasso
    y = [5, 0, 5]
    for X in (
              [[5, 0],
               [0, 5],
               [10, 10]],
              [[10, 10, 0],
               [1e-32, 0, 0],
               [0, 0, 1]]
             ):
        # To be able to use the coefs to compute the objective function,
        # we need to turn off normalization
        lars = linear_model.LassoLars(.1, normalize=False)
        coef_lars_ = lars.fit(X, y).coef_
        obj_lars = (1. / (2. * 3.)
                    * linalg.norm(y - np.dot(X, coef_lars_)) ** 2
                    + .1 * linalg.norm(coef_lars_, 1))
        coord_descent = linear_model.Lasso(.1, tol=1e-6, normalize=False)
        coef_cd_ = coord_descent.fit(X, y).coef_
        obj_cd = ((1. / (2. * 3.)) * linalg.norm(y - np.dot(X, coef_cd_)) ** 2
                  + .1 * linalg.norm(coef_cd_, 1))
        assert_less(obj_lars, obj_cd * (1. + 1e-8)) 
Example #10
Source File: test_least_angle.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_lasso_lars_vs_lasso_cd_early_stopping():
    # Test that LassoLars and Lasso using coordinate descent give the
    # same results when early stopping is used.
    # (test : before, in the middle, and in the last part of the path)
    alphas_min = [10, 0.9, 1e-4]

    for alpha_min in alphas_min:
        alphas, _, lasso_path = linear_model.lars_path(X, y, method='lasso',
                                                       alpha_min=alpha_min)
        lasso_cd = linear_model.Lasso(fit_intercept=False, tol=1e-8)
        lasso_cd.alpha = alphas[-1]
        lasso_cd.fit(X, y)
        error = linalg.norm(lasso_path[:, -1] - lasso_cd.coef_)
        assert_less(error, 0.01)

    # same test, with normalization
    for alpha_min in alphas_min:
        alphas, _, lasso_path = linear_model.lars_path(X, y, method='lasso',
                                                       alpha_min=alpha_min)
        lasso_cd = linear_model.Lasso(fit_intercept=True, normalize=True,
                                      tol=1e-8)
        lasso_cd.alpha = alphas[-1]
        lasso_cd.fit(X, y)
        error = linalg.norm(lasso_path[:, -1] - lasso_cd.coef_)
        assert_less(error, 0.01) 
Example #11
Source File: model_recommendation.py    From DIVE-backend with GNU General Public License v3.0
def lasso(df, dependent_variable, independent_variables, interaction_terms=[], model_limit=5):
    considered_independent_variables_per_model, patsy_models = \
    construct_models(df, dependent_variable, independent_variables, interaction_terms, table_layout=MCT.ALL_VARIABLES.value)
    y, X = dmatrices(patsy_models[0], df, return_type='dataframe')

    clf = linear_model.Lasso(
        alpha = 1.0,
        normalize=True
    )
    clf.fit(X, y)
    fit_coef = clf.coef_
    # mean of each design-matrix column (axis=0); axis=1 would give per-row means
    column_means = np.apply_along_axis(np.mean, 0, X)

    selected_variables = [
        independent_variable
        for (i, independent_variable) in enumerate(independent_variables)
        if abs(fit_coef[i]) >= column_means[i]
    ]

    return selected_variables 
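The same idea (keep only the variables whose Lasso coefficients survive the penalty) can be sketched without the patsy/DIVE plumbing. This is a simplified illustration, not the project's code; the alpha and the near-zero cut-off are arbitrary.

import numpy as np
from sklearn.linear_model import Lasso
from sklearn.preprocessing import StandardScaler

def lasso_select(X, y, feature_names, alpha=1.0):
    """Return the names of features with non-zero Lasso coefficients."""
    X_scaled = StandardScaler().fit_transform(X)   # scale so one alpha suits all columns
    coef = Lasso(alpha=alpha).fit(X_scaled, y).coef_
    return [name for name, c in zip(feature_names, coef) if abs(c) > 1e-6]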
Example #12
Source File: scikitlearn.py    From sia-cog with MIT License
def getModels():
    result = []
    result.append("LinearRegression")
    result.append("BayesianRidge")
    result.append("ARDRegression")
    result.append("ElasticNet")
    result.append("HuberRegressor")
    result.append("Lasso")
    result.append("LassoLars")
    result.append("Rigid")
    result.append("SGDRegressor")
    result.append("SVR")
    result.append("MLPClassifier")
    result.append("KNeighborsClassifier")
    result.append("SVC")
    result.append("GaussianProcessClassifier")
    result.append("DecisionTreeClassifier")
    result.append("RandomForestClassifier")
    result.append("AdaBoostClassifier")
    result.append("GaussianNB")
    result.append("LogisticRegression")
    result.append("QuadraticDiscriminantAnalysis")
    return result 
Example #13
Source File: gd_poisoners.py    From manip-ml with MIT License
def comp_attack_vld(self,clf,wxc,bxc,wyc,byc,otherargs):
        n = self.vldx.shape[0]
        res = (clf.predict(self.vldx)-self.vldy)

        gradx = np.dot(self.vldx, wxc)   + bxc
        grady = np.dot(self.vldx, wyc.T) + byc

        attackx = np.dot(res,gradx) / n
        attacky = np.dot(res,grady) / n

        return attackx, attacky


############################################################################################
# Implements GD Poisoning for Lasso Linear Regression
############################################################################################ 
Example #14
Source File: gd_poisoners.py    From manip-ml with MIT License
def learn_model(self, x, y, clf, lam = None):
        if (lam is None and self.initlam != -1): # hack for first training
            lam = self.initlam
        if clf is None:
            if lam is None:
                clf = linear_model.LassoCV(max_iter=10000)
                clf.fit(x, y)
                lam = clf.alpha_
            clf = linear_model.Lasso(alpha = lam, \
                                 max_iter = 10000, \
                                 warm_start = True)
        clf.fit(x, y)
        return clf, lam


############################################################################################
# Implements GD Poisoning for Ridge Linear Regression
############################################################################################ 
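Outside the poisoning class, the fitting pattern used in learn_model above can be sketched on plain arrays: pick alpha once with LassoCV, then reuse it in a warm-started Lasso. The data here are placeholders.

from sklearn.datasets import make_regression
from sklearn.linear_model import Lasso, LassoCV

X_train, y_train = make_regression(n_samples=300, n_features=30, random_state=0)

# Pick the regularisation strength once with cross-validation...
alpha = LassoCV(max_iter=10000, cv=5).fit(X_train, y_train).alpha_

# ...then reuse it in a warm-started Lasso that can be refit cheaply as the data change.
clf = Lasso(alpha=alpha, max_iter=10000, warm_start=True)
clf.fit(X_train, y_train)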
Example #15
Source File: udr.py    From disentanglement_lib with Apache License 2.0
def lasso_correlation_matrix(vec1, vec2, random_state=None):
  """Computes correlation matrix of two representations using Lasso Regression.

  Args:
    vec1: 2d array of representations with axis 0 the batch dimension and axis 1
      the representation dimension.
    vec2: 2d array of representations with axis 0 the batch dimension and axis 1
      the representation dimension.
    random_state: int used to seed an RNG used for model training.

  Returns:
    A 2d array with the correlations between all pairwise combinations of
    elements of both representations are computed. Elements of vec1 correspond
    to axis 0 and elements of vec2 correspond to axis 1.
  """
  assert vec1.shape == vec2.shape
  model = linear_model.Lasso(random_state=random_state, alpha=0.1)
  model.fit(vec1, vec2)
  return np.transpose(np.absolute(model.coef_)) 
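A hypothetical call to the helper above, assuming it is importable from udr.py; the two representations are random and serve only to show the expected shapes.

import numpy as np

rng = np.random.RandomState(0)
vec1 = rng.randn(100, 10)          # 100 samples, 10 latent dimensions
vec2 = rng.randn(100, 10)

corr = lasso_correlation_matrix(vec1, vec2, random_state=0)
print(corr.shape)                  # (10, 10): vec1 dimensions on axis 0, vec2 on axis 1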
Example #16
Source File: test_timegapsplit.py    From scikit-lego with MIT License
def test_timegapsplit_with_gridsearch():

    cv = TimeGapSplit(
        date_serie=df["date"],
        train_duration=timedelta(days=5),
        valid_duration=timedelta(days=3),
        gap_duration=timedelta(days=0),
    )

    Lasso(random_state=0, tol=0.1, alpha=0.8).fit(X_train, y_train)

    pipe = Pipeline([("reg", Lasso(random_state=0, tol=0.1))])
    alphas = [0.1, 0.5, 0.8]
    grid = GridSearchCV(pipe, {"reg__alpha": alphas}, cv=cv)
    grid.fit(X_train, y_train)
    best_C = grid.best_estimator_.get_params()["reg__alpha"]

    assert best_C 
Example #17
Source File: feature_transformer.py    From py_ml_utils with Apache License 2.0
def __init__(self,
                 feature_name=None,
                 regressor=Lasso(),
                 noise_level=None,
                 drop_level=None,
                 keep_dum_cols_with_nan=True):

        # Call super
        super(CategoricalRegressorTransformation, self).__init__(feature_name)
        self._process_name = "Categorical_Regressor"
        # Keep average type
        self.noise_level = noise_level
        # Place-holder for averages
        self.regressor = regressor
        self.dum_tf = DummyTransformation(feature_name=feature_name,
                                          drop_level=drop_level,
                                          noise_level=None,
                                          keep_dum_cols_with_nan=keep_dum_cols_with_nan)
        self.oof_process = False
        self.fit_columns = None 
Example #18
Source File: test_sklearn_grid_search_cv_converter.py    From sklearn-onnx with MIT License
def test_grid_search_regression_int(self):
        tuned_parameters = [{'alpha': np.logspace(-4, -0.5, 4)}]
        clf = GridSearchCV(Lasso(max_iter=100),
                           tuned_parameters, cv=5)
        model, X = fit_regression_model(clf, is_int=True)
        model_onnx = convert_sklearn(
            model, "GridSearchCV",
            [("input", Int64TensorType([None, X.shape[1]]))])
        self.assertIsNotNone(model_onnx)
        dump_data_and_model(
            X,
            model,
            model_onnx,
            basename="SklearnGridSerachRegressionInt-OneOffArray-Dec4",
            allow_failure="StrictVersion("
            "onnxruntime.__version__) "
            "<= StrictVersion('0.2.1') or "
            "StrictVersion(onnx.__version__) "
            "== StrictVersion('1.4.1')",
        ) 
Example #19
Source File: test_search_2.py    From spark-sklearn with Apache License 2.0
def test_cv_pipeline(self):
        pipeline = SKL_Pipeline([
            ('vect', SKL_HashingVectorizer(n_features=20)),
            ('tfidf', SKL_TfidfTransformer(use_idf=False)),
            ('lasso', SKL_Lasso())
        ])
        parameters = {
            'lasso__alpha': (0.001, 0.005, 0.01)
        }
        grid_search = GridSearchCV(self.sc, pipeline, parameters)
        data = [('hi there', 0.0),
                ('what is up', 1.0),
                ('huh', 1.0),
                ('now is the time', 5.0),
                ('for what', 0.0),
                ('the spark was there', 5.0),
                ('and so', 3.0),
                ('were many socks', 0.0),
                ('really', 1.0),
                ('too cool', 2.0)]
        df = self.sql.createDataFrame(data, ["review", "rating"]).toPandas()
        skl_gs = grid_search.fit(df.review.values, df.rating.values)
        assert len(skl_gs.cv_results_['params']) == len(parameters['lasso__alpha']) 
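For comparison, roughly the same search expressed with plain scikit-learn and no Spark context; the parameter grid and toy data mirror the test above, and cv=2 is chosen only because the toy corpus is tiny.

from sklearn.feature_extraction.text import HashingVectorizer, TfidfTransformer
from sklearn.linear_model import Lasso
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline

pipeline = Pipeline([
    ('vect', HashingVectorizer(n_features=20)),
    ('tfidf', TfidfTransformer(use_idf=False)),
    ('lasso', Lasso()),
])
parameters = {'lasso__alpha': (0.001, 0.005, 0.01)}

texts = ['hi there', 'what is up', 'huh', 'now is the time', 'for what']
ratings = [0.0, 1.0, 1.0, 5.0, 0.0]

grid_search = GridSearchCV(pipeline, parameters, cv=2)
grid_search.fit(texts, ratings)
print(grid_search.best_params_)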
Example #20
Source File: test_multioutput.py    From twitter-stock-recommendation with MIT License
def test_multi_target_regression_partial_fit():
    X, y = datasets.make_regression(n_targets=3)
    X_train, y_train = X[:50], y[:50]
    X_test, y_test = X[50:], y[50:]

    references = np.zeros_like(y_test)
    half_index = 25
    for n in range(3):
        sgr = SGDRegressor(random_state=0, max_iter=5)
        sgr.partial_fit(X_train[:half_index], y_train[:half_index, n])
        sgr.partial_fit(X_train[half_index:], y_train[half_index:, n])
        references[:, n] = sgr.predict(X_test)

    sgr = MultiOutputRegressor(SGDRegressor(random_state=0, max_iter=5))

    sgr.partial_fit(X_train[:half_index], y_train[:half_index])
    sgr.partial_fit(X_train[half_index:], y_train[half_index:])

    y_pred = sgr.predict(X_test)
    assert_almost_equal(references, y_pred)
    assert_false(hasattr(MultiOutputRegressor(Lasso), 'partial_fit')) 
Example #21
Source File: test_least_angle.py    From twitter-stock-recommendation with MIT License
def test_rank_deficient_design():
    # consistency test that checks that LARS Lasso is handling rank
    # deficient input data (with n_features < rank) in the same way
    # as coordinate descent Lasso
    y = [5, 0, 5]
    for X in ([[5, 0],
               [0, 5],
               [10, 10]],

              [[10, 10, 0],
               [1e-32, 0, 0],
               [0, 0, 1]],
              ):
        # To be able to use the coefs to compute the objective function,
        # we need to turn off normalization
        lars = linear_model.LassoLars(.1, normalize=False)
        coef_lars_ = lars.fit(X, y).coef_
        obj_lars = (1. / (2. * 3.)
                    * linalg.norm(y - np.dot(X, coef_lars_)) ** 2
                    + .1 * linalg.norm(coef_lars_, 1))
        coord_descent = linear_model.Lasso(.1, tol=1e-6, normalize=False)
        coef_cd_ = coord_descent.fit(X, y).coef_
        obj_cd = ((1. / (2. * 3.)) * linalg.norm(y - np.dot(X, coef_cd_)) ** 2
                  + .1 * linalg.norm(coef_cd_, 1))
        assert_less(obj_lars, obj_cd * (1. + 1e-8)) 
Example #22
Source File: test_least_angle.py    From twitter-stock-recommendation with MIT License
def test_lasso_lars_vs_lasso_cd_early_stopping(verbose=False):
    # Test that LassoLars and Lasso using coordinate descent give the
    # same results when early stopping is used.
    # (test : before, in the middle, and in the last part of the path)
    alphas_min = [10, 0.9, 1e-4]

    for alpha_min in alphas_min:
        alphas, _, lasso_path = linear_model.lars_path(X, y, method='lasso',
                                                       alpha_min=alpha_min)
        lasso_cd = linear_model.Lasso(fit_intercept=False, tol=1e-8)
        lasso_cd.alpha = alphas[-1]
        lasso_cd.fit(X, y)
        error = linalg.norm(lasso_path[:, -1] - lasso_cd.coef_)
        assert_less(error, 0.01)

    # same test, with normalization
    for alpha_min in alphas_min:
        alphas, _, lasso_path = linear_model.lars_path(X, y, method='lasso',
                                                       alpha_min=alpha_min)
        lasso_cd = linear_model.Lasso(fit_intercept=True, normalize=True,
                                      tol=1e-8)
        lasso_cd.alpha = alphas[-1]
        lasso_cd.fit(X, y)
        error = linalg.norm(lasso_path[:, -1] - lasso_cd.coef_)
        assert_less(error, 0.01) 
Example #23
Source File: advanced_supvervised_model_trainer.py    From healthcareai-py with MIT License
def lasso_regression(self, scoring_metric='neg_mean_squared_error',
                         hyperparameter_grid=None,
                         randomized_search=True,
                         number_iteration_samples=2):
        """
        A light wrapper for Sklearn's lasso regression that performs randomized 
        search over an overridable default hyperparameter grid.

        Args:
            scoring_metric (str): Any sklearn scoring metric appropriate for regression
            hyperparameter_grid (dict): hyperparameters by name
            randomized_search (bool): True for randomized search (default)

            number_iteration_samples (int): Number of models to train during the 
            randomized search for exploring the hyperparameter space. More may lead 
            to a better model, but will take longer.

        Returns:
            TrainedSupervisedModel:
        """
        self.validate_regression('Lasso Regression')
        if hyperparameter_grid is None:
            hyperparameter_grid = {"fit_intercept": [True, False]}
            number_iteration_samples = 2

        algorithm = get_algorithm(Lasso,
                                  scoring_metric,
                                  hyperparameter_grid,
                                  randomized_search,
                                  number_iteration_samples=number_iteration_samples)

        trained_supervised_model = self._create_trained_supervised_model(algorithm)

        return trained_supervised_model 
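Stripped of the healthcareai plumbing, the underlying search is roughly the following sketch; the two-value fit_intercept grid matches the default above, and the data are placeholders.

from sklearn.datasets import make_regression
from sklearn.linear_model import Lasso
from sklearn.model_selection import RandomizedSearchCV

X, y = make_regression(n_samples=200, n_features=10, random_state=0)

hyperparameter_grid = {'fit_intercept': [True, False]}
search = RandomizedSearchCV(Lasso(),
                            hyperparameter_grid,
                            n_iter=2,
                            scoring='neg_mean_squared_error',
                            random_state=0)
search.fit(X, y)
print(search.best_params_)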
Example #24
Source File: utils.py    From csgm with MIT License
def solve_lasso(A_val, y_val, hparams):
    if hparams.lasso_solver == 'sklearn':
        lasso_est = Lasso(alpha=hparams.lmbd)
        lasso_est.fit(A_val.T, y_val.reshape(hparams.num_measurements))
        x_hat = lasso_est.coef_
        x_hat = np.reshape(x_hat, [-1])
    if hparams.lasso_solver == 'cvxopt':
        A_mat = matrix(A_val.T)
        y_mat = matrix(y_val)
        x_hat_mat = l1regls(A_mat, y_mat)
        x_hat = np.asarray(x_hat_mat)
        x_hat = np.reshape(x_hat, [-1])
    return x_hat 
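A rough sketch of how the sklearn branch of solve_lasso might be exercised; the hparams namespace below is hypothetical and only mirrors the fields the function reads.

import numpy as np
from types import SimpleNamespace

hparams = SimpleNamespace(lasso_solver='sklearn', lmbd=0.1, num_measurements=50)

rng = np.random.RandomState(0)
n_features = 200
A_val = rng.randn(n_features, hparams.num_measurements)   # transposed inside solve_lasso
x_true = np.zeros(n_features)
x_true[:5] = 1.0                                           # sparse ground-truth signal
y_val = A_val.T.dot(x_true)                                # 50 noiseless measurements

x_hat = solve_lasso(A_val, y_val, hparams)
print(x_hat.shape)                                         # (200,)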
Example #25
Source File: friedman_memory.py    From mlens with MIT License
def build_ensemble(**kwargs):
    """Generate ensemble."""

    ens = SuperLearner(**kwargs)

    est = [ElasticNet(copy_X=False),
           Lasso(copy_X=False)]

    ens.add(est)
    ens.add(KNeighborsRegressor())

    return ens 
Example #26
Source File: friedman_memory.py    From mlens with MIT License
def lasso():
    """Fit Lasso."""
    print("Fitting LAS...", end=" ", flush=True)
    time.sleep(SLEEP)
    t0 = time.time()
    ls = Lasso()
    ls.fit(X, y)
    print_time(t0, "Done", end="") 
Example #27
Source File: friedman_memory.py    From mlens with MIT License
def elasticnet():
    """Fit Elastic Net."""
    print("Fitting ELN...", end=" ", flush=True)
    time.sleep(SLEEP)
    t0 = time.time()
    ls = Lasso()
    ls.fit(X, y)
    print_time(t0, "Done", end="") 
Example #28
Source File: scale_ens.py    From mlens with MIT License
def build_ensemble(kls, **kwargs):
    """Generate ensemble of class kls."""

    ens = kls(**kwargs)
    ens.add([SVR(), RandomForestRegressor(),
             GradientBoostingRegressor(), Lasso(copy_X=False),
             MLPRegressor(shuffle=False, alpha=0.001)])
    ens.add_meta(Lasso(copy_X=False))
    return ens 
Example #29
Source File: test_from_model.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_max_features():
    # Test max_features parameter using various values
    X, y = datasets.make_classification(
        n_samples=1000, n_features=10, n_informative=3, n_redundant=0,
        n_repeated=0, shuffle=False, random_state=0)
    max_features = X.shape[1]
    est = RandomForestClassifier(n_estimators=50, random_state=0)

    transformer1 = SelectFromModel(estimator=est,
                                   threshold=-np.inf)
    transformer2 = SelectFromModel(estimator=est,
                                   max_features=max_features,
                                   threshold=-np.inf)
    X_new1 = transformer1.fit_transform(X, y)
    X_new2 = transformer2.fit_transform(X, y)
    assert_allclose(X_new1, X_new2)

    # Test max_features against actual model.
    transformer1 = SelectFromModel(estimator=Lasso(alpha=0.025,
                                                   random_state=42))
    X_new1 = transformer1.fit_transform(X, y)
    scores1 = np.abs(transformer1.estimator_.coef_)
    candidate_indices1 = np.argsort(-scores1, kind='mergesort')

    for n_features in range(1, X_new1.shape[1] + 1):
        transformer2 = SelectFromModel(estimator=Lasso(alpha=0.025,
                                       random_state=42),
                                       max_features=n_features,
                                       threshold=-np.inf)
        X_new2 = transformer2.fit_transform(X, y)
        scores2 = np.abs(transformer2.estimator_.coef_)
        candidate_indices2 = np.argsort(-scores2, kind='mergesort')
        assert_allclose(X[:, candidate_indices1[:n_features]],
                        X[:, candidate_indices2[:n_features]])
    assert_allclose(transformer1.estimator_.coef_,
                    transformer2.estimator_.coef_) 
Example #30
Source File: test_from_model.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_coef_default_threshold():
    X, y = datasets.make_classification(
        n_samples=100, n_features=10, n_informative=3, n_redundant=0,
        n_repeated=0, shuffle=False, random_state=0)

    # For the Lasso and related models, the threshold defaults to 1e-5
    transformer = SelectFromModel(estimator=Lasso(alpha=0.1,
                                  random_state=42))
    transformer.fit(X, y)
    X_new = transformer.transform(X)
    mask = np.abs(transformer.estimator_.coef_) > 1e-5
    assert_array_almost_equal(X_new, X[:, mask])
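Finally, a minimal sketch of the default-threshold behaviour tested above, assuming regression data; with an L1-penalised estimator SelectFromModel defaults the threshold to 1e-5, so effectively every feature with a non-zero Lasso coefficient is kept.

from sklearn.datasets import make_regression
from sklearn.feature_selection import SelectFromModel
from sklearn.linear_model import Lasso

X, y = make_regression(n_samples=100, n_features=10, n_informative=3, random_state=0)

selector = SelectFromModel(estimator=Lasso(alpha=0.1, random_state=42))
selector.fit(X, y)
print(selector.get_support(indices=True))   # indices of the retained features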