Python sklearn.datasets.make_regression() Examples

The following are 30 code examples showing how to use sklearn.datasets.make_regression(). They are extracted from open source projects; the originating project, author, and source file are noted above each example.

You may also want to check out the other functions and classes available in the sklearn.datasets module.
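
Before the project examples, here is a minimal, self-contained sketch of the function itself (the parameter values are arbitrary, illustrative choices). With coef=True, make_regression() returns the ground-truth coefficients of the underlying linear model in addition to X and y:

import numpy as np
from sklearn.datasets import make_regression

# 100 samples, 10 features, only 3 of which carry signal;
# Gaussian noise with standard deviation 1.0 is added to y.
X, y, coef = make_regression(n_samples=100, n_features=10, n_informative=3,
                             noise=1.0, coef=True, random_state=0)

print(X.shape)     # (100, 10)
print(y.shape)     # (100,)
print(coef.shape)  # (10,); only 3 entries are non-zero

# y is approximately the linear model X @ coef plus the noise term
print(np.std(y - X @ coef))  # close to 1.0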

Example 1
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_multioutput.py   License: MIT License
def test_multi_target_regression_partial_fit():
    X, y = datasets.make_regression(n_targets=3)
    X_train, y_train = X[:50], y[:50]
    X_test, y_test = X[50:], y[50:]

    references = np.zeros_like(y_test)
    half_index = 25
    for n in range(3):
        sgr = SGDRegressor(random_state=0, max_iter=5)
        sgr.partial_fit(X_train[:half_index], y_train[:half_index, n])
        sgr.partial_fit(X_train[half_index:], y_train[half_index:, n])
        references[:, n] = sgr.predict(X_test)

    sgr = MultiOutputRegressor(SGDRegressor(random_state=0, max_iter=5))

    sgr.partial_fit(X_train[:half_index], y_train[:half_index])
    sgr.partial_fit(X_train[half_index:], y_train[half_index:])

    y_pred = sgr.predict(X_test)
    assert_almost_equal(references, y_pred)
    assert not hasattr(MultiOutputRegressor(Lasso), 'partial_fit') 
Example 2
Project: pygbm   Author: ogrisel   File: test_gradient_boosting.py   License: MIT License
def test_early_stopping_regression(scoring, validation_split,
                                   n_iter_no_change, tol):

    max_iter = 500

    X, y = make_regression(random_state=0)

    gb = GradientBoostingRegressor(verbose=1,  # just for coverage
                                   scoring=scoring,
                                   tol=tol,
                                   validation_split=validation_split,
                                   max_iter=max_iter,
                                   n_iter_no_change=n_iter_no_change,
                                   random_state=0)
    gb.fit(X, y)

    if n_iter_no_change is not None:
        assert n_iter_no_change <= gb.n_iter_ < max_iter
    else:
        assert gb.n_iter_ == max_iter 
Example 3
Project: nistats   Author: nilearn   File: test_contrasts.py   License: BSD 3-Clause "New" or "Revised" License
def test_fixed_effect_contrast_nonzero_effect():
    X, y = make_regression(n_features=5, n_samples=20, random_state=0)
    y = y[:, None]
    labels, results = run_glm(y, X, 'ols')
    coef = LinearRegression(fit_intercept=False).fit(X, y).coef_
    for i in range(X.shape[1]):
        contrast = np.zeros(X.shape[1])
        contrast[i] = 1.
        fixed_effect = _compute_fixed_effect_contrast([labels],
                                                      [results],
                                                      [contrast],
                                                      )
        assert_almost_equal(fixed_effect.effect_size(), coef.ravel()[i])
        fixed_effect = _compute_fixed_effect_contrast(
            [labels] * 3, [results] * 3, [contrast] * 3)
        assert_almost_equal(fixed_effect.effect_size(), coef.ravel()[i]) 
Example 4
Project: nyaggle   Author: nyanp   File: util.py   License: MIT License
def make_regression_df(n_samples: int = 1024,
                       n_num_features: int = 20,
                       n_cat_features: int = 0,
                       feature_name: str = 'col_{}',
                       target_name: str = 'target',
                       random_state: int = 0,
                       id_column: Optional[str] = None) -> Tuple[pd.DataFrame, pd.Series]:
    np.random.seed(random_state)
    X, y = make_regression(n_samples=n_samples, n_features=n_num_features,
                           random_state=random_state)

    X = pd.DataFrame(X, columns=[feature_name.format(i) for i in range(n_num_features)])
    y = pd.Series(y, name=target_name)

    if id_column is not None:
        X[id_column] = range(n_samples)

    for i in range(n_cat_features):
        X['cat_{}'.format(i)] = \
            pd.Series(np.random.choice(['A', 'B', None], size=n_samples)).astype(str).astype('category')

    return X, y 
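
A usage sketch for the helper above (not part of the original file): the call and the expected column layout follow directly from the function body, with illustrative argument values.

X, y = make_regression_df(n_samples=256, n_num_features=5, n_cat_features=2,
                          id_column='id', random_state=1)
print(list(X.columns))  # ['col_0', ..., 'col_4', 'id', 'cat_0', 'cat_1']
print(y.name)           # 'target'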
Example 5
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_gradient_boosting.py   License: MIT License
def test_early_stopping_regression(scoring, validation_fraction,
                                   n_iter_no_change, tol):

    max_iter = 200

    X, y = make_regression(random_state=0)

    gb = HistGradientBoostingRegressor(
        verbose=1,  # just for coverage
        min_samples_leaf=5,  # easier to overfit fast
        scoring=scoring,
        tol=tol,
        validation_fraction=validation_fraction,
        max_iter=max_iter,
        n_iter_no_change=n_iter_no_change,
        random_state=0
    )
    gb.fit(X, y)

    if n_iter_no_change is not None:
        assert n_iter_no_change <= gb.n_iter_ < max_iter
    else:
        assert gb.n_iter_ == max_iter 
Example 6
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_mlp.py   License: MIT License
def test_shuffle():
    # Test that the shuffle parameter affects the training process (it should)
    X, y = make_regression(n_samples=50, n_features=5, n_targets=1,
                           random_state=0)

    # With the same random_state and the same shuffle setting, the
    # coefficients are identical
    for shuffle in [True, False]:
        mlp1 = MLPRegressor(hidden_layer_sizes=1, max_iter=1, batch_size=1,
                            random_state=0, shuffle=shuffle)
        mlp2 = MLPRegressor(hidden_layer_sizes=1, max_iter=1, batch_size=1,
                            random_state=0, shuffle=shuffle)
        mlp1.fit(X, y)
        mlp2.fit(X, y)

        assert np.array_equal(mlp1.coefs_[0], mlp2.coefs_[0])

    # The coefficients differ when one model shuffles and the other does not
    mlp1 = MLPRegressor(hidden_layer_sizes=1, max_iter=1, batch_size=1,
                        random_state=0, shuffle=True)
    mlp2 = MLPRegressor(hidden_layer_sizes=1, max_iter=1, batch_size=1,
                        random_state=0, shuffle=False)
    mlp1.fit(X, y)
    mlp2.fit(X, y)

    assert not np.array_equal(mlp1.coefs_[0], mlp2.coefs_[0]) 
Example 7
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_validation.py   License: MIT License
def test_cross_val_score_with_score_func_regression():
    X, y = make_regression(n_samples=30, n_features=20, n_informative=5,
                           random_state=0)
    reg = Ridge()

    # Default score of the Ridge regression estimator
    scores = cross_val_score(reg, X, y, cv=5)
    assert_array_almost_equal(scores, [0.94, 0.97, 0.97, 0.99, 0.92], 2)

    # R2 score (a.k.a. coefficient of determination) - should be the
    # same as the default estimator score
    r2_scores = cross_val_score(reg, X, y, scoring="r2", cv=5)
    assert_array_almost_equal(r2_scores, [0.94, 0.97, 0.97, 0.99, 0.92], 2)

    # Mean squared error; this is a loss function, so "scores" are negative
    neg_mse_scores = cross_val_score(reg, X, y, cv=5,
                                     scoring="neg_mean_squared_error")
    expected_neg_mse = np.array([-763.07, -553.16, -274.38, -273.26, -1681.99])
    assert_array_almost_equal(neg_mse_scores, expected_neg_mse, 2)

    # Explained variance
    scoring = make_scorer(explained_variance_score)
    ev_scores = cross_val_score(reg, X, y, cv=5, scoring=scoring)
    assert_array_almost_equal(ev_scores, [0.94, 0.97, 0.97, 0.99, 0.92], 2) 
Example 8
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_multioutput.py   License: MIT License
def test_multi_target_regression():
    X, y = datasets.make_regression(n_targets=3)
    X_train, y_train = X[:50], y[:50]
    X_test, y_test = X[50:], y[50:]

    references = np.zeros_like(y_test)
    for n in range(3):
        rgr = GradientBoostingRegressor(random_state=0)
        rgr.fit(X_train, y_train[:, n])
        references[:, n] = rgr.predict(X_test)

    rgr = MultiOutputRegressor(GradientBoostingRegressor(random_state=0))
    rgr.fit(X_train, y_train)
    y_pred = rgr.predict(X_test)

    assert_almost_equal(references, y_pred)


Example 9
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_ridge.py   License: MIT License
def test_ridge_fit_intercept_sparse():
    X, y = make_regression(n_samples=1000, n_features=2, n_informative=2,
                           bias=10., random_state=42)

    X_csr = sp.csr_matrix(X)

    for solver in ['sag', 'sparse_cg']:
        dense = Ridge(alpha=1., tol=1.e-15, solver=solver, fit_intercept=True)
        sparse = Ridge(alpha=1., tol=1.e-15, solver=solver, fit_intercept=True)
        dense.fit(X, y)
        with pytest.warns(None) as record:
            sparse.fit(X_csr, y)
        assert len(record) == 0
        assert_almost_equal(dense.intercept_, sparse.intercept_)
        assert_array_almost_equal(dense.coef_, sparse.coef_)

    # test the solver switch and the corresponding warning
    for solver in ['saga', 'lsqr']:
        sparse = Ridge(alpha=1., tol=1.e-15, solver=solver, fit_intercept=True)
        assert_raises_regex(ValueError, "In Ridge,", sparse.fit, X_csr, y) 
Example 10
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_samples_generator.py   License: MIT License
def test_make_regression():
    X, y, c = make_regression(n_samples=100, n_features=10, n_informative=3,
                              effective_rank=5, coef=True, bias=0.0,
                              noise=1.0, random_state=0)

    assert_equal(X.shape, (100, 10), "X shape mismatch")
    assert_equal(y.shape, (100,), "y shape mismatch")
    assert_equal(c.shape, (10,), "coef shape mismatch")
    assert_equal(sum(c != 0.0), 3, "Unexpected number of informative features")

    # Test that y ~= np.dot(X, c) + bias + N(0, 1.0).
    assert_almost_equal(np.std(y - np.dot(X, c)), 1.0, decimal=1)

    # Test with small number of features.
    X, y = make_regression(n_samples=100, n_features=1)  # n_informative is clipped to n_features
    assert_equal(X.shape, (100, 1)) 
Example 11
Project: muffnn   Author: civisanalytics   File: test_mlp_regressor.py   License: BSD 3-Clause "New" or "Revised" License
def test_prediction_gradient():
    """Test computation of prediction gradients."""
    mlp = MLPRegressor(n_epochs=100, random_state=42, hidden_units=(5,))
    X, y = make_regression(
        n_samples=1000, n_features=10, n_informative=1, shuffle=False)
    mlp.fit(X, y)
    grad = mlp.prediction_gradient(X)
    grad_means = grad.mean(axis=0)
    assert grad.shape == X.shape
    # Check that only the informative feature has a large gradient.
    assert np.abs(grad_means[0]) > 0.5
    for m in grad_means[1:]:
        assert np.abs(m) < 0.1

    # Raise an exception for sparse inputs, which are not yet supported.
    X_sp = sp.csr_matrix(X)
    mlp.fit(X_sp, y)
    with pytest.raises(NotImplementedError):
        mlp.prediction_gradient(X_sp) 
Example 12
Project: civisml-extensions   Author: civisanalytics   File: test_stacking.py   License: BSD 3-Clause "New" or "Revised" License
def test_smoke_multiout_regression_methods(n_jobs):
    """Construct, fit, and predict on realistic problem.
    """
    X, y = make_regression(random_state=7, n_samples=100, n_features=10,
                           n_informative=4, n_targets=2)

    rng = np.random.RandomState(17)
    est_list = [('lr', LinearRegression()),
                ('rf', RandomForestRegressor(random_state=rng,
                                             n_estimators=10)),
                ('metalr', LinearRegression())]
    sm = StackedRegressor(est_list, n_jobs=n_jobs)
    sm.fit(X, y)
    sm.predict(X)
    sm.score(X, y)

    with pytest.raises(AttributeError):
        sm.predict_proba(X) 
Example 13
Project: pyglmnet   Author: glm-tools   File: test_pyglmnet.py   License: MIT License
def test_cv():
    """Simple CV check."""
    # XXX: don't use scikit-learn for tests.
    X, y = make_regression()
    cv = KFold(n_splits=5)

    glm_normal = GLM(distr='gaussian', alpha=0.01, reg_lambda=0.1)
    # check that it returns 5 scores
    scores = cross_val_score(glm_normal, X, y, cv=cv)
    assert len(scores) == 5

    param_grid = [{'alpha': np.linspace(0.01, 0.99, 2)},
                  {'reg_lambda': np.logspace(np.log(0.5), np.log(0.01),
                                             10, base=np.exp(1))}]
    glmcv = GridSearchCV(glm_normal, param_grid, cv=cv)
    glmcv.fit(X, y) 
Example 14
Project: mljar-supervised   Author: mljar   File: test_nn.py   License: MIT License
def setUpClass(cls):
        cls.X, cls.y = datasets.make_regression(
            n_samples=100, n_features=5, n_informative=4, shuffle=False, random_state=0
        )

        cls.params = {
            "dense_layers": 2,
            "dense_1_size": 8,
            "dense_2_size": 4,
            "dropout": 0,
            "learning_rate": 0.01,
            "momentum": 0.9,
            "decay": 0.001,
            "ml_task": "regression"
        }

        cls.y = preprocessing.scale(cls.y) 
Example 15
Project: sklearn-onnx   Author: onnx   File: test_parsing_options.py   License: MIT License
def test_kmeans(self):
        model = KMeans()
        X, y = make_regression(n_features=4, random_state=42)
        model.fit(X, y)
        initial_types = [('input', FloatTensorType((None, X.shape[1])))]
        with self.assertRaises(RuntimeError):
            convert_sklearn(model, initial_types=initial_types,
                            final_types=[('output4', None)])
        with self.assertRaises(RuntimeError):
            convert_sklearn(model, initial_types=initial_types,
                            final_types=[('dup1', None), ('dup1', None)],
                            target_opset=TARGET_OPSET)
        model_onnx = convert_sklearn(
            model, initial_types=initial_types,
            final_types=[('output4', None), ('output5', None)],
            target_opset=TARGET_OPSET)
        assert model_onnx is not None
        sess = InferenceSession(model_onnx.SerializeToString())
        assert sess.get_outputs()[0].name == 'output4'
        assert sess.get_outputs()[1].name == 'output5' 
Example 16
Project: cs-ranking   Author: kiudee   File: discrete_choice_data_generator.py   License: Apache License 2.0
def make_linear_transitive(
        self,
        n_instances=1000,
        n_objects=5,
        noise=0.0,
        n_features=100,
        n_informative=10,
        seed=42,
        **kwd,
    ):
        random_state = check_random_state(seed=seed)
        X, y, coeff = make_regression(
            n_samples=n_instances * n_objects,
            n_features=n_features,
            n_informative=n_informative,
            coef=True,
            noise=noise,
            random_state=random_state,
        )
        X = X.reshape(n_instances, n_objects, n_features)
        y = y.reshape(n_instances, n_objects)
        Y = y.argmax(axis=1)
        Y = convert_to_label_encoding(Y, n_objects)
        return X, Y 
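
The reshape pattern above is worth isolating: n_instances * n_objects flat samples are drawn once, then regrouped so that each instance holds n_objects alternatives scored by the same latent linear function. A standalone sketch with illustrative sizes:

from sklearn.datasets import make_regression

n_instances, n_objects, n_features = 10, 5, 3
X, y = make_regression(n_samples=n_instances * n_objects,
                       n_features=n_features, random_state=0)
X = X.reshape(n_instances, n_objects, n_features)
y = y.reshape(n_instances, n_objects)
best = y.argmax(axis=1)  # highest-scoring object per instance
print(X.shape, y.shape, best.shape)  # (10, 5, 3) (10, 5) (10,)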
Example 17
Project: cs-ranking   Author: kiudee   File: object_ranking_data_generator.py   License: Apache License 2.0
def make_linear_transitive(
        self,
        n_instances=1000,
        n_objects=5,
        noise=0.0,
        n_features=100,
        n_informative=10,
        seed=42,
        **kwd,
    ):
        random_state = check_random_state(seed=seed)
        X, y, coeff = make_regression(
            n_samples=n_instances * n_objects,
            n_features=n_features,
            n_informative=n_informative,
            coef=True,
            noise=noise,
            random_state=random_state,
        )
        X = X.reshape(n_instances, n_objects, n_features)
        y = y.reshape(n_instances, n_objects)
        Y = scores_to_rankings(y)
        return X, Y 
Example 18
Project: Auto-PyTorch   Author: automl   File: data_manager.py   License: Apache License 2.0
def generate_regression(self, num_features, num_samples, test_split=0.1, seed=0):
        """Generate a regression task
        
        Arguments:
            num_features {int} -- Number of features
            num_samples {int} -- Number of samples
        
        Keyword Arguments:
            test_split {float} -- Size of test split (default: {0.1})
            seed {int} -- a random seed (default: {0})
        """
        X, Y = make_regression(n_samples=num_samples, n_features=num_features, random_state=seed)
        self.categorical_features = [False] * num_features
        self.problem_type = ProblemType.FeatureRegression
        self.X, self.Y = X, Y
        self._split_data(test_split, seed) 
Example 19
Project: eland   Author: elastic   File: test_imported_ml_model_pytest.py   License: Apache License 2.0
def test_decision_tree_regressor(self):
        # Train model
        training_data = datasets.make_regression(n_features=5)
        regressor = DecisionTreeRegressor()
        regressor.fit(training_data[0], training_data[1])

        # Get some test results
        test_data = [[0.1, 0.2, 0.3, -0.5, 1.0], [1.6, 2.1, -10, 50, -1.0]]
        test_results = regressor.predict(test_data)

        # Serialise the models to Elasticsearch
        feature_names = ["f0", "f1", "f2", "f3", "f4"]
        model_id = "test_decision_tree_regressor"

        es_model = ImportedMLModel(
            ES_TEST_CLIENT, model_id, regressor, feature_names, overwrite=True
        )
        es_results = es_model.predict(test_data)

        np.testing.assert_almost_equal(test_results, es_results, decimal=2)

        # Clean up
        es_model.delete_model() 
Example 20
Project: eland   Author: elastic   File: test_imported_ml_model_pytest.py   License: Apache License 2.0
def test_random_forest_regressor(self):
        # Train model
        training_data = datasets.make_regression(n_features=5)
        regressor = RandomForestRegressor()
        regressor.fit(training_data[0], training_data[1])

        # Get some test results
        test_data = [[0.1, 0.2, 0.3, -0.5, 1.0], [1.6, 2.1, -10, 50, -1.0]]
        test_results = regressor.predict(test_data)

        # Serialise the models to Elasticsearch
        feature_names = ["f0", "f1", "f2", "f3", "f4"]
        model_id = "test_random_forest_regressor"

        es_model = ImportedMLModel(
            ES_TEST_CLIENT, model_id, regressor, feature_names, overwrite=True
        )
        es_results = es_model.predict(test_data)

        np.testing.assert_almost_equal(test_results, es_results, decimal=2)

        # Clean up
        es_model.delete_model() 
Example 21
Project: eland   Author: elastic   File: test_imported_ml_model_pytest.py   License: Apache License 2.0
def test_xgb_regressor(self):
        # Train model
        training_data = datasets.make_regression(n_features=5)
        regressor = XGBRegressor()
        regressor.fit(training_data[0], training_data[1])

        # Get some test results
        test_data = [[0.1, 0.2, 0.3, -0.5, 1.0], [1.6, 2.1, -10, 50, -1.0]]
        test_results = regressor.predict(np.asarray(test_data))

        # Serialise the models to Elasticsearch
        feature_names = ["f0", "f1", "f2", "f3", "f4"]
        model_id = "test_xgb_regressor"

        es_model = ImportedMLModel(
            ES_TEST_CLIENT, model_id, regressor, feature_names, overwrite=True
        )

        es_results = es_model.predict(test_data)

        np.testing.assert_almost_equal(test_results, es_results, decimal=2)

        # Clean up
        es_model.delete_model() 
Example 22
Project: eland   Author: elastic   File: test_imported_ml_model_pytest.py   License: Apache License 2.0
def test_predict_single_feature_vector(self):
        # Train model
        training_data = datasets.make_regression(n_features=1)
        regressor = XGBRegressor()
        regressor.fit(training_data[0], training_data[1])

        # Get some test results
        test_data = [[0.1]]
        test_results = regressor.predict(np.asarray(test_data))

        # Serialise the models to Elasticsearch
        feature_names = ["f0"]
        model_id = "test_xgb_regressor"

        es_model = ImportedMLModel(
            ES_TEST_CLIENT, model_id, regressor, feature_names, overwrite=True
        )

        # Single feature
        es_results = es_model.predict(test_data[0])

        np.testing.assert_almost_equal(test_results, es_results, decimal=2)

        # Clean up
        es_model.delete_model() 
Example 23
Project: twitter-stock-recommendation   Author: alvarobartt   File: test_validation.py   License: MIT License
def test_cross_val_score_with_score_func_regression():
    X, y = make_regression(n_samples=30, n_features=20, n_informative=5,
                           random_state=0)
    reg = Ridge()

    # Default score of the Ridge regression estimator
    scores = cross_val_score(reg, X, y, cv=5)
    assert_array_almost_equal(scores, [0.94, 0.97, 0.97, 0.99, 0.92], 2)

    # R2 score (a.k.a. coefficient of determination) - should be the
    # same as the default estimator score
    r2_scores = cross_val_score(reg, X, y, scoring="r2", cv=5)
    assert_array_almost_equal(r2_scores, [0.94, 0.97, 0.97, 0.99, 0.92], 2)

    # Mean squared error; this is a loss function, so "scores" are negative
    neg_mse_scores = cross_val_score(reg, X, y, cv=5,
                                     scoring="neg_mean_squared_error")
    expected_neg_mse = np.array([-763.07, -553.16, -274.38, -273.26, -1681.99])
    assert_array_almost_equal(neg_mse_scores, expected_neg_mse, 2)

    # Explained variance
    scoring = make_scorer(explained_variance_score)
    ev_scores = cross_val_score(reg, X, y, cv=5, scoring=scoring)
    assert_array_almost_equal(ev_scores, [0.94, 0.97, 0.97, 0.99, 0.92], 2) 
Example 24
Project: Kaggler   Author: jeongyoonlee   File: test_automl.py   License: MIT License
def test_automl():
    X, y = make_regression(n_samples=N_OBS,
                           n_features=N_FEATURE,
                           n_informative=N_IMP_FEATURE,
                           random_state=RANDOM_SEED)
    X = pd.DataFrame(X, columns=['x{}'.format(i) for i in range(X.shape[1])])
    y = pd.Series(y)
    logging.info('%s %s', X.shape, y.shape)

    X_trn, X_tst, y_trn, y_tst = train_test_split(X, y, test_size=.2, random_state=RANDOM_SEED)

    model = AutoLGB(objective='regression', metric='l1')
    model.tune(X_trn, y_trn)
    model.fit(X_trn, y_trn)
    p = model.predict(X_tst)
    r = (np.random.rand(X_tst.shape[0]) * (y_trn.max() - y_trn.min()) + y_trn.min())
    logging.info('MAE (LGB): {:.4f}'.format(mae(y_tst, p)))
    assert mae(y_tst, p) < mae(y_tst, r)

    model = AutoXGB(objective='reg:linear', metric='rmse')
    model.tune(X_trn, y_trn)
    model.fit(X_trn, y_trn)
    p = model.predict(X_tst)
    r = (np.random.rand(X_tst.shape[0]) * (y_trn.max() - y_trn.min()) + y_trn.min())
    logging.info('MAE (XGB): {:.4f}'.format(mae(y_tst, p)))
    assert mae(y_tst, p) < mae(y_tst, r) 
Example 25
Project: pygbm   Author: ogrisel   File: test_gradient_boosting.py   License: MIT License
def test_pre_binned_data():
    # Make sure that:
    # - training on numerical data and predicting on numerical data is the
    #   same as training on binned data and predicting on binned data
    # - training on numerical data and predicting on numerical data is the
    #   same as training on numerical data and predicting on binned data
    # - training on binned data and predicting on numerical data is not
    #   possible.

    X, y = make_regression(random_state=0)
    gbdt = GradientBoostingRegressor(scoring=None, random_state=0)
    mapper = BinMapper(random_state=0)
    X_binned = mapper.fit_transform(X)

    fit_num_pred_num = gbdt.fit(X, y).predict(X)
    fit_binned_pred_binned = gbdt.fit(X_binned, y).predict(X_binned)
    fit_num_pred_binned = gbdt.fit(X, y).predict(X_binned)

    assert_allclose(fit_num_pred_num, fit_binned_pred_binned)
    assert_allclose(fit_num_pred_num, fit_num_pred_binned)

    assert_raises_regex(
        ValueError,
        'This estimator was fitted with pre-binned data ',
        gbdt.fit(X_binned, y).predict, X
    ) 
Example 26
Project: pygbm   Author: ogrisel   File: test_predictor.py   License: MIT License
def test_pre_binned_data():
    # Make sure ValueError is raised when predictor.predict() is called while
    # the predictor does not have any numerical thresholds.

    X, y = make_regression()

    # Init gradients and hessians to those of the least squares loss
    gradients = -y.astype(np.float32)
    hessians = np.ones(1, dtype=np.float32)

    mapper = BinMapper(random_state=0)
    X_binned = mapper.fit_transform(X)
    grower = TreeGrower(X_binned, gradients, hessians,
                        n_bins_per_feature=mapper.n_bins_per_feature_)
    grower.grow()
    predictor = grower.make_predictor(
        numerical_thresholds=None
    )

    assert_raises_regex(
        ValueError,
        'This predictor does not have numerical thresholds',
        predictor.predict, X
    )

    assert_raises_regex(
        ValueError,
        'binned_data dtype should be uint8',
        predictor.predict_binned, X
    )

    predictor.predict_binned(X_binned)  # No error

    predictor = grower.make_predictor(
        numerical_thresholds=mapper.numerical_thresholds_
    )
    assert_raises_regex(
        ValueError,
        'X has uint8 dtype',
        predictor.predict, X_binned
    ) 
Example 27
Project: snape   Author: mbernico   File: make_dataset.py   License: Apache License 2.0
def create_regression_dataset(n_samples, n_features, n_informative, effective_rank, tail_strength,
                              noise, random_state=None, shuffle=True):
    """
    Creates a regression dataset

    :param n_samples: number of observations
    :param n_features: number of features
    :param n_informative: number of informative features
    :param effective_rank: approximate number of singular vectors required to explain data
    :param tail_strength: relative importance of the fat noisy tail of the singular values profile
    :param noise: standard deviation of the gaussian noise applied to the output
    :param random_state: the numpy RandomState
    :param shuffle: shuffle the samples and the features.
    :return: the requested dataframe
    """
    random_state = get_random_state(random_state)
    X, y = make_regression(n_samples=n_samples, n_features=n_features, n_informative=n_informative,
                           n_targets=1, effective_rank=effective_rank, tail_strength=tail_strength,
                           noise=noise, random_state=random_state, shuffle=shuffle)

    # cast to a data frame
    df = pd.DataFrame(X)
    # rename X columns
    df = rename_columns(df)
    # and add the Y
    df['y'] = y
    return df 
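
A hypothetical call to the helper above (get_random_state and rename_columns are assumed to come from the same snape module; argument values are illustrative):

df = create_regression_dataset(n_samples=500, n_features=8, n_informative=4,
                               effective_rank=None, tail_strength=0.5,
                               noise=1.0, random_state=42)
print(df.shape)  # (500, 9): 8 renamed feature columns plus 'y'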
Example 28
Project: scikit-learn-extra   Author: scikit-learn-contrib   File: test_eigenpro.py   License: BSD 3-Clause "New" or "Revised" License
def gen_regression(params):
    """Generate a regression problem with make_regression
    where random_state=1"""
    return make_regression(**params, random_state=1) 
Example 29
Project: scikit-learn-extra   Author: scikit-learn-contrib   File: test_eigenpro.py   License: BSD 3-Clause "New" or "Revised" License
def test_eigenpro_regression_duplicate_data():
    """Test the performance when some data is repeated"""
    X, y = make_regression(random_state=1)
    X, y = np.concatenate([X, X]), np.concatenate([y, y])
    prediction = (
        EigenProRegressor(
            kernel="rbf", n_epoch=100, gamma=0.02, random_state=1
        )
        .fit(X, y)
        .predict(X)
    )
    assert_allclose(prediction, y, rtol=5e-3) 
Example 30
Project: scikit-learn-extra   Author: scikit-learn-contrib   File: test_eigenpro.py   License: BSD 3-Clause "New" or "Revised" License
def test_eigenpro_regression_conflict_data():
    """Make sure the regressor doesn't crash when conflicting
    data is given"""
    X, y = make_regression(random_state=1)
    y = np.reshape(y, (-1, 1))
    X, y = X, np.hstack([y, y + 2])
    # Make sure we don't throw an error when fitting or predicting
    EigenProRegressor(
        kernel="linear", n_epoch=5, gamma=0.5, random_state=1
    ).fit(X, y).predict(X)

