Python sklearn.datasets.load_boston() Examples

The following are 30 code examples of sklearn.datasets.load_boston(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module sklearn.datasets, or try the search function.
Example #1
Source File: test_learnersetting.py    From xcessiv with Apache License 2.0 8 votes vote down vote up
def setUp(self):
        """
        Store the arrays returned by load_boston(return_X_y=True) as
        ``self.X`` / ``self.y`` and list the regressor setting names used by
        the tests.
        """
        features, targets = load_boston(return_X_y=True)
        self.X = features
        self.y = targets

        # Names of the regressor settings exercised by this test case.
        self.regressor_settings = [
            'sklearn_random_forest_regressor',
            'sklearn_extra_trees_regressor',
            'sklearn_bagging_regressor',
            'sklearn_GP_regressor',
            'sklearn_ridge_regressor',
            'sklearn_lasso_regressor',
            'sklearn_kernel_ridge_regressor',
            'sklearn_knn_regressor',
            'sklearn_svr_regressor',
            'sklearn_decision_tree_regressor',
            'sklearn_linear_regression',
            'sklearn_adaboost_regressor',
            'xgboost_regressor',
        ]
Example #2
Source File: test_boosted_trees_regression.py    From coremltools with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def setUpClass(self):
        """
        Train an XGBoost model on the load_boston() data and store the
        dataset, the model and the feature names on ``self``.

        Returns early, storing nothing, when the _HAS_XGBOOST or
        _HAS_SKLEARN flag is false.
        """
        if not (_HAS_XGBOOST and _HAS_SKLEARN):
            return

        boston = load_boston()
        training_matrix = xgboost.DMatrix(
            boston.data,
            label=boston.target,
            feature_names=boston.feature_names,
        )

        # Empty parameter dict; the third positional argument of
        # xgboost.train is the number of boosting rounds.
        booster = xgboost.train({}, training_matrix, 1)

        # Keep the data and the trained model for the tests.
        self.scikit_data = boston
        self.xgb_model = booster
        self.feature_names = boston.feature_names
Example #3
Source File: test_SVR.py    From coremltools with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def test_input_names(self):
        """
        Check libsvm.convert() with default, extra and insufficient input
        specifications.

        The default conversion is compared against svmutil.svm_predict()
        (only when running on macOS 10.13 or newer). Passing too few input
        names, or an input_length smaller than the number of data columns,
        must raise ValueError.
        """
        data = load_boston()
        df = pd.DataFrame({"input": data["data"].tolist()})
        df["input"] = df["input"].apply(np.array)

        # Default values
        spec = libsvm.convert(self.libsvm_model)
        if _is_macos() and _macos_version() >= (10, 13):
            (df["prediction"], _, _) = svmutil.svm_predict(
                data["target"], data["data"].tolist(), self.libsvm_model
            )
            metrics = evaluate_regressor(spec, df)
            # assertAlmostEquals was a deprecated alias (removed in
            # Python 3.12); assertAlmostEqual is the supported spelling.
            self.assertAlmostEqual(metrics["max_error"], 0)

        # One extra parameter. This is legal/possible.
        num_inputs = len(data["data"][0])
        spec = libsvm.convert(self.libsvm_model, input_length=num_inputs + 1)

        # Not enough input names.
        input_names = ["this", "is", "not", "enought", "names"]
        with self.assertRaises(ValueError):
            libsvm.convert(self.libsvm_model, input_names=input_names)
        with self.assertRaises(ValueError):
            libsvm.convert(self.libsvm_model, input_length=num_inputs - 1)
Example #4
Source File: test_SVR.py    From coremltools with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def setUpClass(self):
        """
        Train a libsvm model (EPSILON_SVR, LINEAR kernel) on the
        load_boston() data and keep it as ``self.libsvm_model``.

        Does nothing when the _HAS_SKLEARN or _HAS_LIBSVM flag is false.
        """
        if not (_HAS_SKLEARN and _HAS_LIBSVM):
            return

        boston = load_boston()
        targets = boston["target"]
        rows = boston["data"].tolist()
        problem = svmutil.svm_problem(targets, rows)

        settings = svmutil.svm_parameter()
        settings.svm_type = svmutil.EPSILON_SVR
        settings.kernel_type = svmutil.LINEAR
        settings.eps = 1

        self.libsvm_model = svmutil.svm_train(problem, settings)
Example #5
Source File: test_composite_pipelines.py    From coremltools with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def test_boston_OHE_plus_trees(self):
        """
        Fit a OneHotEncoder + GradientBoostingRegressor pipeline on the
        load_boston() data, convert it, and compare the converted spec with
        the pipeline's own predictions.

        The comparison only runs on macOS 10.13 or newer.
        """
        boston = load_boston()

        encoder = OneHotEncoder(categorical_features=[8], sparse=False)
        booster = GradientBoostingRegressor(random_state=1)
        pipeline = Pipeline([("OHE", encoder), ("Trees", booster)])
        pipeline.fit(boston.data, boston.target)

        # Convert the fitted pipeline.
        spec = convert(pipeline, boston.feature_names, "target")

        if not (_is_macos() and _macos_version() >= (10, 13)):
            return

        # Reference predictions from the scikit-learn pipeline.
        frame = pd.DataFrame(boston.data, columns=boston.feature_names)
        frame["prediction"] = pipeline.predict(boston.data)

        # Evaluate the converted spec against them.
        outcome = evaluate_regressor(spec, frame, "target", verbose=False)
        assert outcome["max_error"] < 0.0001
Example #6
Source File: test_composite_pipelines.py    From coremltools with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def test_boston_OHE_plus_normalizer(self):
        """
        Fit a OneHotEncoder + StandardScaler pipeline on the load_boston()
        data, convert it, and check that the converted spec reproduces the
        pipeline's transform() output.

        The comparison only runs on macOS 10.13 or newer.
        """
        boston = load_boston()

        encoder = OneHotEncoder(categorical_features=[8], sparse=False)
        pipeline = Pipeline([("OHE", encoder), ("Scaler", StandardScaler())])
        pipeline.fit(boston.data, boston.target)

        # Convert the fitted pipeline.
        spec = convert(pipeline, boston.feature_names, "out")

        if not (_is_macos() and _macos_version() >= (10, 13)):
            return

        # One {feature name: value} dict per input row, and the matching
        # transformed row as the expected output.
        inputs = [dict(zip(boston.feature_names, row)) for row in boston.data]
        expected = [{"out": row} for row in pipeline.transform(boston.data)]

        outcome = evaluate_transformer(spec, inputs, expected)
        assert outcome["num_errors"] == 0
Example #7
Source File: test_feature.py    From heamy with MIT License 6 votes vote down vote up
def test_onehot():
    """Check the column counts onehot_features() yields on load_boston() data.

    Columns 8, 1 and 12 are encoded twice, with different ``full`` /
    ``dummy_na`` settings; train and test results must have the same width
    and that width must match the expected value for each setting.
    """
    boston = load_boston()
    features, target = boston['data'], boston['target']
    X_train, X_test, y_train, y_test = train_test_split(
        features, target, test_size=0.15, random_state=333)
    train_frame = pd.DataFrame(X_train)
    test_frame = pd.DataFrame(X_test)

    # (full, dummy_na, expected number of columns)
    scenarios = [
        (False, True, 441),
        (True, False, 500),
    ]
    for full, dummy_na, expected_width in scenarios:
        # Deep copies keep the source frames untouched between scenarios.
        encoded_train, encoded_test = onehot_features(
            train_frame.copy(deep=True), test_frame.copy(deep=True),
            [8, 1, 12], full=full, dummy_na=dummy_na)
        assert encoded_train.shape[1] == encoded_test.shape[1]
        assert encoded_train.shape[1] == expected_width
Example #8
Source File: test_categorical_imputer.py    From coremltools with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def setUpClass(self):
        """
        Set up the unit test by loading the dataset and training a model.

        One entry of column 8 of the load_boston() data is replaced by NaN,
        then an Imputer with the "most_frequent" strategy is fitted on that
        single column. The dataset and the fitted model are stored on
        ``self``.
        """
        from sklearn.datasets import load_boston

        scikit_data = load_boston()
        scikit_model = Imputer(strategy="most_frequent", axis=0)
        # np.nan is the canonical spelling; the np.NaN alias was removed in
        # NumPy 2.0.
        scikit_data["data"][1, 8] = np.nan

        # Column 8 as a 2-D array with a single column.
        input_data = scikit_data["data"][:, 8].reshape(-1, 1)
        scikit_model.fit(input_data, scikit_data["target"])

        # Save the data and the model
        self.scikit_data = scikit_data
        self.scikit_model = scikit_model
Example #9
Source File: test_pipeline.py    From coremltools with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def setUpClass(self):
        """
        Fit a LinearRegression on the load_boston() data, convert it, and
        store the dataset, the model and the converted spec on ``self``.

        Returns without storing anything when _HAS_SKLEARN is false.
        """
        if not _HAS_SKLEARN:
            return

        boston = load_boston()
        column_names = boston.feature_names

        regressor = LinearRegression()
        regressor.fit(boston["data"], boston["target"])

        converted = converter.convert(regressor, column_names, "target")
        spec = converted.get_spec()

        # Keep everything the tests need.
        self.scikit_data = boston
        self.scikit_model = regressor
        self.scikit_spec = spec
Example #10
Source File: test_random_forest_classifier.py    From coremltools with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def setUpClass(self):
        """
        Fit a RandomForestClassifier on the load_boston() features, using a
        discretised version of the target as class labels, and store the
        dataset, labels and model on ``self``.
        """
        from sklearn.datasets import load_boston
        from sklearn.ensemble import RandomForestClassifier
        import numpy as np

        boston = load_boston()
        forest = RandomForestClassifier(random_state=1)

        # Turn the continuous target into integer labels: bin it on the
        # edges returned by np.histogram and shift the indices down by one.
        prices = boston.target
        _, bin_edges = np.histogram(prices)
        labels = np.digitize(prices, bin_edges) - 1
        forest.fit(boston.data, labels)

        self.scikit_data = boston
        self.target = labels
        self.scikit_model = forest
Example #11
Source File: test_NuSVR.py    From coremltools with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def setUpClass(self):
        """
        Train a libsvm model (NU_SVR, LINEAR kernel) on the load_boston()
        data and keep it as ``self.libsvm_model``.

        Does nothing when the _HAS_SKLEARN or _HAS_LIBSVM flag is false.
        """
        if not (_HAS_SKLEARN and _HAS_LIBSVM):
            return

        boston = load_boston()
        targets = boston["target"]
        rows = boston["data"].tolist()
        problem = svmutil.svm_problem(targets, rows)

        settings = svmutil.svm_parameter()
        settings.svm_type = svmutil.NU_SVR
        settings.kernel_type = svmutil.LINEAR
        settings.eps = 1

        self.libsvm_model = svmutil.svm_train(problem, settings)
Example #12
Source File: test_decision_tree_classifier_numeric.py    From coremltools with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def setUpClass(self):
        """
        Prepare load_boston() data for the numeric classifier tests: a
        float32-rounded feature matrix, a discretised target, the feature
        names and the output name, all stored on ``self``.
        """
        from sklearn.datasets import load_boston
        import numpy as np

        boston = load_boston()
        class_count = 3

        # Round-trip through float32 ("f") and back to float64 ("d");
        # scikit-learn downcasts data.
        self.X = boston.data.astype("f").astype("d")

        # Bin the continuous target on class_count - 1 histogram bins and
        # shift the bin indices down by one.
        prices = boston.target
        _, bin_edges = np.histogram(prices, bins=class_count - 1)
        labels = np.digitize(prices, bin_edges) - 1

        self.scikit_data = boston
        self.target = labels
        self.feature_names = boston.feature_names
        self.output_name = "target"
Example #13
Source File: test_few.py    From few with GNU General Public License v3.0 6 votes vote down vote up
def test_few_with_parents_weight():
    """test_few.py: FEW fits and scores without error when weight_parents=True"""
    np.random.seed(1006987)
    boston = load_boston()

    # Shuffle features and target together by stacking them into one array.
    stacked = np.column_stack((boston.data, boston.target))
    np.random.shuffle(stacked)
    features = stacked[:, :-1]
    target = stacked[:, -1]

    print("feature shape:",boston.data.shape)

    learner = FEW(
        generations=1,
        population_size=5,
        mutation_rate=1,
        crossover_rate=1,
        ml=LassoLarsCV(),
        min_depth=1,
        max_depth=3,
        sel='tournament',
        fit_choice='r2',
        tourn_size=2,
        random_state=0,
        verbosity=0,
        disable_update_check=False,
        weight_parents=True,
    )

    # First 300 shuffled rows for training, the remainder for testing.
    train_X, train_y = features[:300], target[:300]
    learner.fit(train_X, train_y)
    few_score = learner.score(features[:300], target[:300])
    test_score = learner.score(features[300:], target[300:])

    print("few score:",few_score)
    print("few test score:",test_score)
Example #14
Source File: test_one_hot_encoder.py    From coremltools with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def test_boston_OHE(self):
        """
        Fit OneHotEncoders on several column selections of the load_boston()
        data, convert each one, and check that the converted spec reproduces
        the encoder's transform() output.
        """
        boston = load_boston()
        column_names = boston.feature_names

        column_selections = [[3], [8], [3, 8], [8, 3]]
        for selected_columns in column_selections:
            encoder = OneHotEncoder(
                categorical_features=selected_columns, sparse=False
            )
            encoder.fit(boston.data, boston.target)

            # Convert the fitted encoder.
            spec = sklearn.convert(encoder, column_names, "out").get_spec()

            # One {feature name: value} dict per row, and the matching
            # transformed row as the expected output.
            inputs = [dict(zip(column_names, row)) for row in boston.data]
            expected = [{"out": row} for row in encoder.transform(boston.data)]

            outcome = evaluate_transformer(spec, inputs, expected)

            assert outcome["num_errors"] == 0

    # This test still isn't working 
Example #15
Source File: test_boosted_trees_classifier_numeric.py    From coremltools with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def setUpClass(self):
        """
        Prepare load_boston() data for the numeric boosted-tree classifier
        tests: a float32-rounded feature matrix, a discretised target, the
        feature names and the output name, all stored on ``self``.
        """
        from sklearn.datasets import load_boston
        import numpy as np

        boston = load_boston()
        class_count = 3

        # Round-trip through float32 ("f") and back to float64 ("d");
        # scikit-learn downcasts data.
        self.X = boston.data.astype("f").astype("d")

        # Bin the continuous target on class_count - 1 histogram bins and
        # shift the bin indices down by one.
        prices = boston.target
        _, bin_edges = np.histogram(prices, bins=class_count - 1)
        labels = np.digitize(prices, bin_edges) - 1

        self.scikit_data = boston
        self.target = labels
        self.feature_names = boston.feature_names
        self.output_name = "target"
Example #16
Source File: test_predictor.py    From Mastering-Elasticsearch-7.0 with MIT License 6 votes vote down vote up
def test_boston_dataset(max_bins):
    """Grow one tree on binned load_boston() data and check its R^2 scores.

    The predictor built from the grown tree must score above 0.85 on the
    training split and above 0.70 on the test split.
    """
    dataset = load_boston()
    X_train, X_test, y_train, y_test = train_test_split(
        dataset.data, dataset.target, random_state=42)

    bin_mapper = _BinMapper(max_bins=max_bins, random_state=42)
    binned_train = bin_mapper.fit_transform(X_train)

    # Init gradients and hessians to that of least squares loss
    gradients = -y_train.astype(G_H_DTYPE)
    hessians = np.ones(1, dtype=G_H_DTYPE)

    grower_options = dict(
        min_samples_leaf=8,
        max_leaf_nodes=31,
        max_bins=max_bins,
        actual_n_bins=bin_mapper.actual_n_bins_,
    )
    tree_grower = TreeGrower(binned_train, gradients, hessians,
                             **grower_options)
    tree_grower.grow()

    predictor = tree_grower.make_predictor(
        bin_thresholds=bin_mapper.bin_thresholds_)

    train_r2 = r2_score(y_train, predictor.predict(X_train))
    assert train_r2 > 0.85
    test_r2 = r2_score(y_test, predictor.predict(X_test))
    assert test_r2 > 0.70
Example #17
Source File: test_random_forest_classifier_numeric.py    From coremltools with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def setUpClass(self):
        """
        Prepare load_boston() data for the numeric classifier tests.

        Stores on ``self`` the raw dataset, a float32-rounded feature matrix
        (``X``), a discretised target (``target``), the feature names and the
        output name.
        """
        # The unused function-scope import of DecisionTreeClassifier was
        # dropped; nothing in this method references it.
        from sklearn.datasets import load_boston

        # Load data and train model
        import numpy as np

        scikit_data = load_boston()
        self.X = scikit_data.data.astype("f").astype(
            "d"
        )  ## scikit-learn downcasts data
        t = scikit_data.target
        num_classes = 3
        # Bin the continuous target on num_classes - 1 histogram bins and
        # shift the bin indices down by one.
        target = np.digitize(t, np.histogram(t, bins=num_classes - 1)[1]) - 1

        # Save the data and the model
        self.scikit_data = scikit_data
        self.target = target
        self.feature_names = scikit_data.feature_names
        self.output_name = "target"
Example #18
Source File: test_base.py    From Mastering-Elasticsearch-7.0 with MIT License 6 votes vote down vote up
def test_score_sample_weight():
    """Check that score() changes when random sample weights are supplied.

    Runs once with a classifier on load_iris() and once with a regressor on
    load_boston().
    """
    rng = np.random.RandomState(0)

    # test both ClassifierMixin and RegressorMixin
    classifier = DecisionTreeClassifier(max_depth=2)
    regressor = DecisionTreeRegressor(max_depth=2)
    iris = datasets.load_iris()
    boston = datasets.load_boston()

    for estimator, dataset in ((classifier, iris), (regressor, boston)):
        estimator.fit(dataset.data, dataset.target)

        # generate random sample weights
        weights = rng.randint(1, 10, size=len(dataset.target))

        # check that the score with and without sample weights are different
        plain_score = estimator.score(dataset.data, dataset.target)
        weighted_score = estimator.score(dataset.data, dataset.target,
                                         sample_weight=weights)
        assert_not_equal(plain_score, weighted_score,
                         msg="Unweighted and weighted scores "
                             "are unexpectedly equal")
Example #19
Source File: test_coordinate_descent.py    From Mastering-Elasticsearch-7.0 with MIT License 6 votes vote down vote up
def test_warm_start_convergence_with_regularizer_decrement():
    """Check ElasticNet iteration counts under stronger regularization and warm start.

    A lightly regularized model fitted from scratch must need more
    iterations than (a) a model with ten times the alpha and (b) a copy of
    that model refitted at the light alpha with warm_start=True.
    """
    dataset = load_boston()
    X = dataset.data
    y = dataset.target

    # Baseline: converge on the lightly regularized problem from scratch.
    target_alpha = 1e-5
    cold_low_reg = ElasticNet(alpha=target_alpha).fit(X, y)

    # Same problem with stronger regularization; this is expected to be
    # easier and therefore to converge in fewer iterations.
    high_reg = ElasticNet(alpha=target_alpha * 10).fit(X, y)
    assert_greater(cold_low_reg.n_iter_, high_reg.n_iter_)

    # Refit the lightly regularized problem, but starting from the strongly
    # regularized solution. That starting point should also beat the
    # baseline that started from zero.
    warm_low_reg = deepcopy(high_reg)
    warm_low_reg.set_params(warm_start=True, alpha=target_alpha)
    warm_low_reg.fit(X, y)
    assert_greater(cold_low_reg.n_iter_, warm_low_reg.n_iter_)
Example #20
Source File: test_decision_tree_classifier.py    From coremltools with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def setUpClass(self):
        """
        Set up the unit test by loading the dataset and training a model.

        A DecisionTreeClassifier is fitted on the load_boston() features with
        a discretised version of the target as class labels. The dataset,
        the labels and the model are stored on ``self``.
        """
        # The unused function-scope import of MultiLabelBinarizer was
        # dropped; nothing in this method references it.
        from sklearn.datasets import load_boston
        from sklearn.tree import DecisionTreeClassifier
        import numpy as np

        scikit_data = load_boston()
        scikit_model = DecisionTreeClassifier(random_state=1)
        t = scikit_data.target
        # Bin the continuous target on the edges returned by np.histogram
        # and shift the bin indices down by one.
        target = np.digitize(t, np.histogram(t)[1]) - 1
        scikit_model.fit(scikit_data.data, target)

        # Save the data and the model
        self.scikit_data = scikit_data
        self.target = target
        self.scikit_model = scikit_model
Example #21
Source File: test_boosted_trees_classifier.py    From coremltools with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def setUpClass(self):
        """
        Train an XGBoost model on the load_boston() features with a
        discretised target, and store the model, labels, dataset and number
        of distinct labels on ``self``.
        """
        from sklearn.datasets import load_boston
        import numpy as np

        boston = load_boston()

        # Bin the continuous target on the edges returned by np.histogram
        # and shift the bin indices down by one.
        prices = boston.target
        _, bin_edges = np.histogram(prices)
        labels = np.digitize(prices, bin_edges) - 1

        training_matrix = xgboost.DMatrix(
            boston.data, label=labels, feature_names=boston.feature_names
        )
        self.xgb_model = xgboost.train({}, training_matrix)
        self.target = labels

        # Keep the dataset and the number of distinct labels.
        self.scikit_data = boston
        self.n_classes = len(np.unique(labels))
Example #22
Source File: test_boosted_trees_regression_numeric.py    From coremltools with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def setUpClass(self):
        """
        Build the XGBoost training matrix from the load_boston() data and
        store it, a float32-rounded feature matrix, the feature names and
        the output name on ``self``.

        Returns early when the _HAS_XGBOOST or _HAS_SKLEARN flag is false.
        """
        if not (_HAS_XGBOOST and _HAS_SKLEARN):
            return

        boston = load_boston()

        # Round-trip through float32 ("f") and back to float64 ("d").
        self.X = boston.data.astype("f").astype("d")

        # The DMatrix is built from the unrounded data.
        self.dtrain = xgboost.DMatrix(
            boston.data,
            label=boston.target,
            feature_names=boston.feature_names,
        )
        self.feature_names = boston.feature_names
        self.output_name = "target"
Example #23
Source File: test_boosted_trees_classifier.py    From coremltools with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def setUpClass(self):
        """
        Fit a GradientBoostingClassifier on the load_boston() features with
        a discretised target, and store the labels, dataset and model on
        ``self``.
        """
        from sklearn.datasets import load_boston
        import numpy as np

        boston = load_boston()
        classifier = GradientBoostingClassifier(random_state=1)

        # Bin the continuous target on the edges returned by np.histogram
        # and shift the bin indices down by one.
        prices = boston.target
        _, bin_edges = np.histogram(prices)
        labels = np.digitize(prices, bin_edges) - 1

        classifier.fit(boston.data, labels)
        self.target = labels

        self.scikit_data = boston
        self.scikit_model = classifier
Example #24
Source File: test_standard_scalar.py    From coremltools with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def test_boston(self):
        """
        Fit a StandardScaler on the load_boston() data, convert it, and check
        that the converted spec reproduces the scaler's transform() output.
        """
        from sklearn.datasets import load_boston

        boston = load_boston()
        column_names = boston.feature_names
        scaler = StandardScaler().fit(boston.data)

        converted = converter.convert(scaler, column_names, "out")
        spec = converted.get_spec()

        # One {feature name: value} dict per row, and the matching scaled
        # row as the expected output.
        inputs = [dict(zip(column_names, row)) for row in boston.data]
        expected = [{"out": row} for row in scaler.transform(boston.data)]

        outcome = evaluate_transformer(spec, inputs, expected)

        assert outcome["num_errors"] == 0
Example #25
Source File: test_keras_to_pmml_UnitTest.py    From nyoka with Apache License 2.0 6 votes vote down vote up
def test_keras_02(self):
        """
        Train a small Sequential model on the load_boston() data, export it
        with KerasToPmml to 'sequentialModel.pmml', parse the file back and
        check the file exists and the layer counts agree.
        """
        boston = load_boston()
        data = pd.DataFrame(boston.data)
        features = list(boston.feature_names)
        target = 'PRICE'
        data.columns = features
        data['PRICE'] = boston.target
        x_train, x_test, y_train, y_test = train_test_split(data[features], data[target], test_size=0.20, random_state=42)
        model = Sequential()
        model.add(Dense(13, input_dim=13, kernel_initializer='normal', activation='relu'))
        model.add(Dense(23))
        model.add(Dense(1, kernel_initializer='normal'))
        model.compile(loss='mean_squared_error', optimizer='adam')
        model.fit(x_train, y_train, epochs=1000, verbose=0)
        pmmlObj = KerasToPmml(model)
        # Use a context manager so the handle is closed (and its contents
        # flushed) before the file is parsed back; the original never closed
        # the file it opened.
        with open('sequentialModel.pmml', 'w') as pmml_file:
            pmmlObj.export(pmml_file, 0)
        reconPmmlObj = ny.parse('sequentialModel.pmml', True)
        self.assertTrue(os.path.isfile("sequentialModel.pmml"))
        # The parsed network is expected to hold one more layer entry than
        # the Keras model.
        self.assertEqual(len(model.layers), len(reconPmmlObj.DeepNetwork[0].NetworkLayer) - 1)
Example #26
Source File: test_few.py    From few with GNU General Public License v3.0 6 votes vote down vote up
def test_few_fit_shapes():
    """test_few.py: fit and predict return correct shapes """
    np.random.seed(202)
    # load example data (the DataFrame copy the original built here was
    # never read, so it has been removed)
    boston = load_boston()
    print("feature shape:",boston.data.shape)

    learner = FEW(generations=1, population_size=5,
                mutation_rate=0.2, crossover_rate=0.8,
                ml = LassoLarsCV(), min_depth = 1, max_depth = 3,
                sel = 'epsilon_lexicase', tourn_size = 2,
                random_state=0, verbosity=0,
                disable_update_check=False, fit_choice = 'mse')

    # First 300 rows for training, the remainder for testing.
    score = learner.fit(boston.data[:300], boston.target[:300])
    print("learner:",learner._best_estimator)
    yhat_test = learner.predict(boston.data[300:])
    test_score = learner.score(boston.data[300:],boston.target[300:])
    print("train score:",score,"test score:",test_score,
    "test r2:",r2_score(boston.target[300:],yhat_test))
    # Predictions must have the same shape as the held-out target.
    assert yhat_test.shape == boston.target[300:].shape
Example #27
Source File: test_random_forest_regression_numeric.py    From coremltools with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def setUpClass(self):
        """
        Load the load_boston() data and store the dataset, a
        float32-rounded feature matrix, the target, the feature names and
        the output name on ``self``.
        """
        from sklearn.datasets import load_boston

        boston = load_boston()
        self.scikit_data = boston

        # Round-trip through float32 ("f") and back to float64 ("d");
        # scikit-learn downcasts data.
        rounded = boston.data.astype("f")
        self.X = rounded.astype("d")

        self.target = boston.target
        self.feature_names = boston.feature_names
        self.output_name = "target"
Example #28
Source File: test_decision_tree_classifier.py    From coremltools with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def setUpClass(self):
        """
        Fit a DecisionTreeClassifier on the load_boston() features, using
        "target is above its mean" as a boolean label, and store the dataset
        and the model on ``self``.
        """
        from sklearn.datasets import load_boston
        from sklearn.tree import DecisionTreeClassifier

        boston = load_boston()
        tree = DecisionTreeClassifier(random_state=1)

        # Boolean label: True where the target exceeds its mean.
        prices = boston["target"]
        above_mean = prices > prices.mean()
        tree.fit(boston["data"], above_mean)

        self.scikit_data = boston
        self.scikit_model = tree
Example #29
Source File: test_boosted_trees_regression_numeric.py    From coremltools with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def setUpClass(self):
        """
        Load the load_boston() data and store the dataset, its features and
        target, the feature names and the output name on ``self``.
        """
        boston = load_boston()

        self.scikit_data = boston
        self.X, self.target = boston["data"], boston["target"]
        self.feature_names = boston.feature_names
        self.output_name = "target"
Example #30
Source File: test_pipeline.py    From coremltools with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def setUpClass(self):
        """
        Set up the unit test by loading the dataset and training a model.

        A one-step Pipeline wrapping LinearRegression is fitted on the
        load_boston() data; the dataset and the fitted pipeline are stored
        on ``self``. Returns without storing anything when _HAS_SKLEARN is
        false.
        """
        if not _HAS_SKLEARN:
            return
        # The original also bound scikit_data.feature_names to a local that
        # was never read; that unused local has been removed.
        scikit_data = load_boston()

        scikit_model = Pipeline(steps=[("linear", LinearRegression())])
        scikit_model.fit(scikit_data["data"], scikit_data["target"])

        # Save the data and the model
        self.scikit_data = scikit_data
        self.scikit_model = scikit_model