Python sklearn.preprocessing.KBinsDiscretizer() Examples

The following are 26 code examples showing how to use sklearn.preprocessing.KBinsDiscretizer(). These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example.

You may check out the related API usage on the sidebar.

You may also want to check out all available functions/classes of the module sklearn.preprocessing, or try the search function.

Example 1
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_discretization.py    License: MIT License 6 votes vote down vote up
def test_encode_options():
    """Compare the three ``encode`` modes against a reference one-hot encoding.

    The ordinal output is one-hot encoded with ``OneHotEncoder`` and must
    match the discretizer's own ``'onehot-dense'`` (dense) and ``'onehot'``
    (sparse) outputs.  ``X`` comes from the enclosing module scope, which is
    not part of this block.
    """
    def fit_with(encoding):
        # A fresh estimator (and a fresh n_bins list) for every encoding mode.
        return KBinsDiscretizer(n_bins=[2, 3, 3, 3], encode=encoding).fit(X)

    def reference_encoder(sparse):
        # One category range per feature, mirroring the bin counts above.
        # NOTE(review): newer scikit-learn releases appear to have renamed
        # ``sparse`` to ``sparse_output`` - confirm against the pinned version.
        return OneHotEncoder(
            categories=[np.arange(count) for count in [2, 3, 3, 3]],
            sparse=sparse)

    ordinal_codes = fit_with('ordinal').transform(X)

    # Dense one-hot output must not be a scipy sparse matrix.
    dense_onehot = fit_with('onehot-dense').transform(X)
    assert not sp.issparse(dense_onehot)
    assert_array_equal(
        reference_encoder(False).fit_transform(ordinal_codes), dense_onehot)

    # Default-style one-hot output must be sparse; compare after densifying.
    sparse_onehot = fit_with('onehot').transform(X)
    assert sp.issparse(sparse_onehot)
    assert_array_equal(
        reference_encoder(True).fit_transform(ordinal_codes).toarray(),
        sparse_onehot.toarray())
Example 2
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_discretization.py    License: MIT License 6 votes vote down vote up
def test_nonuniform_strategies(
        strategy, expected_2bins, expected_3bins, expected_5bins):
    """Check ordinal bin codes for 2, 3 and 5 bins on a fixed 1-feature sample.

    Parameters
    ----------
    strategy : binning strategy forwarded to ``KBinsDiscretizer``.
    expected_2bins, expected_3bins, expected_5bins : expected flattened
        ordinal codes for the corresponding number of bins.
    """
    X = np.array([0, 0.5, 2, 3, 9, 10]).reshape(-1, 1)

    # Same check repeated for each bin count, in the original order.
    cases = ((2, expected_2bins), (3, expected_3bins), (5, expected_5bins))
    for bin_count, expected in cases:
        discretizer = KBinsDiscretizer(
            n_bins=bin_count, strategy=strategy, encode='ordinal')
        codes = discretizer.fit_transform(X)
        assert_array_equal(expected, codes.ravel())
Example 3
Project: sklearn-onnx   Author: onnx   File: test_sklearn_k_bins_discretiser_converter.py    License: MIT License 6 votes vote down vote up
def test_model_k_bins_discretiser_ordinal_uniform(self):
        # Fit a 3-bin, uniform, ordinal-encoded KBinsDiscretizer on a small
        # float matrix, convert it with convert_sklearn, then pass the data,
        # the fitted model and the converted model to dump_data_and_model.
        samples = np.array([
            [1.2, 3.2, 1.3, -5.6],
            [4.3, -3.2, 5.7, 1.0],
            [0, 3.2, 4.7, -8.9],
        ])
        model = KBinsDiscretizer(
            n_bins=3, encode="ordinal", strategy="uniform").fit(samples)

        # Float input: first dimension None, second one column per feature.
        input_spec = [("input", FloatTensorType([None, samples.shape[1]]))]
        model_onnx = convert_sklearn(
            model, "scikit-learn KBinsDiscretiser", input_spec,
            target_opset=TARGET_OPSET)
        self.assertTrue(model_onnx is not None)

        # Condition string forwarded verbatim as allow_failure.
        failure_rule = (
            "StrictVersion(onnxruntime.__version__)"
            "<= StrictVersion('0.2.1')")
        dump_data_and_model(
            samples.astype(np.float32), model, model_onnx,
            basename="SklearnKBinsDiscretiserOrdinalUniform",
            allow_failure=failure_rule)
Example 4
Project: sklearn-onnx   Author: onnx   File: test_sklearn_k_bins_discretiser_converter.py    License: MIT License 6 votes vote down vote up
def test_model_k_bins_discretiser_onehot_dense_uniform(self):
        # Fit a uniform, dense one-hot KBinsDiscretizer with per-feature bin
        # counts on a small float matrix, convert it with convert_sklearn,
        # then pass data, model and converted model to dump_data_and_model.
        samples = np.array([
            [1.2, 3.2, 1.3, -5.6],
            [4.3, -3.2, 5.7, 1.0],
            [0, 3.2, 4.7, -8.9],
        ])
        model = KBinsDiscretizer(
            n_bins=[3, 2, 3, 4], encode="onehot-dense",
            strategy="uniform").fit(samples)

        # Float input: first dimension None, second one column per feature.
        input_spec = [("input", FloatTensorType([None, samples.shape[1]]))]
        model_onnx = convert_sklearn(
            model, "scikit-learn KBinsDiscretiser", input_spec,
            target_opset=TARGET_OPSET)
        self.assertTrue(model_onnx is not None)

        # Condition string forwarded verbatim as allow_failure.
        failure_rule = (
            "StrictVersion(onnxruntime.__version__)"
            "<= StrictVersion('0.2.1')")
        dump_data_and_model(
            samples.astype(np.float32), model, model_onnx,
            basename="SklearnKBinsDiscretiserOneHotDenseUniform",
            allow_failure=failure_rule)
Example 5
Project: sklearn-onnx   Author: onnx   File: test_sklearn_k_bins_discretiser_converter.py    License: MIT License 6 votes vote down vote up
def test_model_k_bins_discretiser_ordinal_uniform_int(self):
        # Integer-input variant: fit a 3-bin, uniform, ordinal-encoded
        # KBinsDiscretizer, convert it with an Int64 input declaration, then
        # pass data, model and converted model to dump_data_and_model.
        samples = np.array([
            [1, 3, 3, -6],
            [3, -2, 5, 0],
            [0, 2, 7, -9],
        ])
        model = KBinsDiscretizer(
            n_bins=3, encode="ordinal", strategy="uniform").fit(samples)

        # Int64 input: first dimension None, second one column per feature.
        input_spec = [("input", Int64TensorType([None, samples.shape[1]]))]
        model_onnx = convert_sklearn(
            model, "scikit-learn KBinsDiscretiser", input_spec,
            target_opset=TARGET_OPSET)
        self.assertTrue(model_onnx is not None)

        # Condition string forwarded verbatim as allow_failure.
        failure_rule = (
            "StrictVersion(onnxruntime.__version__)"
            "<= StrictVersion('0.2.1')")
        dump_data_and_model(
            samples.astype(np.int64), model, model_onnx,
            basename="SklearnKBinsDiscretiserOrdinalUniformInt",
            allow_failure=failure_rule)
Example 6
Project: sklearn-onnx   Author: onnx   File: test_sklearn_k_bins_discretiser_converter.py    License: MIT License 6 votes vote down vote up
def test_model_k_bins_discretiser_ordinal_quantile_int(self):
        # Integer-input variant: fit a quantile, ordinal-encoded
        # KBinsDiscretizer with per-feature bin counts, convert it with an
        # Int64 input declaration, then pass data, model and converted model
        # to dump_data_and_model.
        samples = np.array([
            [1, 3, 3, -6],
            [3, -2, 5, 0],
            [0, 2, 7, -9],
            [-1, 0, 1, -16],
            [31, -5, 15, 10],
            [12, -2, 8, -19],
            [12, 13, 31, -16],
            [0, -21, 15, 30],
            [10, 22, 71, -91],
        ])
        model = KBinsDiscretizer(
            n_bins=[3, 2, 3, 4], encode="ordinal",
            strategy="quantile").fit(samples)

        # Int64 input: first dimension None, second one column per feature.
        input_spec = [("input", Int64TensorType([None, samples.shape[1]]))]
        model_onnx = convert_sklearn(
            model, "scikit-learn KBinsDiscretiser", input_spec,
            target_opset=TARGET_OPSET)
        self.assertTrue(model_onnx is not None)

        # Condition string forwarded verbatim as allow_failure.
        failure_rule = (
            "StrictVersion(onnxruntime.__version__)"
            "<= StrictVersion('0.2.1')")
        dump_data_and_model(
            samples.astype(np.int64), model, model_onnx,
            basename="SklearnKBinsDiscretiserOrdinalQuantileInt",
            allow_failure=failure_rule)
Example 7
Project: sklearn-onnx   Author: onnx   File: test_sklearn_k_bins_discretiser_converter.py    License: MIT License 6 votes vote down vote up
def test_model_k_bins_discretiser_ordinal_kmeans_int(self):
        # Integer-input variant: fit a 3-bin, kmeans, ordinal-encoded
        # KBinsDiscretizer, convert it with an Int64 input declaration, then
        # pass data, model and converted model to dump_data_and_model.
        samples = np.array([
            [1, 3, 3, -6],
            [3, -2, 5, 0],
            [0, 2, 7, -9],
            [-1, 0, 1, -16],
            [31, -5, 15, 10],
            [12, -2, 8, -19],
        ])
        model = KBinsDiscretizer(
            n_bins=3, encode="ordinal", strategy="kmeans").fit(samples)

        # Int64 input: first dimension None, second one column per feature.
        input_spec = [("input", Int64TensorType([None, samples.shape[1]]))]
        model_onnx = convert_sklearn(
            model, "scikit-learn KBinsDiscretiser", input_spec,
            target_opset=TARGET_OPSET)
        self.assertTrue(model_onnx is not None)

        # Condition string forwarded verbatim as allow_failure.
        failure_rule = (
            "StrictVersion(onnxruntime.__version__)"
            "<= StrictVersion('0.2.1')")
        dump_data_and_model(
            samples.astype(np.int64), model, model_onnx,
            basename="SklearnKBinsDiscretiserOrdinalKMeansInt",
            allow_failure=failure_rule)
Example 8
Project: sklearn-onnx   Author: onnx   File: test_sklearn_k_bins_discretiser_converter.py    License: MIT License 6 votes vote down vote up
def test_model_k_bins_discretiser_onehot_dense_uniform_int(self):
        # Integer-input variant: fit a uniform, dense one-hot
        # KBinsDiscretizer with per-feature bin counts, convert it with an
        # Int64 input declaration, then pass data, model and converted model
        # to dump_data_and_model.
        samples = np.array([
            [1, 3, 3, -6],
            [3, -2, 5, 0],
            [0, 2, 7, -9],
        ])
        model = KBinsDiscretizer(
            n_bins=[3, 2, 3, 4], encode="onehot-dense",
            strategy="uniform").fit(samples)

        # Int64 input: first dimension None, second one column per feature.
        input_spec = [("input", Int64TensorType([None, samples.shape[1]]))]
        model_onnx = convert_sklearn(
            model, "scikit-learn KBinsDiscretiser", input_spec,
            target_opset=TARGET_OPSET)
        self.assertTrue(model_onnx is not None)

        # Condition string forwarded verbatim as allow_failure.
        failure_rule = (
            "StrictVersion(onnxruntime.__version__)"
            "<= StrictVersion('0.2.1')")
        dump_data_and_model(
            samples.astype(np.int64), model, model_onnx,
            basename="SklearnKBinsDiscretiserOneHotDenseUniformInt",
            allow_failure=failure_rule)
Example 9
Project: sklearn-onnx   Author: onnx   File: test_sklearn_k_bins_discretiser_converter.py    License: MIT License 6 votes vote down vote up
def test_model_k_bins_discretiser_onehot_dense_quantile_int(self):
        # Integer-input variant: fit a quantile, dense one-hot
        # KBinsDiscretizer with per-feature bin counts, convert it with an
        # Int64 input declaration, then pass data, model and converted model
        # to dump_data_and_model.
        samples = np.array([
            [1, 3, 3, -6],
            [3, -2, 5, 0],
            [0, 2, 7, -9],
        ])
        model = KBinsDiscretizer(
            n_bins=[3, 2, 3, 4], encode="onehot-dense",
            strategy="quantile").fit(samples)

        # Int64 input: first dimension None, second one column per feature.
        input_spec = [("input", Int64TensorType([None, samples.shape[1]]))]
        model_onnx = convert_sklearn(
            model, "scikit-learn KBinsDiscretiser", input_spec,
            target_opset=TARGET_OPSET)
        self.assertTrue(model_onnx is not None)

        # Condition string forwarded verbatim as allow_failure.
        failure_rule = (
            "StrictVersion(onnxruntime.__version__)"
            "<= StrictVersion('0.2.1')")
        dump_data_and_model(
            samples.astype(np.int64), model, model_onnx,
            basename="SklearnKBinsDiscretiserOneHotDenseQuantileInt",
            allow_failure=failure_rule)
Example 10
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_discretization.py    License: MIT License 5 votes vote down vote up
def test_fit_transform(strategy, expected):
    """Fit a 3-bin ordinal discretizer on ``X`` and compare its transform.

    ``X`` comes from the enclosing module scope (not defined in this block);
    ``expected`` holds the ordinal codes expected for ``strategy``.
    """
    discretizer = KBinsDiscretizer(
        n_bins=3, encode='ordinal', strategy=strategy)
    discretizer.fit(X)
    binned = discretizer.transform(X)
    assert_array_equal(expected, binned)
Example 11
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_discretization.py    License: MIT License 5 votes vote down vote up
def test_valid_n_bins():
    """Check that integer ``n_bins`` values are accepted and stored as ints.

    ``X`` comes from the enclosing module scope (not defined in this block).
    """
    # A plain Python int is accepted.
    KBinsDiscretizer(n_bins=2).fit_transform(X)
    # So is a NumPy integer scalar pulled out of an array.
    KBinsDiscretizer(n_bins=np.array([2])[0]).fit_transform(X)
    # ``np.int`` was only an alias of the builtin ``int`` and is no longer
    # available in recent NumPy releases; ``np.dtype(int)`` names the same
    # dtype on every NumPy version.
    assert KBinsDiscretizer(n_bins=2).fit(X).n_bins_.dtype == np.dtype(int)
Example 12
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_discretization.py    License: MIT License 5 votes vote down vote up
def test_invalid_n_bins():
    """Check the ``ValueError`` messages raised for invalid scalar ``n_bins``.

    ``X`` comes from the enclosing module scope (not defined in this block).
    """
    # (invalid n_bins value, message expected from fit_transform)
    bad_cases = (
        (1,
         "KBinsDiscretizer received an invalid "
         "number of bins. Received 1, expected at least 2."),
        (1.1,
         "KBinsDiscretizer received an invalid "
         "n_bins type. Received float, expected int."),
    )
    for bad_value, message in bad_cases:
        est = KBinsDiscretizer(n_bins=bad_value)
        assert_raise_message(ValueError, message, est.fit_transform, X)
Example 13
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_discretization.py    License: MIT License 5 votes vote down vote up
def test_invalid_n_bins_array():
    """Check the ``ValueError`` messages raised for invalid array ``n_bins``.

    ``X`` comes from the enclosing module scope (not defined in this block).
    """
    shape_message = ("n_bins must be a scalar or array of shape "
                     "(n_features,).")

    # (invalid n_bins value, message expected from fit_transform)
    bad_cases = [
        # Bad shape
        (np.full((2, 4), 2.), shape_message),
        # Incorrect number of features
        ([1, 2, 2], shape_message),
        # Bad bin values
        ([1, 2, 2, 1],
         "KBinsDiscretizer received an invalid number of bins "
         "at indices 0, 3. Number of bins must be at least 2, "
         "and must be an int."),
        # Float bin values
        ([2.1, 2, 2.1, 2],
         "KBinsDiscretizer received an invalid number of bins "
         "at indices 0, 2. Number of bins must be at least 2, "
         "and must be an int."),
    ]
    for n_bins, message in bad_cases:
        est = KBinsDiscretizer(n_bins=n_bins)
        assert_raise_message(ValueError, message, est.fit_transform, X)
Example 14
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_discretization.py    License: MIT License 5 votes vote down vote up
def test_fit_transform_n_bins_array(strategy, expected):
    """Check transform output and ``bin_edges_`` shapes for per-feature bins.

    ``X`` comes from the enclosing module scope (not defined in this block);
    ``expected`` holds the ordinal codes expected for ``strategy``.
    """
    discretizer = KBinsDiscretizer(
        n_bins=[2, 3, 3, 3], encode='ordinal', strategy=strategy)
    est = discretizer.fit(X)
    assert_array_equal(expected, est.transform(X))

    # One entry of bin_edges_ per feature ...
    feature_count = np.array(X).shape[1]
    assert est.bin_edges_.shape == (feature_count, )
    # ... and each entry holds one more edge than that feature has bins.
    for edges, bin_count in zip(est.bin_edges_, est.n_bins_):
        assert edges.shape == (bin_count + 1, )
Example 15
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_discretization.py    License: MIT License 5 votes vote down vote up
def test_same_min_max(strategy):
    """Check how a constant feature is handled during fit and transform.

    Fitting must emit a ``UserWarning`` for feature 0, leave it with a single
    bin, and transforming must map that feature to zeros.
    """
    warnings.simplefilter("always")
    # Column 0 is constant; column 1 varies.
    X = np.array([[1, -2], [1, -1], [1, 0], [1, 1]])
    discretizer = KBinsDiscretizer(
        strategy=strategy, n_bins=3, encode='ordinal')

    expected_warning = ("Feature 0 is constant and will be replaced "
                        "with 0.")
    assert_warns_message(UserWarning, expected_warning, discretizer.fit, X)
    assert discretizer.n_bins_[0] == 1

    # The constant feature is replaced with zeros.
    binned = discretizer.transform(X)
    assert_array_equal(binned[:, 0], np.zeros(X.shape[0]))
Example 16
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_discretization.py    License: MIT License 5 votes vote down vote up
def test_transform_1d_behavior():
    """Check that 1-D input raises ``ValueError`` in both fit and transform."""
    flat = np.arange(4)

    # Fitting directly on 1-D data is rejected.
    unfitted = KBinsDiscretizer(n_bins=2)
    assert_raises(ValueError, unfitted.fit, flat)

    # After fitting on the column-shaped data, 1-D transform input is
    # rejected as well.
    fitted = KBinsDiscretizer(n_bins=2)
    fitted.fit(flat.reshape(-1, 1))
    assert_raises(ValueError, fitted.transform, flat)
Example 17
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_discretization.py    License: MIT License 5 votes vote down vote up
def test_numeric_stability():
    """Check that the 2-bin ordinal codes survive scaling the data down.

    The same five-point column is divided by 10**1 through 10**8 and must be
    discretized into the same codes every time.
    """
    base_values = np.array([2., 4., 6., 8., 10.]).reshape(-1, 1)
    expected_codes = np.array([0, 0, 1, 1, 1]).reshape(-1, 1)

    for exponent in range(1, 9):
        scaled = base_values / 10**exponent
        discretizer = KBinsDiscretizer(n_bins=2, encode='ordinal')
        codes = discretizer.fit_transform(scaled)
        assert_array_equal(expected_codes, codes)
Example 18
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_discretization.py    License: MIT License 5 votes vote down vote up
def test_invalid_encode_option():
    """Check the ``ValueError`` message raised by fit for an unknown ``encode``.

    ``X`` comes from the enclosing module scope (not defined in this block).
    """
    expected_message = ("Valid options for 'encode' are "
                        "('onehot', 'onehot-dense', 'ordinal'). "
                        "Got encode='invalid-encode' instead.")
    discretizer = KBinsDiscretizer(
        n_bins=[2, 3, 3, 3], encode='invalid-encode')
    assert_raise_message(ValueError, expected_message, discretizer.fit, X)
Example 19
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_discretization.py    License: MIT License 5 votes vote down vote up
def test_inverse_transform(strategy, encode, expected_inv):
    """Check ``inverse_transform`` of the fitted output against ``expected_inv``.

    ``X`` comes from the enclosing module scope (not defined in this block).
    """
    discretizer = KBinsDiscretizer(
        n_bins=3, strategy=strategy, encode=encode)
    binned = discretizer.fit_transform(X)
    restored = discretizer.inverse_transform(binned)
    assert_array_almost_equal(expected_inv, restored)
Example 20
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_discretization.py    License: MIT License 5 votes vote down vote up
def test_transform_outside_fit_range(strategy):
    """Check that values outside the fitted range land in the outermost bins."""
    train = np.array([0, 1, 2, 3])[:, None]
    discretizer = KBinsDiscretizer(
        n_bins=4, strategy=strategy, encode='ordinal')
    discretizer.fit(train)

    # One value below and one above everything seen during fit.
    outside = np.array([-2, 5])[:, None]
    outside_codes = discretizer.transform(outside)

    highest_code = outside_codes.max(axis=0)
    lowest_code = outside_codes.min(axis=0)
    assert_array_equal(highest_code + 1, discretizer.n_bins_)
    assert_array_equal(lowest_code, [0])
Example 21
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_discretization.py    License: MIT License 5 votes vote down vote up
def test_overwrite():
    """Check that transform and inverse_transform leave their inputs intact."""
    data = np.array([0, 1, 2, 3])[:, None]
    data_snapshot = data.copy()

    discretizer = KBinsDiscretizer(n_bins=3, encode="ordinal")
    codes = discretizer.fit_transform(data)
    # fit_transform must not modify its input.
    assert_array_equal(data, data_snapshot)

    codes_snapshot = codes.copy()
    restored = discretizer.inverse_transform(codes)
    # inverse_transform must not modify its input either.
    assert_array_equal(codes, codes_snapshot)
    assert_array_equal(restored, np.array([[0.5], [1.5], [2.5], [2.5]]))
Example 22
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_discretization.py    License: MIT License 5 votes vote down vote up
def test_redundant_bins(strategy, expected_bin_edges):
    """Check the warning and resulting edges when bins collapse.

    Fitting 3 bins on data with only two distinct values must emit a
    ``UserWarning`` and leave ``bin_edges_[0]`` close to
    ``expected_bin_edges``.
    """
    samples = [[0], [0], [0], [0], [3], [3]]
    discretizer = KBinsDiscretizer(n_bins=3, strategy=strategy)
    expected_warning = (
        "Bins whose width are too small (i.e., <= 1e-8) in feature 0 "
        "are removed. Consider decreasing the number of bins.")
    assert_warns_message(
        UserWarning, expected_warning, discretizer.fit, samples)
    assert_array_almost_equal(discretizer.bin_edges_[0], expected_bin_edges)
Example 23
Project: SDGym   Author: sdv-dev   File: utils.py    License: MIT License 5 votes vote down vote up
def fit(self, data, categorical_columns=tuple(), ordinal_columns=tuple()):
        """Record column metadata and fit a discretizer on continuous columns.

        Stores the result of ``self.get_metadata`` in ``self.meta``, the
        positions of entries whose ``'type'`` equals ``CONTINUOUS`` in
        ``self.column_index``, and a uniform, ordinal-encoded
        ``KBinsDiscretizer`` with ``self.n_bins`` bins in
        ``self.discretizer``.  The discretizer is only fitted when at least
        one continuous column was found.

        Args:
            data: indexed as ``data[:, columns]``
                (presumably a 2-D NumPy array - confirm against callers).
            categorical_columns: forwarded to ``self.get_metadata``.
            ordinal_columns: forwarded to ``self.get_metadata``.
        """
        self.meta = self.get_metadata(data, categorical_columns, ordinal_columns)

        continuous_positions = []
        for position, column_info in enumerate(self.meta):
            if column_info['type'] == CONTINUOUS:
                continuous_positions.append(position)
        self.column_index = continuous_positions

        self.discretizer = KBinsDiscretizer(
            n_bins=self.n_bins, encode='ordinal', strategy='uniform')

        # Nothing to fit when the data has no continuous columns.
        if self.column_index:
            self.discretizer.fit(data[:, self.column_index])
Example 24
Project: sklearn-onnx   Author: onnx   File: test_sklearn_k_bins_discretiser_converter.py    License: MIT License 5 votes vote down vote up
def test_model_k_bins_discretiser_ordinal_quantile(self):
        # Fit a quantile, ordinal-encoded KBinsDiscretizer with per-feature
        # bin counts on a float matrix, convert it with convert_sklearn, then
        # pass data, model and converted model to dump_data_and_model.
        samples = np.array([
            [1.2, 3.2, 1.3, -5.6],
            [4.3, -3.2, 5.7, 1.0],
            [0, 3.2, 4.7, -8.9],
            [0.2, 1.3, 0.6, -9.4],
            [0.8, 4.2, -14.7, -28.9],
            [8.2, 1.9, 2.6, -5.4],
            [4.8, -9.2, 33.7, 3.9],
            [81.2, 1., 0.6, 12.4],
            [6.8, 11.2, -1.7, -2.9],
            [11.2, 12.9, 4.3, -1.4],
        ])
        model = KBinsDiscretizer(
            n_bins=[3, 2, 3, 4], encode="ordinal",
            strategy="quantile").fit(samples)

        # Float input: first dimension None, second one column per feature.
        input_spec = [("input", FloatTensorType([None, samples.shape[1]]))]
        model_onnx = convert_sklearn(
            model, "scikit-learn KBinsDiscretiser", input_spec,
            target_opset=TARGET_OPSET)
        self.assertTrue(model_onnx is not None)

        # Condition string forwarded verbatim as allow_failure.
        failure_rule = (
            "StrictVersion(onnxruntime.__version__)"
            "<= StrictVersion('0.2.1')")
        dump_data_and_model(
            samples.astype(np.float32), model, model_onnx,
            basename="SklearnKBinsDiscretiserOrdinalQuantile",
            allow_failure=failure_rule)
Example 25
Project: sklearn-onnx   Author: onnx   File: test_sklearn_k_bins_discretiser_converter.py    License: MIT License 5 votes vote down vote up
def test_model_k_bins_discretiser_ordinal_kmeans(self):
        # Fit a 3-bin, kmeans, ordinal-encoded KBinsDiscretizer on a float
        # matrix, convert it with convert_sklearn, then pass data, model and
        # converted model to dump_data_and_model.
        samples = np.array([
            [1.2, 3.2, 1.3, -5.6],
            [4.3, -3.2, 5.7, 1.0],
            [0, 3.2, 4.7, -8.9],
            [0.2, 1.3, 0.6, -9.4],
            [0.8, 4.2, -14.7, -28.9],
            [8.2, 1.9, 2.6, -5.4],
            [4.8, -9.2, 33.7, 3.9],
            [81.2, 1., 0.6, 12.4],
            [6.8, 11.2, -1.7, -2.9],
            [11.2, 12.9, 4.3, -1.4],
        ])
        model = KBinsDiscretizer(
            n_bins=3, encode="ordinal", strategy="kmeans").fit(samples)

        # Float input: first dimension None, second one column per feature.
        input_spec = [("input", FloatTensorType([None, samples.shape[1]]))]
        model_onnx = convert_sklearn(
            model, "scikit-learn KBinsDiscretiser", input_spec,
            target_opset=TARGET_OPSET)
        self.assertTrue(model_onnx is not None)

        # Condition string forwarded verbatim as allow_failure.
        failure_rule = (
            "StrictVersion(onnxruntime.__version__)"
            "<= StrictVersion('0.2.1')")
        dump_data_and_model(
            samples.astype(np.float32), model, model_onnx,
            basename="SklearnKBinsDiscretiserOrdinalKMeans",
            allow_failure=failure_rule)
Example 26
Project: sklearn-onnx   Author: onnx   File: test_sklearn_k_bins_discretiser_converter.py    License: MIT License 5 votes vote down vote up
def test_model_k_bins_discretiser_onehot_dense_quantile(self):
        # Fit a quantile, dense one-hot KBinsDiscretizer with per-feature bin
        # counts on a float matrix, convert it with convert_sklearn, then
        # pass data, model and converted model to dump_data_and_model.
        samples = np.array([
            [1.2, 3.2, 1.3, -5.6],
            [4.3, -3.2, 5.7, 1.0],
            [0, 3.2, 4.7, -8.9],
            [0.2, 1.3, 0.6, -9.4],
            [0.8, 4.2, -14.7, -28.9],
            [8.2, 1.9, 2.6, -5.4],
            [4.8, -9.2, 33.7, 3.9],
            [81.2, 1., 0.6, 12.4],
            [6.8, 11.2, -1.7, -2.9],
            [11.2, 12.9, 4.3, -1.4],
        ])
        model = KBinsDiscretizer(
            n_bins=[3, 2, 3, 4], encode="onehot-dense",
            strategy="quantile").fit(samples)

        # Float input: first dimension None, second one column per feature.
        input_spec = [("input", FloatTensorType([None, samples.shape[1]]))]
        model_onnx = convert_sklearn(
            model, "scikit-learn KBinsDiscretiser", input_spec,
            target_opset=TARGET_OPSET)
        self.assertTrue(model_onnx is not None)

        # Condition string forwarded verbatim as allow_failure.
        failure_rule = (
            "StrictVersion(onnxruntime.__version__)"
            "<= StrictVersion('0.2.1')")
        dump_data_and_model(
            samples.astype(np.float32), model, model_onnx,
            basename="SklearnKBinsDiscretiserOneHotDenseQuantile",
            allow_failure=failure_rule)