Python sklearn.preprocessing.QuantileTransformer() Examples

The following are 15 code examples showing how to use sklearn.preprocessing.QuantileTransformer(). These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example.

You may check out the related API usage on the sidebar.

You may also want to check out all available functions/classes of the module sklearn.preprocessing, or try the search function.

Example 1
Project: sagemaker-scikit-learn-extension   Author: aws   File: base.py    License: Apache License 2.0 6 votes vote down vote up
def fit(self, X, y=None):
        """Fit the parent transformer, then fit a ``QuantileTransformer`` on the validated input.

        The parent ``fit`` is called first with the raw ``X``; according to the original
        documentation of this method it computes the lower and upper quantile cutoffs and
        the columns to transform. Afterwards ``X`` is passed through ``check_array`` and a
        ``QuantileTransformer(random_state=0, copy=True)`` is stored on
        ``self.quantile_transformer_`` and fitted on the validated array.

        Parameters
        ----------
        X : array-like, shape [n_samples, n_features]
            The data array to transform. Must be numeric, non-sparse, and two-dimensional.
        y : ignored
            Not used by this method.

        Returns
        -------
        self : QuantileExtremeValueTransformer
        """
        super().fit(X)
        validated = check_array(X)
        # The attribute is assigned before fitting, exactly as in the original ordering.
        quantile_transformer = QuantileTransformer(copy=True, random_state=0)
        self.quantile_transformer_ = quantile_transformer
        quantile_transformer.fit(validated)
        return self
Example 2
Project: CAIL2019   Author: GuidoPaul   File: processing.py    License: MIT License 6 votes vote down vote up
def do_feature_engineering(list_text):
    """Build the engineered feature table for ``list_text`` and quantile-transform every column.

    The input is wrapped in a single-column DataFrame named ``"col2"``. Two feature
    frames are derived from it (length-related features and regex features), their
    indexes are asserted to be equal, and they are concatenated column-wise. Each
    column of the result is then replaced by the output of
    ``QuantileTransformer(random_state=2019).fit_transform`` applied to that column alone.

    Parameters
    ----------
    list_text : data accepted by ``pd.DataFrame(..., columns=["col2"])``.

    Returns
    -------
    The concatenated feature DataFrame with every column quantile-transformed.
    """
    frame = pd.DataFrame(list_text, columns=["col2"])

    feature_frames = [
        get_length_related_features_col2(frame),
        get_col2_re_features(frame),
    ]

    # All feature frames must share the first frame's index before concatenation.
    reference_index = feature_frames[0].index
    for other_frame in feature_frames[1:]:
        pd.testing.assert_index_equal(reference_index, other_frame.index)

    combined = pd.concat(feature_frames, axis=1)
    scaler = QuantileTransformer(random_state=2019)
    # The same transformer object is re-fitted on each column independently.
    for column in combined.columns:
        combined[column] = scaler.fit_transform(combined[[column]])
    return combined
Example 3
Project: scikit-downscale   Author: jhamman   File: utils.py    License: Apache License 2.0 5 votes vote down vote up
def fit(self, X):
        """Fit the quantile mapping model.

        A copy of ``self.qt_kwargs`` is used as the keyword arguments for
        ``QuantileTransformer``; when it has no ``"n_quantiles"`` entry, ``len(X)``
        (after ``ensure_samples_features``) is used. If ``self.detrend`` is truthy the
        data is first passed through ``LinearTrendTransformer(**self.lt_kwargs)``.
        The fitted transformer is stored on ``self.x_cdf_fit_``.

        Parameters
        ----------
        X : array-like, shape  [n_samples, n_features]
            Training data.

        Returns
        -------
        self
        """
        samples = ensure_samples_features(X)

        transformer_options = self.qt_kwargs.copy()
        if "n_quantiles" not in transformer_options:
            transformer_options["n_quantiles"] = len(samples)

        # maybe detrend the input datasets
        cdf_input = (
            LinearTrendTransformer(**self.lt_kwargs).fit_transform(samples)
            if self.detrend
            else samples
        )

        # calculate the cdfs for X
        # TODO: replace this transformer with something that uses robust
        # empirical cdf plotting positions
        cdf_model = QuantileTransformer(**transformer_options)
        self.x_cdf_fit_ = cdf_model.fit(cdf_input)

        return self
Example 4
Project: sagemaker-scikit-learn-extension   Author: aws   File: base.py    License: Apache License 2.0 5 votes vote down vote up
def _transform_function(self, x, idx=None):
        """Applies single column quantile transform from ``sklearn.preprocessing.QuantileTransformer``.

        Uses ``quantile_transformer_.quantiles_`` calculated during ``fit`` if given an index, otherwise the quantiles
        will be calculated from input ``x``.

        Parameters
        ----------
        x : column data
            Passed unchanged to whichever transform is selected.
        idx : int or None, default None
            Column index into ``quantile_transformer_.quantiles_``. Any index, including ``0``, selects the
            fitted quantiles for that column. ``None`` selects ``quantile_transform_nonrandom(x)``.

        Returns
        -------
        The result of the selected transform.
        """
        # Compare against None explicitly: a plain truthiness test would treat column 0
        # as "no index" and silently skip the fitted quantiles for the first column.
        if idx is not None:
            # The trailing ``False`` is the third positional argument of the private
            # ``_transform_col`` (presumably the ``inverse`` flag; confirm against sklearn).
            return self.quantile_transformer_._transform_col(  # pylint: disable=protected-access
                x, self.quantile_transformer_.quantiles_[:, idx], False
            )
        return quantile_transform_nonrandom(x)
Example 5
Project: lale   Author: IBM   File: quantile_transformer.py    License: Apache License 2.0 5 votes vote down vote up
def __init__(self, n_quantiles=1000, output_distribution='uniform', ignore_implicit_zeros=False, subsample=100000, random_state=None, copy=True):
        """Record the hyperparameters and build the wrapped model from them.

        Every argument is stored under its own name in ``self._hyperparams`` and the
        same mapping is unpacked as keyword arguments into ``SKLModel``, whose instance
        is kept on ``self._wrapped_model``.
        """
        self._hyperparams = dict(
            n_quantiles=n_quantiles,
            output_distribution=output_distribution,
            ignore_implicit_zeros=ignore_implicit_zeros,
            subsample=subsample,
            random_state=random_state,
            copy=copy,
        )
        self._wrapped_model = SKLModel(**self._hyperparams)
Example 6
Project: mltk-algo-contrib   Author: splunk   File: QuantileTransformer.py    License: Apache License 2.0 5 votes vote down vote up
def __init__(self, options):
        """Configure the algorithm from ``options`` and create the underlying estimator.

        ``options`` is first handed to ``self.handle_options``. Its ``'params'`` entry
        (an empty dict when absent) is converted with ``convert_params``, declaring
        ``copy`` as a bool, ``n_quantiles`` as an int and ``output_distribution`` as a
        string. The converted values become the keyword arguments of
        ``_QuantileTransformer``. ``self.columns`` starts out as ``None``.
        """
        self.handle_options(options)

        raw_params = options.get('params', {})
        estimator_kwargs = convert_params(
            raw_params,
            bools=['copy'],
            ints=['n_quantiles'],
            strs=['output_distribution'],
        )

        self.estimator = _QuantileTransformer(**estimator_kwargs)
        self.columns = None
Example 7
Project: hyperparameter_hunter   Author: HunterMcGushion   File: target_transform_inverse_example.py    License: MIT License 5 votes vote down vote up
def quantile_transform(train_targets, non_train_targets):
    """Quantile-transform both target frames in place, fitting on the training targets only.

    A ``QuantileTransformer`` with a normal output distribution and 100 quantiles is
    fitted on ``train_targets.values``. The columns named by ``train_targets.columns``
    are overwritten in both inputs with the transformed values.

    Parameters
    ----------
    train_targets : DataFrame-like object exposing ``.columns`` and ``.values``.
    non_train_targets : DataFrame-like object exposing ``.values`` and holding the same columns.

    Returns
    -------
    tuple of (train_targets, non_train_targets, fitted transformer)
    """
    target_columns = train_targets.columns
    transformer = QuantileTransformer(n_quantiles=100, output_distribution="normal")

    fitted_train_values = transformer.fit_transform(train_targets.values)
    train_targets[target_columns] = fitted_train_values

    transformed_other_values = transformer.transform(non_train_targets.values)
    non_train_targets[target_columns] = transformed_other_values

    return train_targets, non_train_targets, transformer
Example 8
Project: hyperparameter_hunter   Author: HunterMcGushion   File: test_support.py    License: MIT License 5 votes vote down vote up
def bad_quantile_transform(train_targets, non_train_targets):
    """Quantile-transform both target frames in place, but return a string in the third slot.

    The transformation is a ``QuantileTransformer`` (normal output, 100 quantiles)
    fitted on ``train_targets.values`` and applied to both inputs. The third element
    of the returned tuple is a plain string rather than the fitted transformer.

    Returns
    -------
    tuple of (train_targets, non_train_targets, str)
    """
    target_columns = train_targets.columns
    transformer = QuantileTransformer(n_quantiles=100, output_distribution="normal")

    fitted_train_values = transformer.fit_transform(train_targets.values)
    train_targets[target_columns] = fitted_train_values

    transformed_other_values = transformer.transform(non_train_targets.values)
    non_train_targets[target_columns] = transformed_other_values

    wrong_inverter = "i am the wrong type for an inversion result"
    return train_targets, non_train_targets, wrong_inverter
Example 9
Project: hyperparameter_hunter   Author: HunterMcGushion   File: test_intra_cv_target_transform.py    License: MIT License 5 votes vote down vote up
def my_quantile_transform(train_targets, non_train_targets):
    """Quantile-transform both target frames in place using a uniform output distribution.

    The ``QuantileTransformer`` is fitted on ``train_targets.values`` only; the fitted
    transformer is then applied to ``non_train_targets.values``. In both inputs the
    columns named by ``train_targets.columns`` are overwritten with the results.

    Returns
    -------
    tuple of (train_targets, non_train_targets)
    """
    target_columns = train_targets.columns
    uniform_transformer = QuantileTransformer(output_distribution="uniform")

    train_result = uniform_transformer.fit_transform(train_targets.values)
    train_targets[target_columns] = train_result

    other_result = uniform_transformer.transform(non_train_targets.values)
    non_train_targets[target_columns] = other_result

    return train_targets, non_train_targets
Example 10
Project: hyperparameter_hunter   Author: HunterMcGushion   File: test_both_stages_transform.py    License: MIT License 5 votes vote down vote up
def my_quantile_transform(train_targets, non_train_targets):
    """Map both target frames to a uniform distribution in place, fitting on the training targets.

    ``train_targets.values`` is used to fit a ``QuantileTransformer`` and is replaced
    by its transformed values; ``non_train_targets.values`` is transformed with the
    already-fitted transformer. Column selection uses ``train_targets.columns`` for both.

    Returns
    -------
    tuple of (train_targets, non_train_targets)
    """
    columns_to_replace = train_targets.columns
    qt = QuantileTransformer(output_distribution="uniform")

    train_targets[columns_to_replace] = qt.fit_transform(train_targets.values)

    non_train_values = non_train_targets.values
    non_train_targets[columns_to_replace] = qt.transform(non_train_values)

    return train_targets, non_train_targets
Example 11
Project: hyperparameter_hunter   Author: HunterMcGushion   File: test_feature_optimization.py    License: MIT License 5 votes vote down vote up
def quantile_transform(train_targets, non_train_targets):
    """Apply a normal-output quantile transform to both target frames in place.

    A ``QuantileTransformer`` with ``n_quantiles=100`` is fitted on
    ``train_targets.values`` and then used to transform ``non_train_targets.values``.
    The columns named by ``train_targets.columns`` are overwritten in both inputs.

    Returns
    -------
    tuple of (train_targets, non_train_targets, fitted transformer)
    """
    columns_to_replace = train_targets.columns
    qt = QuantileTransformer(n_quantiles=100, output_distribution="normal")

    train_targets[columns_to_replace] = qt.fit_transform(train_targets.values)

    non_train_values = non_train_targets.values
    non_train_targets[columns_to_replace] = qt.transform(non_train_values)

    return train_targets, non_train_targets, qt
Example 12
Project: dask-ml   Author: dask   File: test_data.py    License: BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def test_basic(self, output_distribution):
        """Check the ``dpp`` and ``spp`` quantile transformers agree on uniform random data.

        Both transformers are fitted on the same ``(1000, 3)`` array drawn from
        ``da.random.RandomState(0)`` and compared with ``assert_estimator_equal``
        (``atol=0.02``). After copying the quantiles across, their ``transform``
        outputs are compared at ``atol=1e-7`` and the round trip through
        ``inverse_transform`` is compared with the input.
        """
        random_state = da.random.RandomState(0)
        dask_qt = dpp.QuantileTransformer(output_distribution=output_distribution)
        sklearn_qt = spp.QuantileTransformer(output_distribution=output_distribution)

        X = random_state.uniform(size=(1000, 3), chunks=50)
        dask_qt.fit(X)
        sklearn_qt.fit(X)
        assert_estimator_equal(dask_qt, sklearn_qt, atol=0.02)

        # set the quantiles, so that from here out, we're exact
        dask_qt.quantiles_ = sklearn_qt.quantiles_
        dask_transformed = dask_qt.transform(X)
        sklearn_transformed = sklearn_qt.transform(X)
        assert_eq_ar(dask_transformed, sklearn_transformed, atol=1e-7)

        round_trip = dask_qt.inverse_transform(dask_qt.transform(X))
        assert_eq_ar(X, round_trip)
Example 13
Project: dask-ml   Author: dask   File: test_data.py    License: BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def test_types(self, type_, kwargs):
        """Smoke-test that both transformers can be fitted for the given container type.

        A ``(1000, 3)`` uniform random array is fitted with ``spp.QuantileTransformer``,
        and ``type_(X, **kwargs)`` is fitted with ``dpp.QuantileTransformer``. The test
        makes no assertions; it only requires both ``fit`` calls to complete.
        """
        X = np.random.uniform(size=(1000, 3))
        converted = type_(X, **kwargs)

        sklearn_qt = spp.QuantileTransformer()
        sklearn_qt.fit(X)

        dask_qt = dpp.QuantileTransformer()
        dask_qt.fit(converted)
Example 14
Project: dask-ml   Author: dask   File: test_data.py    License: BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def test_fit_transform_frame(self):
        """Check ``fit_transform`` on a frame matches between ``spp`` and ``dpp`` transformers.

        A ``(1000, 3)`` standard-normal pandas DataFrame is converted with
        ``dd.from_pandas(df, 2)``. The ``spp`` transformer's result on the pandas frame
        is the expected value; the ``dpp`` transformer's result on the converted frame
        must match it within ``rtol=1e-3`` and ``atol=1e-3``.
        """
        pandas_frame = pd.DataFrame(np.random.randn(1000, 3))
        dask_frame = dd.from_pandas(pandas_frame, 2)

        sklearn_qt = spp.QuantileTransformer()
        dask_qt = dpp.QuantileTransformer()

        expected = sklearn_qt.fit_transform(pandas_frame)
        result = dask_qt.fit_transform(dask_frame)
        assert_eq_ar(result, expected, rtol=1e-3, atol=1e-3)
Example 15
Project: autogluon   Author: awslabs   File: lr_model.py    License: Apache License 2.0 5 votes vote down vote up
def preprocess_train(self, X, feature_types, vect_max_features):
        """Assemble a ``FeatureUnion`` of per-feature-group pipelines, store it on ``self.pipeline`` and fit it on ``X``.

        A sub-pipeline is added only for the groups in ``feature_types`` that are non-empty:
        ``'language'`` (TF-IDF vectorization), ``'onehot'`` (one-hot generation),
        ``'continuous'`` (imputation + standard scaling) and ``'skewed'``
        (imputation + quantile transform to a normal distribution).

        Parameters
        ----------
        X : data passed to ``self.pipeline.fit`` -- presumably a DataFrame holding the listed columns; TODO confirm.
        feature_types : mapping with the keys 'language', 'onehot', 'continuous' and 'skewed';
            each value must support ``len()`` (assumed to be a collection of column names).
        vect_max_features : forwarded as ``max_features`` to ``TfidfVectorizer``.

        NOTE(review): no return statement is visible in this excerpt; confirm against the full source
        whether anything follows the ``fit`` call.
        """
        # (name, pipeline) pairs handed to FeatureUnion at the end.
        transformer_list = []
        if len(feature_types['language']) > 0:
            # Text columns: project-specific preparation followed by TF-IDF, using this object's tokenizer.
            pipeline = Pipeline(steps=[
                ("preparator", NlpDataPreprocessor(nlp_cols=feature_types['language'])),
                ("vectorizer",
                 TfidfVectorizer(ngram_range=self.params['proc.ngram_range'], sublinear_tf=True, max_features=vect_max_features, tokenizer=self.tokenize))
            ])
            transformer_list.append(('vect', pipeline))
        if len(feature_types['onehot']) > 0:
            # Categorical columns handled by the project's one-hot feature generator.
            pipeline = Pipeline(steps=[
                ('generator', OheFeaturesGenerator(cats_cols=feature_types['onehot'])),
            ])
            transformer_list.append(('cats', pipeline))
        if len(feature_types['continuous']) > 0:
            # Continuous columns: impute with the configured strategy, then standardize.
            pipeline = Pipeline(steps=[
                ('generator', NumericDataPreprocessor(cont_cols=feature_types['continuous'])),
                ('imputer', SimpleImputer(strategy=self.params['proc.impute_strategy'])),
                ('scaler', StandardScaler())
            ])
            transformer_list.append(('cont', pipeline))
        if len(feature_types['skewed']) > 0:
            # Skewed columns: impute, then apply a quantile transform instead of standard scaling.
            pipeline = Pipeline(steps=[
                ('generator', NumericDataPreprocessor(cont_cols=feature_types['skewed'])),
                ('imputer', SimpleImputer(strategy=self.params['proc.impute_strategy'])),
                ('quantile', QuantileTransformer(output_distribution='normal')),  # Or output_distribution = 'uniform'
            ])
            transformer_list.append(('skew', pipeline))
        # Combine whichever sub-pipelines were created and fit them together on X.
        self.pipeline = FeatureUnion(transformer_list=transformer_list)
        self.pipeline.fit(X)