Python sklearn.preprocessing.QuantileTransformer() Examples

The following are 15 code examples of sklearn.preprocessing.QuantileTransformer(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module sklearn.preprocessing, or try the search function.

Example #1

Source File: base.py From sagemaker-scikit-learn-extension with Apache License 2.0

6 votes

def fit(self, X, y=None):
        """Fit the parent transformer, then fit a ``QuantileTransformer`` on ``X``.

        Parameters
        ----------
        X : array-like, shape [n_samples, n_features]
            The data array to fit on. It is handed to the parent ``fit`` first and is then validated with
            ``check_array`` before the quantile transformer is fitted.
        y : ignored
            Not used by this method.

        Returns
        -------
        self : QuantileExtremeValueTransformer
        """
        super().fit(X)
        validated = check_array(X)
        # The seed is pinned so repeated fits on the same data use the same random state.
        quantile_kwargs = {"random_state": 0, "copy": True}
        self.quantile_transformer_ = QuantileTransformer(**quantile_kwargs)
        self.quantile_transformer_.fit(validated)
        return self

Example #2

Source File: processing.py From CAIL2019 with MIT License

6 votes

def do_feature_engineering(list_text):
    """Build a quantile-transformed feature frame from ``list_text``.

    The input is loaded into a one-column DataFrame (column ``col2``), two
    feature extractors are run on it, their results are checked to share the
    same index, and they are concatenated column-wise. Each column of the
    result is then replaced by the output of a ``QuantileTransformer`` that is
    re-fitted on that column alone.

    Returns the transformed DataFrame.
    """
    frame = pd.DataFrame(list_text, columns=["col2"])

    extracted = [
        get_length_related_features_col2(frame),
        get_col2_re_features(frame),
    ]

    # All feature sets must be aligned on the first one's index before concat.
    reference_index = extracted[0].index
    for other in extracted[1:]:
        pd.testing.assert_index_equal(reference_index, other.index)

    data = pd.concat(extracted, axis=1)
    scaler = QuantileTransformer(random_state=2019)
    for name in data.columns:
        # Double brackets keep the selection two-dimensional for the transformer.
        data[name] = scaler.fit_transform(data[[name]])
    return data

Example #3

Source File: utils.py From scikit-downscale with Apache License 2.0

5 votes

def fit(self, X):
        """Fit the quantile mapping model.

        Parameters
        ----------
        X : array-like, shape  [n_samples, n_features]
            Training data.

        Returns
        -------
        self
        """
        samples = ensure_samples_features(X)

        # Work on a copy so the keyword arguments stored on the instance stay untouched.
        transformer_kwargs = self.qt_kwargs.copy()
        if "n_quantiles" not in transformer_kwargs:
            # Default: one quantile per training sample.
            transformer_kwargs["n_quantiles"] = len(samples)

        # maybe detrend the input datasets
        cdf_input = (
            LinearTrendTransformer(**self.lt_kwargs).fit_transform(samples)
            if self.detrend
            else samples
        )

        # calculate the cdfs for X
        # TODO: replace this transformer with something that uses robust
        # empirical cdf plotting positions
        self.x_cdf_fit_ = QuantileTransformer(**transformer_kwargs).fit(cdf_input)

        return self

Example #4

Source File: base.py From sagemaker-scikit-learn-extension with Apache License 2.0

5 votes

def _transform_function(self, x, idx=None):
        """Applies single column quantile transform from ``sklearn.preprocessing.QuantileTransformer``.

        Uses ``quantile_transformer_.quantiles_`` calculated during ``fit`` if given an index, otherwise the quantiles
        will be calculated from input ``x``.

        Parameters
        ----------
        x : array-like
            Values of the single column to transform.
        idx : int or None, default None
            Column index into the fitted ``quantiles_``. ``None`` means no fitted column is used and ``x`` is passed to
            ``quantile_transform_nonrandom`` instead.

        Returns
        -------
        The transformed column, as returned by whichever of the two transforms was applied.
        """
        # Compare against ``None`` explicitly: a plain truthiness test would treat the valid column index 0 as
        # "no index" and silently skip the fitted quantiles for the first column.
        if idx is not None:
            return self.quantile_transformer_._transform_col(  # pylint: disable=protected-access
                x, self.quantile_transformer_.quantiles_[:, idx], False
            )
        return quantile_transform_nonrandom(x)

Example #5

Source File: quantile_transformer.py From lale with Apache License 2.0

5 votes

def __init__(self, n_quantiles=1000, output_distribution='uniform', ignore_implicit_zeros=False, subsample=100000, random_state=None, copy=True):
        """Store the hyperparameters and build the wrapped model from them.

        Every argument is recorded under its own name in ``self._hyperparams``; that mapping is then unpacked as
        keyword arguments into ``SKLModel`` to create ``self._wrapped_model``.
        """
        self._hyperparams = dict(
            n_quantiles=n_quantiles,
            output_distribution=output_distribution,
            ignore_implicit_zeros=ignore_implicit_zeros,
            subsample=subsample,
            random_state=random_state,
            copy=copy,
        )
        self._wrapped_model = SKLModel(**self._hyperparams)

Example #6

Source File: QuantileTransformer.py From mltk-algo-contrib with Apache License 2.0

5 votes

def __init__(self, options):
        """Process ``options`` and create the underlying estimator.

        ``options`` is first handed to ``self.handle_options``. Its ``'params'`` entry (an empty dict when absent) is
        then run through ``convert_params`` and the result is unpacked into ``_QuantileTransformer``.
        """
        self.handle_options(options)

        raw_params = options.get('params', {})
        # Tell the converter which parameters are booleans, integers and strings.
        estimator_kwargs = convert_params(
            raw_params,
            bools=['copy'],
            ints=['n_quantiles'],
            strs=['output_distribution'],
        )
        self.estimator = _QuantileTransformer(**estimator_kwargs)
        self.columns = None

Example #7

Source File: target_transform_inverse_example.py From hyperparameter_hunter with MIT License

5 votes

def quantile_transform(train_targets, non_train_targets):
    """Quantile-transform both target frames to a normal output distribution.

    A ``QuantileTransformer`` is fitted on the values of ``train_targets`` and then applied to the values of
    ``non_train_targets``. Both frames are updated in place, using the column labels of ``train_targets``.

    Returns
    -------
    tuple
        ``(train_targets, non_train_targets, transformer)``, where ``transformer`` is the fitted transformer.
    """
    qt = QuantileTransformer(n_quantiles=100, output_distribution="normal")
    target_columns = train_targets.columns
    train_targets[target_columns] = qt.fit_transform(train_targets.values)
    non_train_targets[target_columns] = qt.transform(non_train_targets.values)
    return train_targets, non_train_targets, qt

Example #8

Source File: test_support.py From hyperparameter_hunter with MIT License

5 votes

def bad_quantile_transform(train_targets, non_train_targets):
    """Quantile-transform both target frames but return a string as the third element.

    The transformation itself matches a regular normal-output quantile transform: the transformer is fitted on
    ``train_targets`` and applied to ``non_train_targets``, updating both in place. The third element of the
    returned tuple is a plain string rather than the fitted transformer.
    """
    qt = QuantileTransformer(n_quantiles=100, output_distribution="normal")
    target_columns = train_targets.columns
    train_targets[target_columns] = qt.fit_transform(train_targets.values)
    non_train_targets[target_columns] = qt.transform(non_train_targets.values)
    wrong_inverter = "i am the wrong type for an inversion result"
    return train_targets, non_train_targets, wrong_inverter

Example #9

Source File: test_intra_cv_target_transform.py From hyperparameter_hunter with MIT License

5 votes

def my_quantile_transform(train_targets, non_train_targets):
    """Quantile-transform both target frames to a uniform output distribution.

    The transformer is fitted on the values of ``train_targets`` and then applied to the values of
    ``non_train_targets``. Both frames are updated in place and returned as ``(train_targets, non_train_targets)``.
    """
    qt = QuantileTransformer(output_distribution="uniform")
    target_columns = train_targets.columns
    train_targets[target_columns] = qt.fit_transform(train_targets.values)
    non_train_targets[target_columns] = qt.transform(non_train_targets.values)
    return train_targets, non_train_targets

Example #10

Source File: test_both_stages_transform.py From hyperparameter_hunter with MIT License

5 votes

def my_quantile_transform(train_targets, non_train_targets):
    """Map both target frames onto a uniform distribution via quantile transform.

    Fits on ``train_targets`` only, then reuses the fitted transformer on ``non_train_targets``. The frames are
    modified in place and handed back as a 2-tuple.
    """
    uniform_qt = QuantileTransformer(output_distribution="uniform")

    train_scaled = uniform_qt.fit_transform(train_targets.values)
    train_targets[train_targets.columns] = train_scaled

    other_scaled = uniform_qt.transform(non_train_targets.values)
    non_train_targets[train_targets.columns] = other_scaled

    return train_targets, non_train_targets

Example #11

Source File: test_feature_optimization.py From hyperparameter_hunter with MIT License

5 votes

def quantile_transform(train_targets, non_train_targets):
    """Map both target frames onto a normal distribution via quantile transform.

    Fits on ``train_targets`` only, then reuses the fitted transformer on ``non_train_targets``. The frames are
    modified in place.

    Returns
    -------
    tuple
        ``(train_targets, non_train_targets, transformer)`` with the fitted ``QuantileTransformer`` last.
    """
    normal_qt = QuantileTransformer(n_quantiles=100, output_distribution="normal")

    train_scaled = normal_qt.fit_transform(train_targets.values)
    train_targets[train_targets.columns] = train_scaled

    other_scaled = normal_qt.transform(non_train_targets.values)
    non_train_targets[train_targets.columns] = other_scaled

    return train_targets, non_train_targets, normal_qt

Example #12

Source File: test_data.py From dask-ml with BSD 3-Clause "New" or "Revised" License

5 votes

def test_basic(self, output_distribution):
        """Compare ``dpp.QuantileTransformer`` with ``spp.QuantileTransformer`` on uniform random data.

        Both estimators are fitted on the same chunked array and must agree approximately. After the quantiles are
        copied from one to the other, ``transform`` output is compared with a much tighter tolerance and
        ``inverse_transform`` is checked to recover the input.
        """
        random_state = da.random.RandomState(0)
        dpp_qt = dpp.QuantileTransformer(output_distribution=output_distribution)
        spp_qt = spp.QuantileTransformer(output_distribution=output_distribution)

        X = random_state.uniform(size=(1000, 3), chunks=50)
        for estimator in (dpp_qt, spp_qt):
            estimator.fit(X)
        # The fitted estimators are only required to match within a loose tolerance.
        assert_estimator_equal(dpp_qt, spp_qt, atol=0.02)

        # set the quantiles, so that from here out, we're exact
        dpp_qt.quantiles_ = spp_qt.quantiles_
        assert_eq_ar(dpp_qt.transform(X), spp_qt.transform(X), atol=1e-7)
        round_trip = dpp_qt.inverse_transform(dpp_qt.transform(X))
        assert_eq_ar(X, round_trip)

Example #13

Source File: test_data.py From dask-ml with BSD 3-Clause "New" or "Revised" License

5 votes

def test_types(self, type_, kwargs):
        """Fit ``spp.QuantileTransformer`` on a NumPy array and ``dpp.QuantileTransformer`` on its conversion.

        The array is converted with ``type_(array, **kwargs)``. Nothing is asserted; the test passes when both fits
        complete without raising.
        """
        array = np.random.uniform(size=(1000, 3))
        converted = type_(array, **kwargs)
        spp.QuantileTransformer().fit(array)
        dpp.QuantileTransformer().fit(converted)

Example #14

Source File: test_data.py From dask-ml with BSD 3-Clause "New" or "Revised" License

5 votes

def test_fit_transform_frame(self):
        """Check that ``fit_transform`` on a ``dd`` frame matches ``spp`` on the equivalent pandas frame.

        The pandas frame holds standard-normal random values and is converted with ``dd.from_pandas(frame, 2)``.
        Results are compared with relative and absolute tolerances of ``1e-3``.
        """
        frame = pd.DataFrame(np.random.randn(1000, 3))
        partitioned_frame = dd.from_pandas(frame, 2)

        spp_qt = spp.QuantileTransformer()
        dpp_qt = dpp.QuantileTransformer()

        expected = spp_qt.fit_transform(frame)
        result = dpp_qt.fit_transform(partitioned_frame)
        tolerances = {"rtol": 1e-3, "atol": 1e-3}
        assert_eq_ar(result, expected, **tolerances)

Example #15

Source File: lr_model.py From autogluon with Apache License 2.0

5 votes

def preprocess_train(self, X, feature_types, vect_max_features):
        """Assemble a ``FeatureUnion`` of per-feature-type pipelines, store it on ``self.pipeline`` and fit it on ``X``.

        A branch is added for each of the ``feature_types`` keys ``'language'``, ``'onehot'``, ``'continuous'`` and
        ``'skewed'`` whose value has at least one entry:

        * ``'language'``  -> ``NlpDataPreprocessor`` followed by ``TfidfVectorizer`` (capped at ``vect_max_features``)
        * ``'onehot'``    -> ``OheFeaturesGenerator``
        * ``'continuous'``-> ``NumericDataPreprocessor``, ``SimpleImputer``, ``StandardScaler``
        * ``'skewed'``    -> ``NumericDataPreprocessor``, ``SimpleImputer``, ``QuantileTransformer`` (normal output)
        """
        branches = []

        language_cols = feature_types['language']
        if len(language_cols) > 0:
            text_steps = [
                ("preparator", NlpDataPreprocessor(nlp_cols=language_cols)),
                (
                    "vectorizer",
                    TfidfVectorizer(
                        ngram_range=self.params['proc.ngram_range'],
                        sublinear_tf=True,
                        max_features=vect_max_features,
                        tokenizer=self.tokenize,
                    ),
                ),
            ]
            branches.append(('vect', Pipeline(steps=text_steps)))

        onehot_cols = feature_types['onehot']
        if len(onehot_cols) > 0:
            onehot_steps = [
                ('generator', OheFeaturesGenerator(cats_cols=onehot_cols)),
            ]
            branches.append(('cats', Pipeline(steps=onehot_steps)))

        continuous_cols = feature_types['continuous']
        if len(continuous_cols) > 0:
            continuous_steps = [
                ('generator', NumericDataPreprocessor(cont_cols=continuous_cols)),
                ('imputer', SimpleImputer(strategy=self.params['proc.impute_strategy'])),
                ('scaler', StandardScaler()),
            ]
            branches.append(('cont', Pipeline(steps=continuous_steps)))

        skewed_cols = feature_types['skewed']
        if len(skewed_cols) > 0:
            skewed_steps = [
                ('generator', NumericDataPreprocessor(cont_cols=skewed_cols)),
                ('imputer', SimpleImputer(strategy=self.params['proc.impute_strategy'])),
                # Or output_distribution = 'uniform'
                ('quantile', QuantileTransformer(output_distribution='normal')),
            ]
            branches.append(('skew', Pipeline(steps=skewed_steps)))

        self.pipeline = FeatureUnion(transformer_list=branches)
        self.pipeline.fit(X)