Python sklearn.preprocessing.QuantileTransformer() Examples

The following are 15 code examples of sklearn.preprocessing.QuantileTransformer(), drawn from open-source projects. The source file and originating project are noted above each example. You may also want to check out all available functions and classes of the module sklearn.preprocessing.
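All of the snippets below follow the same basic pattern: construct a QuantileTransformer, fit it on training data, and then transform (or inverse-transform) arrays or DataFrames. As a baseline, here is a minimal self-contained sketch, independent of any of the projects below, assuming only NumPy and scikit-learn:

import numpy as np
from sklearn.preprocessing import QuantileTransformer

rng = np.random.RandomState(0)
X = rng.lognormal(size=(1000, 2))                 # heavily skewed input data

qt = QuantileTransformer(n_quantiles=100, output_distribution="uniform", random_state=0)
X_uniform = qt.fit_transform(X)                   # values mapped into [0, 1]
X_back = qt.inverse_transform(X_uniform)          # approximately recovers the original values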
Example #1
Source File: base.py    From sagemaker-scikit-learn-extension with Apache License 2.0
def fit(self, X, y=None):
    """Compute the lower and upper quantile cutoffs, columns to transform, and each column's quantiles.

    Parameters
    ----------
    X : array-like, shape [n_samples, n_features]
        The data array to transform. Must be numeric, non-sparse, and two-dimensional.

    Returns
    -------
    self : QuantileExtremeValueTransformer
    """
    super().fit(X)
    X = check_array(X)
    self.quantile_transformer_ = QuantileTransformer(random_state=0, copy=True)
    self.quantile_transformer_.fit(X)
    return self
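A fitted QuantileTransformer stores the per-feature quantiles that the class above relies on later. A minimal standalone sketch (not part of QuantileExtremeValueTransformer, data invented for illustration) of inspecting those learned attributes:

import numpy as np
from sklearn.preprocessing import QuantileTransformer
from sklearn.utils import check_array

X = check_array(np.random.RandomState(0).normal(size=(500, 3)))
qt = QuantileTransformer(n_quantiles=100, random_state=0, copy=True).fit(X)
print(qt.quantiles_.shape)    # (n_quantiles, n_features) -> (100, 3)
print(qt.references_[:5])     # evenly spaced reference quantiles in [0, 1]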
Example #2
Source File: processing.py    From CAIL2019 with MIT License
def do_feature_engineering(list_text):
    df = pd.DataFrame(list_text, columns=["col2"])

    feature_list = []
    feature = get_length_related_features_col2(df)
    feature_list.append(feature)
    feature = get_col2_re_features(df)
    feature_list.append(feature)
    index = feature_list[0].index

    for feature_dataset in feature_list[1:]:
        pd.testing.assert_index_equal(index, feature_dataset.index)

    data = pd.concat(feature_list, axis=1)
    qt = QuantileTransformer(random_state=2019)
    for col in data.columns:
        data[col] = qt.fit_transform(data[[col]])
    return data 
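Because QuantileTransformer computes quantiles for each feature independently, the per-column loop above could, assuming every column should use the same settings, be collapsed into a single fit_transform over the whole frame. A rough sketch of that variant (column names and data are made up for illustration):

import numpy as np
import pandas as pd
from sklearn.preprocessing import QuantileTransformer

data = pd.DataFrame(np.random.RandomState(2019).exponential(size=(300, 4)),
                    columns=["len_1", "len_2", "re_1", "re_2"])
qt = QuantileTransformer(n_quantiles=300, random_state=2019)
data[data.columns] = qt.fit_transform(data)   # each column is ranked against its own quantiles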
Example #3
Source File: utils.py    From scikit-downscale with Apache License 2.0
def fit(self, X):
    """ Fit the quantile mapping model.

    Parameters
    ----------
    X : array-like, shape  [n_samples, n_features]
        Training data.
    """
    X = ensure_samples_features(X)

    qt_kws = self.qt_kwargs.copy()

    if "n_quantiles" not in qt_kws:
        qt_kws["n_quantiles"] = len(X)

    # maybe detrend the input datasets
    if self.detrend:
        x_to_cdf = LinearTrendTransformer(**self.lt_kwargs).fit_transform(X)
    else:
        x_to_cdf = X

    # calculate the cdfs for X
    # TODO: replace this transformer with something that uses robust
    # empirical cdf plotting positions
    self.x_cdf_fit_ = QuantileTransformer(**qt_kws).fit(x_to_cdf)

    return self
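Setting n_quantiles to the number of training samples, as the code above does, makes the transformer track the empirical CDF of the training data as closely as QuantileTransformer allows. A small self-contained sketch of that effect (LinearTrendTransformer and ensure_samples_features are scikit-downscale helpers and are not reproduced here):

import numpy as np
from sklearn.preprocessing import QuantileTransformer

x = np.random.RandomState(1).gamma(shape=2.0, size=(200, 1))
x_cdf_fit = QuantileTransformer(n_quantiles=len(x)).fit(x)
u = x_cdf_fit.transform(x)        # approximately the empirical CDF values of x
print(u.min(), u.max())           # 0.0 and 1.0 for the training extremes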
Example #4
Source File: base.py    From sagemaker-scikit-learn-extension with Apache License 2.0
def _transform_function(self, x, idx=None):
    """Applies single column quantile transform from ``sklearn.preprocessing.QuantileTransformer``.

    Uses ``quantile_transformer_.quantiles_`` calculated during ``fit`` if given an index, otherwise the quantiles
    will be calculated from input ``x``.
    """
    if idx is not None:
        return self.quantile_transformer_._transform_col(  # pylint: disable=protected-access
            x, self.quantile_transformer_.quantiles_[:, idx], False
        )
    return quantile_transform_nonrandom(x)
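Note that _transform_col is a private scikit-learn method (hence the pylint pragma) and quantile_transform_nonrandom is a sagemaker-scikit-learn-extension helper, so neither is a stable public API. For a one-off transform of a single column, the public sklearn.preprocessing.quantile_transform function covers the same ground; a minimal sketch with invented data:

import numpy as np
from sklearn.preprocessing import quantile_transform

x = np.random.RandomState(0).lognormal(size=(100, 1))
x_q = quantile_transform(x, n_quantiles=100, random_state=0, copy=True)   # column-wise quantile transform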
Example #5
Source File: quantile_transformer.py    From lale with Apache License 2.0
def __init__(self, n_quantiles=1000, output_distribution='uniform', ignore_implicit_zeros=False, subsample=100000, random_state=None, copy=True):
    self._hyperparams = {
        'n_quantiles': n_quantiles,
        'output_distribution': output_distribution,
        'ignore_implicit_zeros': ignore_implicit_zeros,
        'subsample': subsample,
        'random_state': random_state,
        'copy': copy}
    self._wrapped_model = SKLModel(**self._hyperparams)
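The wrapper does nothing beyond forwarding its hyperparameters, and the defaults shown match scikit-learn's own. Assuming SKLModel in the snippet above refers to sklearn.preprocessing.QuantileTransformer, constructing the underlying estimator directly with the same arguments would look like this sketch:

from sklearn.preprocessing import QuantileTransformer as SKLModel

wrapped_model = SKLModel(n_quantiles=1000, output_distribution='uniform',
                         ignore_implicit_zeros=False, subsample=100000,
                         random_state=None, copy=True)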
Example #6
Source File: QuantileTransformer.py    From mltk-algo-contrib with Apache License 2.0
def __init__(self, options):
    self.handle_options(options)

    out_params = convert_params(
        options.get('params', {}),
        bools=['copy'],
        ints=['n_quantiles'],
        strs=['output_distribution']
    )
    self.estimator = _QuantileTransformer(**out_params)
    self.columns = None
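convert_params is an MLTK helper that casts the string-valued options coming from the search command into proper Python types before they reach the estimator. A rough standalone sketch of the same idea, with the helper replaced by explicit casts (the option values here are invented for illustration):

from sklearn.preprocessing import QuantileTransformer

options = {'params': {'copy': 'true', 'n_quantiles': '200', 'output_distribution': 'normal'}}
raw = options.get('params', {})
out_params = {
    'copy': raw['copy'].lower() == 'true',               # bools=['copy']
    'n_quantiles': int(raw['n_quantiles']),               # ints=['n_quantiles']
    'output_distribution': raw['output_distribution'],    # strs=['output_distribution']
}
estimator = QuantileTransformer(**out_params)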
Example #7
Source File: target_transform_inverse_example.py    From hyperparameter_hunter with MIT License
def quantile_transform(train_targets, non_train_targets):
    transformer = QuantileTransformer(output_distribution="normal", n_quantiles=100)
    train_targets[train_targets.columns] = transformer.fit_transform(train_targets.values)
    non_train_targets[train_targets.columns] = transformer.transform(non_train_targets.values)
    return train_targets, non_train_targets, transformer 
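The third return value matters because the fitted transformer is later used to invert predictions back to the original target scale (compare Example #8, where returning the wrong type for the inversion result is the point of the test). A self-contained sketch of that round trip, with made-up targets and predictions:

import numpy as np
import pandas as pd
from sklearn.preprocessing import QuantileTransformer

train_targets = pd.DataFrame({"target": np.random.RandomState(0).lognormal(size=200)})
transformer = QuantileTransformer(output_distribution="normal", n_quantiles=100)
train_targets[train_targets.columns] = transformer.fit_transform(train_targets.values)

predictions = np.random.RandomState(1).normal(size=(200, 1))       # stand-in for model output in the transformed space
preds_original_scale = transformer.inverse_transform(predictions)  # back to the original target scale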
Example #8
Source File: test_support.py    From hyperparameter_hunter with MIT License
def bad_quantile_transform(train_targets, non_train_targets):
    transformer = QuantileTransformer(output_distribution="normal", n_quantiles=100)
    train_targets[train_targets.columns] = transformer.fit_transform(train_targets.values)
    non_train_targets[train_targets.columns] = transformer.transform(non_train_targets.values)
    return train_targets, non_train_targets, "i am the wrong type for an inversion result" 
Example #9
Source File: test_intra_cv_target_transform.py    From hyperparameter_hunter with MIT License
def my_quantile_transform(train_targets, non_train_targets):
    transformer = QuantileTransformer(output_distribution="uniform")
    train_targets[train_targets.columns] = transformer.fit_transform(train_targets.values)
    non_train_targets[train_targets.columns] = transformer.transform(non_train_targets.values)
    return train_targets, non_train_targets 
Example #10
Source File: test_both_stages_transform.py    From hyperparameter_hunter with MIT License
def my_quantile_transform(train_targets, non_train_targets):
    transformer = QuantileTransformer(output_distribution="uniform")
    train_targets[train_targets.columns] = transformer.fit_transform(train_targets.values)
    non_train_targets[train_targets.columns] = transformer.transform(non_train_targets.values)
    return train_targets, non_train_targets 
Example #11
Source File: test_feature_optimization.py    From hyperparameter_hunter with MIT License
def quantile_transform(train_targets, non_train_targets):
    transformer = QuantileTransformer(output_distribution="normal", n_quantiles=100)
    train_targets[train_targets.columns] = transformer.fit_transform(train_targets.values)
    non_train_targets[train_targets.columns] = transformer.transform(non_train_targets.values)
    return train_targets, non_train_targets, transformer 
Example #12
Source File: test_data.py    From dask-ml with BSD 3-Clause "New" or "Revised" License
def test_basic(self, output_distribution):
    rs = da.random.RandomState(0)
    a = dpp.QuantileTransformer(output_distribution=output_distribution)
    b = spp.QuantileTransformer(output_distribution=output_distribution)

    X = rs.uniform(size=(1000, 3), chunks=50)
    a.fit(X)
    b.fit(X)
    assert_estimator_equal(a, b, atol=0.02)

    # set the quantiles, so that from here out, we're exact
    a.quantiles_ = b.quantiles_
    assert_eq_ar(a.transform(X), b.transform(X), atol=1e-7)
    assert_eq_ar(X, a.inverse_transform(a.transform(X)))
Example #13
Source File: test_data.py    From dask-ml with BSD 3-Clause "New" or "Revised" License
def test_types(self, type_, kwargs):
    X = np.random.uniform(size=(1000, 3))
    dX = type_(X, **kwargs)
    qt = spp.QuantileTransformer()
    qt.fit(X)
    dqt = dpp.QuantileTransformer()
    dqt.fit(dX)
Example #14
Source File: test_data.py    From dask-ml with BSD 3-Clause "New" or "Revised" License
def test_fit_transform_frame(self):
    df = pd.DataFrame(np.random.randn(1000, 3))
    ddf = dd.from_pandas(df, 2)

    a = spp.QuantileTransformer()
    b = dpp.QuantileTransformer()

    expected = a.fit_transform(df)
    result = b.fit_transform(ddf)
    assert_eq_ar(result, expected, rtol=1e-3, atol=1e-3)
Example #15
Source File: lr_model.py    From autogluon with Apache License 2.0
def preprocess_train(self, X, feature_types, vect_max_features):
    transformer_list = []
    if len(feature_types['language']) > 0:
        pipeline = Pipeline(steps=[
            ("preparator", NlpDataPreprocessor(nlp_cols=feature_types['language'])),
            ("vectorizer",
             TfidfVectorizer(ngram_range=self.params['proc.ngram_range'], sublinear_tf=True, max_features=vect_max_features, tokenizer=self.tokenize))
        ])
        transformer_list.append(('vect', pipeline))
    if len(feature_types['onehot']) > 0:
        pipeline = Pipeline(steps=[
            ('generator', OheFeaturesGenerator(cats_cols=feature_types['onehot'])),
        ])
        transformer_list.append(('cats', pipeline))
    if len(feature_types['continuous']) > 0:
        pipeline = Pipeline(steps=[
            ('generator', NumericDataPreprocessor(cont_cols=feature_types['continuous'])),
            ('imputer', SimpleImputer(strategy=self.params['proc.impute_strategy'])),
            ('scaler', StandardScaler())
        ])
        transformer_list.append(('cont', pipeline))
    if len(feature_types['skewed']) > 0:
        pipeline = Pipeline(steps=[
            ('generator', NumericDataPreprocessor(cont_cols=feature_types['skewed'])),
            ('imputer', SimpleImputer(strategy=self.params['proc.impute_strategy'])),
            ('quantile', QuantileTransformer(output_distribution='normal')),  # Or output_distribution = 'uniform'
        ])
        transformer_list.append(('skew', pipeline))
    self.pipeline = FeatureUnion(transformer_list=transformer_list)
    self.pipeline.fit(X)
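preprocess_train builds and fits the FeatureUnion but stops there; applying it to data is a separate transform call. A stripped-down, scikit-learn-only sketch of the numeric branches (the NLP and one-hot generators are autogluon-specific and omitted; the data and strategy choices here are illustrative):

import numpy as np
from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import QuantileTransformer, StandardScaler

cont_pipeline = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler()),
])
skew_pipeline = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='median')),
    ('quantile', QuantileTransformer(output_distribution='normal', n_quantiles=100)),
])
union = FeatureUnion(transformer_list=[('cont', cont_pipeline), ('skew', skew_pipeline)])

X = np.random.RandomState(0).exponential(size=(300, 2))
X_proc = union.fit_transform(X)   # outputs of both branches, stacked column-wise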