Python sklearn.base.TransformerMixin() Examples

The following are 27 code examples of sklearn.base.TransformerMixin(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module sklearn.base, or try the search function.

Example #1

Source File: common_utils.py From interpret-text with MIT License

8 votes

def create_pandas_only_svm_classifier(X, y, probability=True):
    """Fit and return a pipeline whose first step only accepts DataFrames.

    The pipeline has two steps: a pass-through transformer that raises when
    the data it is asked to transform is not a ``pd.DataFrame``, followed by
    an ``svm.SVC`` classifier.

    Args:
        X: Training data handed to ``Pipeline.fit``.
        y: Training labels handed to ``Pipeline.fit``.
        probability: Forwarded to ``svm.SVC(probability=...)``.

    Returns:
        Whatever ``Pipeline.fit(X, y)`` returns.
    """

    class PandasOnlyEstimator(TransformerMixin):
        """Identity transformer that rejects anything but a DataFrame."""

        def fit(self, X, y=None, **fitparams):
            # Nothing is learned from the data.
            return self

        def transform(self, X, **transformparams):
            if isinstance(X, pd.DataFrame):
                return X
            raise Exception("Dataset must be a pandas dataframe!")

    steps = [
        ("pandas_only", PandasOnlyEstimator()),
        ("clf", svm.SVC(gamma=0.001, C=100.0, probability=probability, random_state=777)),
    ]
    return Pipeline(steps).fit(X, y)

Example #2

Source File: common_utils.py From interpret-community with MIT License

6 votes

def create_pandas_only_svm_classifier(X, y, probability=True):
    """Train an SVM pipeline guarded by a DataFrame-only input check.

    A no-op transformer is placed in front of an ``svm.SVC``; its
    ``transform`` raises unless it receives a ``pd.DataFrame`` and otherwise
    returns the data unchanged.

    Args:
        X: Training data passed to ``Pipeline.fit``.
        y: Training labels passed to ``Pipeline.fit``.
        probability: Value for the ``probability`` argument of ``svm.SVC``.

    Returns:
        The value returned by ``Pipeline.fit(X, y)``.
    """

    class PandasOnlyEstimator(TransformerMixin):
        """Pass-through step that insists on pandas DataFrame input."""

        def fit(self, X, y=None, **fitparams):
            # Stateless: fitting has nothing to do.
            return self

        def transform(self, X, **transformparams):
            if isinstance(X, pd.DataFrame):
                return X
            raise Exception("Dataset must be a pandas dataframe!")

    guard_step = PandasOnlyEstimator()
    classifier = svm.SVC(
        gamma=0.001,
        C=100.,
        probability=probability,
        random_state=777,
    )
    model = Pipeline([('pandas_only', guard_step), ('clf', classifier)])
    return model.fit(X, y)

Example #3

Source File: filters.py From causallib with Apache License 2.0

6 votes

def track_selected_features(pipeline_stages, num_features):
    """Follow feature indices through the feature-selection steps of a pipeline.

    Starts from ``np.arange(num_features)`` and, for every stage that is a
    ``BaseFeatureSelector``, indexes the current index array with that
    stage's ``selected_features``. Stages of any other type are ignored.

    Args:
        pipeline_stages (list [tuple[str, TransformerMixin]]): steps as
            ``(name, transformer)`` pairs; the name is not used.
        num_features (int): number of features before any selection.

    Returns:
        np.ndarray: the original feature indices remaining after all
            selector stages have been applied in order.
    """
    surviving = np.arange(num_features)
    selectors = (
        stage for _, stage in pipeline_stages
        if isinstance(stage, BaseFeatureSelector)
    )
    for selector in selectors:
        # Each selector's indices refer to the output of the previous one.
        surviving = surviving[selector.selected_features]
    return surviving

Example #4

Source File: tpot_tests.py From tpot with GNU Lesser General Public License v3.0

6 votes

def test_template_4():
    """Check that a fitted TPOT pipeline follows a template naming a specific operator."""
    tpot_obj = TPOTClassifier(
        template='SelectPercentile-Transformer-Classifier',
        config_dict='TPOT light',
        population_size=5,
        generations=2,
        random_state=42,
        verbosity=0,
    )
    tpot_obj.fit(pretest_X, pretest_y)

    best_individual = tpot_obj._optimized_pipeline
    assert isinstance(best_individual, creator.Individual)
    assert tpot_obj._start_datetime is not None

    fitted = tpot_obj.fitted_pipeline_
    n_operators = tpot_obj._operator_count(best_individual)
    assert n_operators == 3

    steps = fitted.steps
    # The first step must be the operator named explicitly in the template.
    assert steps[0][0] == 'SelectPercentile'.lower()
    expected_mixins = (SelectorMixin, TransformerMixin, ClassifierMixin)
    for position, mixin in enumerate(expected_mixins):
        assert issubclass(steps[position][1].__class__, mixin)

Example #5

Source File: tpot_tests.py From tpot with GNU Lesser General Public License v3.0

6 votes

def test_template_3():
    """Check that generated pipelines follow a template naming a specific operator."""
    tpot_obj = TPOTClassifier(
        template='SelectPercentile-Transformer-Classifier',
        random_state=42,
        verbosity=0,
    )
    tpot_obj._fit_init()

    expected_mixins = (SelectorMixin, TransformerMixin, ClassifierMixin)
    for individual in tpot_obj._toolbox.population(n=10):
        n_operators = tpot_obj._operator_count(individual)
        compiled = tpot_obj._toolbox.compile(expr=individual)
        assert n_operators == 3

        steps = compiled.steps
        # The first step must be the operator named explicitly in the template.
        assert steps[0][0] == 'SelectPercentile'.lower()
        for position, mixin in enumerate(expected_mixins):
            assert issubclass(steps[position][1].__class__, mixin)

Example #6

Source File: tpot_tests.py From tpot with GNU Lesser General Public License v3.0

6 votes

def test_template_2():
    """Check that generated pipelines follow a template that repeats an operator type."""
    tpot_obj = TPOTClassifier(
        template='Selector-Selector-Transformer-Classifier',
        random_state=42,
        verbosity=0,
    )
    tpot_obj._fit_init()

    # One expected mixin per template slot, in template order.
    expected_mixins = (SelectorMixin, SelectorMixin, TransformerMixin, ClassifierMixin)
    for individual in tpot_obj._toolbox.population(n=10):
        n_operators = tpot_obj._operator_count(individual)
        compiled = tpot_obj._toolbox.compile(expr=individual)
        assert n_operators == 4

        steps = compiled.steps
        for position, mixin in enumerate(expected_mixins):
            assert issubclass(steps[position][1].__class__, mixin)

Example #7

Source File: tpot_tests.py From tpot with GNU Lesser General Public License v3.0

6 votes

def test_template_1():
    """Check that generated pipelines follow a template made of operator types."""
    tpot_obj = TPOTClassifier(
        template='Selector-Transformer-Classifier',
        random_state=42,
        verbosity=0,
    )
    tpot_obj._fit_init()

    # One expected mixin per template slot, in template order.
    expected_mixins = (SelectorMixin, TransformerMixin, ClassifierMixin)
    for individual in tpot_obj._toolbox.population(n=10):
        n_operators = tpot_obj._operator_count(individual)
        compiled = tpot_obj._toolbox.compile(expr=individual)
        assert n_operators == 3

        steps = compiled.steps
        for position, mixin in enumerate(expected_mixins):
            assert issubclass(steps[position][1].__class__, mixin)

Example #8

Source File: test_base.py From twitter-stock-recommendation with MIT License

5 votes

def test_clone_pandas_dataframe():
    """Cloning an estimator must preserve a data-frame-like parameter."""

    class DummyEstimator(BaseEstimator, TransformerMixin):
        """Placeholder estimator holding a data frame and a scalar.

        Parameters
        ----------
        df : pandas data frame
            The data frame parameter, stored unchanged on the instance.

        scalar_param : object
            A second parameter, stored unchanged on the instance.
        """

        def __init__(self, df=None, scalar_param=1):
            self.df = df
            self.scalar_param = scalar_param

        def fit(self, X, y=None):
            """Do nothing and return None."""

        def transform(self, X):
            """Do nothing and return None."""

    # Build an estimator around a mock frame, then clone it.
    original = DummyEstimator(MockDataFrame(np.arange(10)), scalar_param=1)
    duplicate = clone(original)

    # Both constructor parameters must compare equal on the clone.
    frames_match = (original.df == duplicate.df).values.all()
    assert_true(frames_match)
    assert_equal(original.scalar_param, duplicate.scalar_param)

Example #9

Source File: normalization_strategy_selector.py From Auto-PyTorch with Apache License 2.0

5 votes

def add_normalization_strategy(self, name, normalization_type, is_default_normalization_strategy=False):
    """Register a normalization strategy class under ``name``.

    Arguments:
        name {string} -- key under which the strategy is stored in
            ``self.normalization_strategies``
        normalization_type {class} -- class to register; it must subclass
            ``BaseEstimator`` or ``TransformerMixin``
        is_default_normalization_strategy {bool} -- accepted for interface
            compatibility; this method does not read it

    Raises:
        ValueError -- if ``normalization_type`` subclasses neither
            ``BaseEstimator`` nor ``TransformerMixin``
    """
    accepted_bases = (BaseEstimator, TransformerMixin)
    if not issubclass(normalization_type, accepted_bases):
        raise ValueError("normalization_type must be subclass of BaseEstimator")

    self.normalization_strategies[name] = normalization_type

Example #10

Source File: data_splitters.py From MAST-ML with MIT License

5 votes

def split(self, X, y, groups):
    """Delegate splitting to ``LeavePGroupsOut`` with one group fewer than ``get_n_splits``.

    The count returned by ``self.get_n_splits(groups=groups)`` minus one is
    used as ``n_groups`` for ``ms.LeavePGroupsOut``, and that splitter's
    ``split(X, y, groups)`` result is returned unchanged.
    """
    group_count = self.get_n_splits(groups=groups)
    leave_out_splitter = ms.LeavePGroupsOut(n_groups=group_count - 1)
    return leave_out_splitter.split(X, y, groups)

#class WithoutElement(BaseEstimator, TransformerMixin):
#    " Train the model without each element, then test on the rows with that element "
#    pass

Example #11

Source File: ABuML.py From abu with GNU General Public License v3.0

5 votes

def fit_transform(self, **kwargs):
    """
    Run ``fit_transform`` on the current estimator if it supports it.

    Per the original notes this method is decorated with @entry_wrapper()
    (the decorator is not visible in this excerpt) and its default
    arguments cover both supervised and unsupervised learning. Support is
    detected with isinstance(fiter, TransformerMixin) or
    hasattr(fiter, 'fit_transform').

    eg:
        input:  ttn_abu.x.shape
        output: (891, 14)

        input:  ttn_abu.fit_transform(fiter_type=ml.EMLFitType.E_FIT_PCA).shape
        output: (891, 4)

        input:  ttn_abu.fit_transform(fiter_type=ml.EMLFitType.E_FIT_KMEAN).shape
        output: (891, 2)

    :param kwargs: callers may pass x and y, which are read with
                            x = kwargs.pop('x', self.x)
                            y = kwargs.pop('y', self.y)
                   as well as the fiter_type consumed by the decorator, eg:
                   ttn_abu.fit_transform(fiter_type=ml.EMLFitType.E_FIT_CLF)
    :return: the matrix produced by fit_transform, or None when the
             estimator does not support fit_transform (a message is logged)
    """
    fiter = self.get_fiter()
    supports_fit_transform = (
        isinstance(fiter, TransformerMixin) or hasattr(fiter, 'fit_transform')
    )
    if not supports_fit_transform:
        self.log_func('{} not support fit_transform'.format(fiter))
        return None

    x = kwargs.pop('x', self.x)
    y = kwargs.pop('y', self.y)
    # Supervised estimators are fitted with labels, unsupervised ones without.
    if self.is_supervised_learning():
        return fiter.fit_transform(x, y)
    return fiter.fit_transform(x)

Example #12

Source File: base.py From smrt with BSD 3-Clause "New" or "Revised" License

5 votes

def transform(self, X):
    """Encode ``X`` by delegating to ``self.encode``.

    Described by the original author as inherited from the
    ``TransformerMixin`` and as passing ``X`` through the inferential MLP
    layers.

    Parameters
    ----------
    X : array-like, shape=(n_samples, n_features)
        The array of samples to encode into the new hidden layer space.

    Returns
    -------
    The value returned by ``self.encode(X)``, unchanged.
    """
    encoded = self.encode(X)
    return encoded

Example #13

Source File: test_preprocessing.py From skl-groups with BSD 3-Clause "New" or "Revised" License

5 votes

def test_basic():
    """Smoke-test the bag preprocessing transformers on random bags.

    Covers ``BagStandardizer``, ``BagMinMaxScaler`` and ``BagNormalizer``,
    and checks that ``BagPreprocesser`` raises ``ValueError`` when it wraps
    a transformer whose output collapses each bag.
    """
    # ``range`` replaces the Python-2-only ``xrange`` so this also runs on
    # Python 3; for 50 items the behavior is the same on Python 2.
    bags = [np.random.normal(5, 3, size=(np.random.randint(10, 100), 20))
            for _ in range(50)]
    feats = Features(bags, stack=True)

    # Standardizing: the stacked result has overall mean 0 and std 1.
    stder = BagStandardizer()
    stdized = stder.fit_transform(bags)
    stdized.make_stacked()

    assert np.allclose(np.mean(stdized.stacked_features), 0)
    assert np.allclose(np.std(stdized.stacked_features), 1)

    # Transforming a subset must agree with the matching slice of the fit.
    first_five = stder.transform(bags[:5])
    assert first_five == stdized[:5]

    # Min-max scaling: every feature column spans exactly [3, 7].
    minmaxer = BagMinMaxScaler([3, 7])
    minmaxed = minmaxer.fit_transform(feats)
    minmaxed.make_stacked()
    assert np.allclose(np.min(minmaxed.stacked_features, 0), 3)
    assert np.allclose(np.max(minmaxed.stacked_features, 0), 7)

    # L1 normalization: the absolute values of every row sum to 1.
    normer = BagNormalizer('l1')
    normed = normer.fit_transform(Features(bags))
    normed.make_stacked()
    assert np.allclose(np.sum(np.abs(normed.stacked_features), 1), 1)

    class GetMean(BaseEstimator, TransformerMixin):
        """Transformer that returns a single row of per-sample means."""

        def fit(self, X, y=None):
            return self

        def transform(self, X):
            return X.mean(axis=1)[None, :]

    m = BagPreprocesser(GetMean())
    assert_raises(ValueError, lambda: m.transform(bags))

Example #14

Source File: test_step.py From baikal with BSD 3-Clause "New" or "Revised" License

5 votes

def test_get_params_without_init(self, teardown):
    """``get_params`` must be empty when the base class has no ``__init__``.

    Edge case from the original note: with no ``__init__`` defined on the
    base class, ``get_params`` should resolve to ``object.__init__`` and
    therefore report an empty dict.
    """

    class TransformerWithoutInit(TransformerMixin, BaseEstimator):
        # Deliberately defines no __init__.
        pass

    class TransformerWithoutInitStep(Step, TransformerWithoutInit):
        # Step wrapper around the init-less transformer.
        pass

    reported_params = TransformerWithoutInitStep().get_params()
    assert reported_params == {}

Example #15

Source File: _test.py From ibex with BSD 3-Clause "New" or "Revised" License

5 votes

def _generate_bases_test(est, pd_est):
    def test(self):
        self.assertTrue(isinstance(pd_est, FrameMixin), pd_est)
        self.assertFalse(isinstance(est, FrameMixin))
        self.assertTrue(isinstance(pd_est, base.BaseEstimator))
        try:
            mixins = [
                base.ClassifierMixin,
                base.ClusterMixin,
                base.BiclusterMixin,
                base.TransformerMixin,
                base.DensityMixin,
                base.MetaEstimatorMixin,
                base.ClassifierMixin,
                base.RegressorMixin]
        except:
            if _sklearn_ver > 17:
                raise
            mixins = [
                base.ClassifierMixin,
                base.ClusterMixin,
                base.BiclusterMixin,
                base.TransformerMixin,
                base.MetaEstimatorMixin,
                base.ClassifierMixin,
                base.RegressorMixin]
        for mixin in mixins:
            self.assertEqual(
                isinstance(pd_est, mixin),
                isinstance(est, mixin),
                mixin)

    return test

Example #16