Python sklearn.base.ClassifierMixin() Examples

The following are 27 code examples of sklearn.base.ClassifierMixin(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module sklearn.base , or try the search function .
Example #1
Source File: tpot_tests.py    From tpot with GNU Lesser General Public License v3.0 6 votes vote down vote up
def test_template_4():
    """Assert that TPOT template option generates pipeline when one of steps is a specific operator."""

    tpot_obj = TPOTClassifier(
        population_size=5,
        generations=2,
        random_state=42,
        verbosity=0,
        config_dict = 'TPOT light',
        template='SelectPercentile-Transformer-Classifier'
    )
    tpot_obj.fit(pretest_X, pretest_y)

    assert isinstance(tpot_obj._optimized_pipeline, creator.Individual)
    assert not (tpot_obj._start_datetime is None)

    sklearn_pipeline = tpot_obj.fitted_pipeline_
    operator_count = tpot_obj._operator_count(tpot_obj._optimized_pipeline)
    assert operator_count == 3
    assert sklearn_pipeline.steps[0][0] == 'SelectPercentile'.lower()
    assert issubclass(sklearn_pipeline.steps[0][1].__class__, SelectorMixin)
    assert issubclass(sklearn_pipeline.steps[1][1].__class__, TransformerMixin)
    assert issubclass(sklearn_pipeline.steps[2][1].__class__, ClassifierMixin) 
Example #2
Source File: test_multioutput.py    From twitter-stock-recommendation with MIT License 6 votes vote down vote up
def test_classifier_chain_fit_and_predict_with_logistic_regression():
    # Fit classifier chain and verify predict performance
    X, Y = generate_multilabel_dataset_with_correlations()
    classifier_chain = ClassifierChain(LogisticRegression())
    classifier_chain.fit(X, Y)

    Y_pred = classifier_chain.predict(X)
    assert_equal(Y_pred.shape, Y.shape)

    Y_prob = classifier_chain.predict_proba(X)
    Y_binary = (Y_prob >= .5)
    assert_array_equal(Y_binary, Y_pred)

    assert_equal([c.coef_.size for c in classifier_chain.estimators_],
                 list(range(X.shape[1], X.shape[1] + Y.shape[1])))

    assert isinstance(classifier_chain, ClassifierMixin) 
Example #3
Source File: stacking.py    From stacked_generalization with Apache License 2.0 6 votes vote down vote up
def _get_child_predict(self, clf, X, index=None):
        if self.stack_by_proba and hasattr(clf, 'predict_proba'):
            if self.save_stage0 and index is not None:
                proba = util.saving_predict_proba(clf, X, index)
            else:
                proba = clf.predict_proba(X)
            return proba[:, 1:]
        elif hasattr(clf, 'predict'):
            predict_result = clf.predict(X)
            if isinstance(clf, ClassifierMixin):
                lb = LabelBinarizer()
                lb.fit(predict_result)
                return lb.fit_transform(predict_result)
            else:
                return predict_result.reshape((predict_result.size, 1))
        else:
            return clf.fit_transform(X) 
Example #4
Source File: tpot_tests.py    From tpot with GNU Lesser General Public License v3.0 6 votes vote down vote up
def test_template_3():
    """Assert that TPOT template option generates pipeline when one of steps is a specific operator."""

    tpot_obj = TPOTClassifier(
        random_state=42,
        verbosity=0,
        template='SelectPercentile-Transformer-Classifier'
    )
    tpot_obj._fit_init()
    pop = tpot_obj._toolbox.population(n=10)
    for deap_pipeline in pop:
        operator_count = tpot_obj._operator_count(deap_pipeline)
        sklearn_pipeline = tpot_obj._toolbox.compile(expr=deap_pipeline)
        assert operator_count == 3
        assert sklearn_pipeline.steps[0][0] == 'SelectPercentile'.lower()
        assert issubclass(sklearn_pipeline.steps[0][1].__class__, SelectorMixin)
        assert issubclass(sklearn_pipeline.steps[1][1].__class__, TransformerMixin)
        assert issubclass(sklearn_pipeline.steps[2][1].__class__, ClassifierMixin) 
Example #5
Source File: tpot_tests.py    From tpot with GNU Lesser General Public License v3.0 6 votes vote down vote up
def test_template_2():
    """Assert that TPOT template option generates pipeline when each step is operator type with a duplicate main type."""

    tpot_obj = TPOTClassifier(
        random_state=42,
        verbosity=0,
        template='Selector-Selector-Transformer-Classifier'
    )
    tpot_obj._fit_init()
    pop = tpot_obj._toolbox.population(n=10)
    for deap_pipeline in pop:
        operator_count = tpot_obj._operator_count(deap_pipeline)
        sklearn_pipeline = tpot_obj._toolbox.compile(expr=deap_pipeline)
        assert operator_count == 4
        assert issubclass(sklearn_pipeline.steps[0][1].__class__, SelectorMixin)
        assert issubclass(sklearn_pipeline.steps[1][1].__class__, SelectorMixin)
        assert issubclass(sklearn_pipeline.steps[2][1].__class__, TransformerMixin)
        assert issubclass(sklearn_pipeline.steps[3][1].__class__, ClassifierMixin) 
Example #6
Source File: tpot_tests.py    From tpot with GNU Lesser General Public License v3.0 6 votes vote down vote up
def test_template_1():
    """Assert that TPOT template option generates pipeline when each step is a type of operator."""

    tpot_obj = TPOTClassifier(
        random_state=42,
        verbosity=0,
        template='Selector-Transformer-Classifier'
    )
    tpot_obj._fit_init()
    pop = tpot_obj._toolbox.population(n=10)
    for deap_pipeline in pop:
        operator_count = tpot_obj._operator_count(deap_pipeline)
        sklearn_pipeline = tpot_obj._toolbox.compile(expr=deap_pipeline)
        assert operator_count == 3
        assert issubclass(sklearn_pipeline.steps[0][1].__class__, SelectorMixin)
        assert issubclass(sklearn_pipeline.steps[1][1].__class__, TransformerMixin)
        assert issubclass(sklearn_pipeline.steps[2][1].__class__, ClassifierMixin) 
Example #7
Source File: test_multioutput.py    From Mastering-Elasticsearch-7.0 with MIT License 6 votes vote down vote up
def test_base_chain_fit_and_predict():
    # Fit base chain and verify predict performance
    X, Y = generate_multilabel_dataset_with_correlations()
    chains = [RegressorChain(Ridge()),
              ClassifierChain(LogisticRegression())]
    for chain in chains:
        chain.fit(X, Y)
        Y_pred = chain.predict(X)
        assert_equal(Y_pred.shape, Y.shape)
        assert_equal([c.coef_.size for c in chain.estimators_],
                     list(range(X.shape[1], X.shape[1] + Y.shape[1])))

    Y_prob = chains[1].predict_proba(X)
    Y_binary = (Y_prob >= .5)
    assert_array_equal(Y_binary, Y_pred)

    assert isinstance(chains[1], ClassifierMixin) 
Example #8
Source File: mis_classifier.py    From autoimpute with MIT License 6 votes vote down vote up
def __init__(self, classifier=None, predictors="all"):
        """Create an instance of the MissingnessClassifier.

        The MissingnessClassifier inherits from sklearn BaseEstimator and
        ClassifierMixin. This inheritence and this class' implementation
        ensure that the MissingnessClassifier is a valid classifier that will
        work in an sklearn pipeline.

        Args:
            classifier (classifier, optional): valid classifier from sklearn.
                If None, default is xgboost. Note that classifier must
                conform to sklearn style. This means it must implement the
                `predict_proba` method and act as a porper classifier.
            predictors (str, iter, dict, optiona): defaults to all, i.e.
                use all predictors. If all, every column will be used for
                every class prediction. If a list, subset of columns used for
                all predictions. If a dict, specify which columns to use as
                predictors for each imputation. Columns not specified in dict
                will receive `all` by default.
        """
        self.classifier = classifier
        self.predictors = predictors 
Example #9
Source File: __init__.py    From sklearn2pmml with GNU Affero General Public License v3.0 5 votes vote down vote up
def verify(self, X, predict_params = {}, predict_proba_params = {}, precision = 1e-13, zeroThreshold = 1e-13):
		active_fields = _get_column_names(X)
		if self.active_fields is None or active_fields is None:
			raise ValueError("Cannot perform model validation with anonymous data")
		if self.active_fields.tolist() != active_fields.tolist():
			raise ValueError("The columns between training data {} and verification data {} do not match".format(self.active_fields, active_fields))
		active_values = _get_values(X)
		y = self.predict(X, **predict_params)
		target_values = _get_values(y)
		estimator = self._final_estimator
		if isinstance(estimator, BaseEstimator):
			if isinstance(estimator, RegressorMixin):
				self.verification = _Verification(active_values, target_values, precision, zeroThreshold)
			elif isinstance(estimator, ClassifierMixin):
				self.verification = _Verification(active_values, target_values, precision, zeroThreshold)
				if hasattr(estimator, "predict_proba"):
					try:
						y_proba = self.predict_proba(X, **predict_proba_params)
						self.verification.probability_values = _get_values(y_proba)
					except AttributeError:
						pass
		# elif isinstance(estimator, H2OEstimator):
		elif hasattr(estimator, "_estimator_type") and hasattr(estimator, "download_mojo"):
			if estimator._estimator_type == "regressor":
				self.verification = _Verification(active_values, target_values, precision, zeroThreshold)
			elif estimator._estimator_type == "classifier":
				probability_values = target_values[:, 1:]
				target_values = target_values[:, 0]
				self.verification = _Verification(active_values, target_values, precision, zeroThreshold)
				self.verification.probability_values = probability_values 
Example #10
Source File: test_voting.py    From Mastering-Elasticsearch-7.0 with MIT License 5 votes vote down vote up
def test_sample_weight_kwargs():
    """Check that VotingClassifier passes sample_weight as kwargs"""
    class MockClassifier(BaseEstimator, ClassifierMixin):
        """Mock Classifier to check that sample_weight is received as kwargs"""
        def fit(self, X, y, *args, **sample_weight):
            assert 'sample_weight' in sample_weight

    clf = MockClassifier()
    eclf = VotingClassifier(estimators=[('mock', clf)], voting='soft')

    # Should not raise an error.
    eclf.fit(X, y, sample_weight=np.ones((len(y),))) 
Example #11
Source File: test_voting_classifier.py    From twitter-stock-recommendation with MIT License 5 votes vote down vote up
def test_sample_weight_kwargs():
    """Check that VotingClassifier passes sample_weight as kwargs"""
    class MockClassifier(BaseEstimator, ClassifierMixin):
        """Mock Classifier to check that sample_weight is received as kwargs"""
        def fit(self, X, y, *args, **sample_weight):
            assert_true('sample_weight' in sample_weight)

    clf = MockClassifier()
    eclf = VotingClassifier(estimators=[('mock', clf)], voting='soft')

    # Should not raise an error.
    eclf.fit(X, y, sample_weight=np.ones((len(y),))) 
Example #12
Source File: tree.py    From sklearn-pmml with MIT License 5 votes vote down vote up
def __init__(self, estimator, context, mode):
        super(DecisionTreeConverter, self).__init__(estimator, context, mode)

        assert len(self.context.schemas[Schema.OUTPUT]) == 1, 'Only one-label trees are supported'
        assert hasattr(estimator, 'tree_'), 'Estimator has no tree_ attribute'
        if mode == ModelMode.CLASSIFICATION:
            if isinstance(self.context.schemas[Schema.OUTPUT][0], CategoricalFeature):
                self.prediction_output = self.OUTPUT_LABEL
            else:
                self.prediction_output = self.OUTPUT_PROBABILITY
            assert isinstance(self.estimator, ClassifierMixin), \
                'Only a classifier can be serialized in classification mode'
        if mode == ModelMode.REGRESSION:
            assert isinstance(self.context.schemas[Schema.OUTPUT][0], NumericFeature), \
                'Only a numeric feature can be an output of regression'
            assert isinstance(self.estimator, RegressorMixin), \
                'Only a regressor can be serialized in regression mode'
        assert estimator.tree_.value.shape[1] == len(self.context.schemas[Schema.OUTPUT]), \
            'Tree outputs {} results while the schema specifies {} output fields'.format(
                estimator.tree_.value.shape[1], len(self.context.schemas[Schema.OUTPUT]))

        # create hidden variables for each categorical output
        # TODO: this code is copied from the ClassifierConverter. To make things right, we need an abstract tree
        # TODO: converter and subclasses for classifier and regression converters
        internal_schema = list(filter(lambda x: isinstance(x, CategoricalFeature), self.context.schemas[Schema.OUTPUT]))
        self.context.schemas[Schema.INTERNAL] = internal_schema 
Example #13
Source File: model.py    From sklearn-pmml with MIT License 5 votes vote down vote up
def __init__(self, estimator, context):
        """
        :param estimator: Estimator to convert
        :type estimator: BaseEstimator
        :param context: context to work with
        :type context: TransformationContext
        """
        super(ClassifierConverter, self).__init__(estimator, context, ModelMode.CLASSIFICATION)
        assert isinstance(estimator, ClassifierMixin), 'Classifier converter should only be applied to the classification models'
        for f in context.schemas[Schema.OUTPUT]:
            assert isinstance(f, CategoricalFeature), 'Only categorical outputs are supported for classification task'

        # create hidden variables for each categorical output
        internal_schema = list(filter(lambda x: isinstance(x, CategoricalFeature), self.context.schemas[Schema.OUTPUT]))
        self.context.schemas[Schema.INTERNAL] = internal_schema 
Example #14
Source File: stacking.py    From stacked_generalization with Apache License 2.0 5 votes vote down vote up
def _get_blend_init(self, y_train, clf):
        if self.stack_by_proba and hasattr(clf, 'predict_proba'):
            width = self.n_classes_ - 1
        elif hasattr(clf, 'predict') and isinstance(clf, ClassifierMixin):
            width = self.n_classes_
        elif hasattr(clf, 'predict'):
            width = 1
        elif hasattr(clf, 'n_components'):
            width = clf.n_components
        else:
            raise Exception('Unimplemented for {0}'.format(type(clf)))
        return np.zeros((y_train.size, width)) 
Example #15
Source File: classification.py    From decoding-brain-challenge-2016 with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def __init__(self, metric='riemann', tsupdate=False,
                 clf=LogisticRegression()):
        """Init."""
        self.metric = metric
        self.tsupdate = tsupdate
        self.clf = clf

        if not isinstance(clf, ClassifierMixin):
            raise TypeError('clf must be a ClassifierMixin')

        TangentSpace(metric=self.metric, tsupdate=self.tsupdate) 
Example #16
Source File: ABuMLGrid.py    From abu with GNU General Public License v3.0 5 votes vote down vote up
def _scoring_grid(estimator, scoring):
    """
    只针对有监督学习过滤无监督学习,对scoring未赋予的情况根据
    学习器分类器使用accuracy进行度量,回归器使用可释方差值explained_variance_score,
    使用make_scorer对函数进行score封装

    :param estimator: 学习器对象
    :param scoring: 度量使用的方法,未赋予的情况根据
                    学习器分类器使用accuracy进行度量,回归器使用explained_variance_score进行度量
    :return: scoring
    """

    if not isinstance(estimator, (ClassifierMixin, RegressorMixin)):
        logging.info('only support supervised learning')
        # TODO 无监督学习的scoring度量以及GridSearchCV
        return None

    if scoring is None:
        if isinstance(estimator, ClassifierMixin):
            # 分类器使用accuracy
            return 'accuracy'
        elif isinstance(estimator, RegressorMixin):
            # 回归器使用可释方差值explained_variance_score,使用make_scorer对函数进行score封装
            """
                make_scorer中通过greater_is_better对返回值进行正负分配
                eg: sign = 1 if greater_is_better else -1
            """
            return make_scorer(explained_variance_score, greater_is_better=True)
        return None
    return scoring 
Example #17
Source File: ABuML.py    From abu with GNU General Public License v3.0 5 votes vote down vote up
def plot_graphviz_tree(self, **kwargs):
        """
        被装饰器entry_wrapper(support=(EMLFitType.E_FIT_CLF, EMLFitType.E_FIT_REG))装饰,
        即支持有监督学习回归和分类,绘制决策树或者core基于树的分类回归算法的决策示意图绘制,查看
        学习器本身hasattr(fiter, 'tree_')是否有tree_属性,如果没有使用决策树替换

        :param kwargs:  外部可以传递x, y, 通过
                                x = kwargs.pop('x', self.x)
                                y = kwargs.pop('y', self.y)
                        装饰器使用的fiter_type,
                        eg:
                            ttn_abu = AbuML.create_test_more_fiter()
                            ttn_abu.plot_graphviz_tree(fiter_type=ml.EMLFitType.E_FIT_CLF)
        """
        x = kwargs.pop('x', self.x)
        y = kwargs.pop('y', self.y)
        fiter = self.get_fiter()

        if not hasattr(fiter, 'tree_'):
            self.log_func('{} not hasattr tree_, use decision tree replace'.format(
                fiter.__class__.__name__))

            if isinstance(fiter, ClassifierMixin):
                # FIXME 最好不要使用ClassifierMixin判定学习器类型,因为限定了sklearn
                fiter = self.estimator.decision_tree_classifier(assign=False)
            elif isinstance(fiter, RegressorMixin):
                # # FIXME 最好不要使用RegressorMixin, AbuMLCreater中引用了hmmlearn,xgboost等第三方库
                fiter = self.estimator.decision_tree_regressor(assign=False)
            else:
                fiter = self.estimator.decision_tree_classifier(assign=False)
        # 这里需要将self.df.columns做为名字传入
        return ABuMLExecute.graphviz_tree(fiter, self.df.columns, x, y) 
Example #18
Source File: elm.py    From SVM-CNN with Apache License 2.0 5 votes vote down vote up
def score(self, X, y):
        """Force use of accuracy score since we don't inherit
           from ClassifierMixin"""

        from sklearn.metrics import accuracy_score
        return accuracy_score(y, self.predict(X)) 
Example #19
Source File: stacking_estimator.py    From tpot with GNU Lesser General Public License v3.0 5 votes vote down vote up
def transform(self, X):
        """Transform data by adding two synthetic feature(s).

        Parameters
        ----------
        X: numpy ndarray, {n_samples, n_components}
            New data, where n_samples is the number of samples and n_components is the number of components.

        Returns
        -------
        X_transformed: array-like, shape (n_samples, n_features + 1) or (n_samples, n_features + 1 + n_classes) for classifier with predict_proba attribute
            The transformed feature set.
        """
        X = check_array(X)
        X_transformed = np.copy(X)
        # add class probabilities as a synthetic feature
        if issubclass(self.estimator.__class__, ClassifierMixin) and hasattr(self.estimator, 'predict_proba'):
            y_pred_proba = self.estimator.predict_proba(X)
            # check all values that should be not infinity or not NAN
            if np.all(np.isfinite(y_pred_proba)):
                X_transformed = np.hstack((y_pred_proba, X))

        # add class prediction as a synthetic feature
        X_transformed = np.hstack((np.reshape(self.estimator.predict(X), (-1, 1)), X_transformed))

        return X_transformed 
Example #20
Source File: test_voting.py    From Mastering-Elasticsearch-7.0 with MIT License 5 votes vote down vote up
def test_sample_weight():
    """Tests sample_weight parameter of VotingClassifier"""
    clf1 = LogisticRegression(random_state=123)
    clf2 = RandomForestClassifier(random_state=123)
    clf3 = SVC(gamma='scale', probability=True, random_state=123)
    eclf1 = VotingClassifier(estimators=[
        ('lr', clf1), ('rf', clf2), ('svc', clf3)],
        voting='soft').fit(X, y, sample_weight=np.ones((len(y),)))
    eclf2 = VotingClassifier(estimators=[
        ('lr', clf1), ('rf', clf2), ('svc', clf3)],
        voting='soft').fit(X, y)
    assert_array_equal(eclf1.predict(X), eclf2.predict(X))
    assert_array_almost_equal(eclf1.predict_proba(X), eclf2.predict_proba(X))

    sample_weight = np.random.RandomState(123).uniform(size=(len(y),))
    eclf3 = VotingClassifier(estimators=[('lr', clf1)], voting='soft')
    eclf3.fit(X, y, sample_weight)
    clf1.fit(X, y, sample_weight)
    assert_array_equal(eclf3.predict(X), clf1.predict(X))
    assert_array_almost_equal(eclf3.predict_proba(X), clf1.predict_proba(X))

    # check that an error is raised and indicative if sample_weight is not
    # supported.
    clf4 = KNeighborsClassifier()
    eclf3 = VotingClassifier(estimators=[
        ('lr', clf1), ('svc', clf3), ('knn', clf4)],
        voting='soft')
    msg = ('Underlying estimator KNeighborsClassifier does not support '
           'sample weights.')
    with pytest.raises(ValueError, match=msg):
        eclf3.fit(X, y, sample_weight)

    # check that _parallel_fit_estimator will raise the right error
    # it should raise the original error if this is not linked to sample_weight
    class ClassifierErrorFit(BaseEstimator, ClassifierMixin):
        def fit(self, X, y, sample_weight):
            raise TypeError('Error unrelated to sample_weight.')
    clf = ClassifierErrorFit()
    with pytest.raises(TypeError, match='Error unrelated to sample_weight'):
        clf.fit(X, y, sample_weight=sample_weight) 
Example #21
Source File: _test.py    From ibex with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def _generate_bases_test(est, pd_est):
    def test(self):
        self.assertTrue(isinstance(pd_est, FrameMixin), pd_est)
        self.assertFalse(isinstance(est, FrameMixin))
        self.assertTrue(isinstance(pd_est, base.BaseEstimator))
        try:
            mixins = [
                base.ClassifierMixin,
                base.ClusterMixin,
                base.BiclusterMixin,
                base.TransformerMixin,
                base.DensityMixin,
                base.MetaEstimatorMixin,
                base.ClassifierMixin,
                base.RegressorMixin]
        except:
            if _sklearn_ver > 17:
                raise
            mixins = [
                base.ClassifierMixin,
                base.ClusterMixin,
                base.BiclusterMixin,
                base.TransformerMixin,
                base.MetaEstimatorMixin,
                base.ClassifierMixin,
                base.RegressorMixin]
        for mixin in mixins:
            self.assertEqual(
                isinstance(pd_est, mixin),
                isinstance(est, mixin),
                mixin)

    return test 
Example #22
Source File: util.py    From jh-kaggle-util with Apache License 2.0 5 votes vote down vote up
def transform(self, X):
        X = check_array(X)
        X_transformed = np.copy(X)
        # add class probabilities as a synthetic feature
        if issubclass(self.estimator.__class__, ClassifierMixin) and hasattr(self.estimator, 'predict_proba'):
            X_transformed = np.hstack((self.estimator.predict_proba(X), X))

        # add class prodiction as a synthetic feature
        X_transformed = np.hstack((np.reshape(self.estimator.predict(X), (-1, 1)), X_transformed))

        return X_transformed 
Example #23
Source File: model.py    From ebonite with Apache License 2.0 5 votes vote down vote up
def _exposed_methods_mapping(self) -> Dict[str, str]:
        ret = {
            'predict': 'predict'
        }
        if isinstance(self.model, ClassifierMixin):
            ret['predict_proba'] = 'predict_proba'
        return ret 
Example #24
Source File: investigate.py    From sklearn-onnx with MIT License 4 votes vote down vote up
def enumerate_pipeline_models(pipe, coor=None, vs=None):
    """
    Enumerates all the models within a pipeline.
    """
    if coor is None:
        coor = (0,)
    yield coor, pipe, vs
    if hasattr(pipe, 'transformer_and_mapper_list') and len(
            pipe.transformer_and_mapper_list):
        # azureml DataTransformer
        raise NotImplementedError("Unable to handle this specific case.")
    elif hasattr(pipe, 'mapper') and pipe.mapper:
        # azureml DataTransformer
        for couple in enumerate_pipeline_models(pipe.mapper, coor + (0,)):
            yield couple
    elif hasattr(pipe, 'built_features'):
        # sklearn_pandas.dataframe_mapper.DataFrameMapper
        for i, (columns, transformers, _) in enumerate(pipe.built_features):
            if isinstance(columns, str):
                columns = (columns,)
            if transformers is None:
                yield (coor + (i,)), None, columns
            else:
                for couple in enumerate_pipeline_models(transformers,
                                                        coor + (i,),
                                                        columns):
                    yield couple
    elif isinstance(pipe, Pipeline):
        for i, (_, model) in enumerate(pipe.steps):
            for couple in enumerate_pipeline_models(model, coor + (i,)):
                yield couple
    elif ColumnTransformer is not None and isinstance(pipe, ColumnTransformer):
        for i, (_, fitted_transformer, column) in enumerate(pipe.transformers):
            for couple in enumerate_pipeline_models(
                    fitted_transformer, coor + (i,), column):
                yield couple
    elif isinstance(pipe, FeatureUnion):
        for i, (_, model) in enumerate(pipe.transformer_list):
            for couple in enumerate_pipeline_models(model, coor + (i,)):
                yield couple
    elif TransformedTargetRegressor is not None and isinstance(
            pipe, TransformedTargetRegressor):
        raise NotImplementedError(
            "Not yet implemented for TransformedTargetRegressor.")
    elif isinstance(pipe, (TransformerMixin, ClassifierMixin, RegressorMixin)):
        pass
    elif isinstance(pipe, BaseEstimator):
        pass
    else:
        raise TypeError(
            "Parameter pipe is not a scikit-learn object: {}\n{}".format(
                type(pipe), pipe)) 
Example #25
Source File: calibration.py    From carl with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
def __init__(self, base_estimator, method="histogram", bins="auto",
                 interpolation=None, variable_width=False, cv=1):
        """Constructor.

        Parameters
        ----------
        * `base_estimator` [`ClassifierMixin`]:
            The classifier whose output decision function needs to be
            calibrated to offer more accurate predict_proba outputs. If
            `cv=prefit`, the classifier must have been fit already on data.

        * `method` [string]:
            The method to use for calibration. Supported methods include
            `"histogram"`, `"kde"`, `"isotonic"`, `"interpolated-isotonic"` and
            `"sigmoid"`.

        * `bins` [int, default="auto"]:
            The number of bins, if `method` is `"histogram"`.

        * `interpolation` [string, optional]
            Specifies the kind of interpolation between bins as a string
            (`"linear"`, `"nearest"`, `"zero"`, `"slinear"`, `"quadratic"`,
            `"cubic"`), if `method` is `"histogram"`.

        * `variable_dith_width` [boolean, optional]
            If True use equal probability variable length bins, if
            `method` is `"histogram"`.

        * `cv` [integer, cross-validation generator, iterable or `"prefit"`]:
            Determines the cross-validation splitting strategy.
            Possible inputs for cv are:

            - integer, to specify the number of folds.
            - An object to be used as a cross-validation generator.
            - An iterable yielding train/test splits.

            If `"prefit"` is passed, it is assumed that base_estimator has been
            fitted already and all data is used for calibration. If `cv=1`,
            the training data is used for both training and calibration.
        """
        self.base_estimator = base_estimator
        self.method = method
        self.bins = bins
        self.interpolation = interpolation
        self.variable_width = variable_width
        self.cv = cv 
Example #26
Source File: base.py    From carl with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
def as_classifier(regressor):
    """Wrap a Scikit-Learn regressor into a binary classifier.

    This function can be used to solve a binary classification problem as a
    regression problem, where output labels {0,1} are treated as real values.
    The wrapped regressor exhibits the classifier API, with the corresponding
    `predict`, `predict_proba` and `score` methods.

    Parameters
    ----------
    * `regressor` [`RegressorMixin`]:
        The regressor object.

    Returns
    -------
    * `clf` [`ClassifierMixin`]:
        The wrapped regressor, but with a classifier API.
    """
    class Wrapper(BaseEstimator, ClassifierMixin):
        def __init__(self, base_estimator):
            self.base_estimator = base_estimator

        def fit(self, X, y, **kwargs):
            # Check inputs
            X, y = check_X_y(X, y)

            # Convert y
            label_encoder = LabelEncoder()
            y = label_encoder.fit_transform(y).astype(np.float)

            if len(label_encoder.classes_) != 2:
                raise ValueError

            self.classes_ = label_encoder.classes_

            # Fit regressor
            self.regressor_ = clone(self.base_estimator).fit(X, y, **kwargs)

            return self

        def predict(self, X):
            return np.where(self.predict_proba(X)[:, 1] >= 0.5,
                            self.classes_[1],
                            self.classes_[0])

        def predict_proba(self, X):
            X = check_array(X)

            df = self.regressor_.predict(X)
            df = np.clip(df, 0., 1.)
            probas = np.zeros((len(X), 2))
            probas[:, 0] = 1. - df
            probas[:, 1] = df

            return probas

        def score(self, X, y):
            return self.regressor_.score(X, y)

    return Wrapper(regressor) 
Example #27
Source File: bench_ml.py    From scikit-optimize with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
def evaluate(self, point):
        """
        Fits model using the particular setting of hyperparameters and
        evaluates the model validation data.

        Parameters
        ----------
        * `point`: dict
            A mapping of parameter names to the corresponding values

        Returns
        -------
        * `score`: float
            Score (more is better!) for some specific point
        """
        X_train, y_train, X_test, y_test = (
            self.X_train, self.y_train, self.X_test, self.y_test)

        # apply transformation to model parameters, for example exp transformation
        point_mapped = {}
        for param, val in point.items():
            point_mapped[param] = self.space[param][1](val)

        model_instance = self.model(**point_mapped)

        if 'random_state' in model_instance.get_params():
            model_instance.set_params(random_state=self.random_state)

        min_obj_val = -5.0

        # Infeasible parameters are expected to raise an exception, thus the try
        # catch below, infeasible parameters yield assumed smallest objective.
        try:
            model_instance.fit(X_train, y_train)
            if isinstance(model_instance, RegressorMixin): # r^2 metric
                y_predicted = model_instance.predict(X_test)
                score = r2_score(y_test, y_predicted)
            elif isinstance(model_instance, ClassifierMixin): # log loss
                y_predicted = model_instance.predict_proba(X_test)
                score = -log_loss(y_test, y_predicted) # in the context of this function, the higher score is better
            # avoid any kind of singularitites, eg probability being zero, and thus breaking the log_loss
            if math.isnan(score):
                score = min_obj_val
            score = max(score, min_obj_val) # this is necessary to avoid -inf or NaN
        except BaseException as ex:
            score = min_obj_val # on error: return assumed smallest value of objective function

        return score

# this is necessary to generate table for README in the end