python source code of

ibex-master
- .coveragerc
- examples
  - iris_feature_transform.ipynb
  - in_prog
    - movielens_simple_row_aggregating_features_with_stacking.ipynb
  - movielens_simple_row_aggregating_features.ipynb
  - iris_feature_importance.ipynb
  - movielens_nmf.ipynb
  - boston_plotting_cv_preds.ipynb
  - movielens_data
    - ml-100k
  - iris_tensorflow.ipynb
  - digits_confidence_intervals.ipynb
- test
  - _test.py
- .landscape.yaml
- .readthedocs.yml
- README.rst
- setup.py
- .travis.yml
- ibex
  - _xy_estimator.py
  - _adapter.py
  - xgboost
    - __init__.py
  - _utils.py
  - sklearn
    - _cluster.py
    - _regression_coef_intercept.py
    - _decomposition.py
    - _feature_selection.py
    - _pipeline.py
    - _feature_importances.py
    - _preprocessing.py
    - __init__.py
    - _classification_coef_intercept.py
    - _predict_star.py
    - _cross_val_predict.py
  - __init__.py
  - _base.py
  - tensorflow
    - __init__.py
    - contrib
      - __init__.py
      - keras
        __init__.py
        wrappers
        scikit_learn
        __init__.py
        __init__.py
  - _function_transformer.py
- scripts
  - analyze_system_sklearn.py
- .gitignore
- docs
  - Makefile
  - source
    - api_ibex_sklearn_feature_selection_selectfwe.rst
    - api_ibex_sklearn_mixture_dpgmm.rst
    - api_ibex_sklearn_preprocessing_multilabelbinarizer.rst
    - api_ibex_sklearn_svm_oneclasssvm.rst
    - api_ibex_sklearn_feature_selection_selectfpr.rst
    - api_ibex_sklearn_cluster_spectralcoclustering.rst
    - api_ibex_sklearn_calibration_linearsvc.rst
    - api_ibex_sklearn_discriminant_analysis_lineardiscriminantanalysis.rst
    - api_ibex_sklearn_ensemble_randomforestregressor.rst
    - pipelines.rst
    - api_ibex_sklearn_mixture_gaussianmixture.rst
    - api_ibex_sklearn_linear_model_logisticregression.rst
    - api_ibex_sklearn_linear_model_ridgecv.rst
    - api_ibex_sklearn_manifold_locallylinearembedding.rst
    - api_ibex_sklearn_grid_search_baseestimator.rst
    - api_ibex_sklearn_linear_model_lassocv.rst
    - api_ibex_sklearn_feature_selection_selectkbest.rst
    - api_ibex_sklearn_covariance_ledoitwolf.rst
    - api_ibex_sklearn_gaussian_process_gaussianprocessclassifier.rst
    - api_ibex_sklearn_multiclass_onevsrestclassifier.rst
    - api_ibex_sklearn_linear_model_ridgeclassifiercv.rst
    - api_ibex_sklearn_preprocessing_binarizer.rst
    - api_ibex_sklearn_grid_search_gridsearchcv.rst
    - api_ibex_transformer_example.rst
    - api_ibex_sklearn_multiclass_onevsoneclassifier.rst
    - api_ibex_tensorflow_contrib_keras_wrappers_scikit_learn_keras_regressor.rst
    - api_ibex_sklearn_calibration_baseestimator.rst
    - api_ibex_sklearn_naive_bayes_labelbinarizer.rst
    - api_ibex_sklearn_cross_decomposition_plscanonical.rst
    - api_ibex_sklearn_ensemble_gradientboostingclassifier.rst
    - api_ibex_sklearn_feature_selection_variancethreshold.rst
    - api_ibex_sklearn_feature_selection_genericunivariateselect.rst
    - api_ibex_sklearn_decomposition_kernelpca.rst
    - api_ibex_sklearn_neighbors_radiusneighborsregressor.rst
    - api_ibex_sklearn_neighbors_nearestneighbors.rst
    - api_ibex_sklearn_preprocessing_maxabsscaler.rst
    - api_ibex_sklearn_linear_model_multitaskelasticnet.rst
    - api_ibex_sklearn_calibration__sigmoidcalibration.rst
    - api_ibex_sklearn_feature_selection_rfe.rst
    - api_ibex_sklearn_svm_svr.rst
    - api_ibex_sklearn_linear_model_linearregression.rst
    - api_ibex_sklearn_feature_selection_selectfdr.rst
    - api_ibex_sklearn_kernel_approximation_nystroem.rst
    - api_ibex_sklearn_covariance_graphlasso.rst
    - api_ibex_sklearn_linear_model_lasso.rst
    - api_ibex_sklearn_covariance_ellipticenvelope.rst
    - api_ibex_sklearn_multiclass_labelbinarizer.rst
    - api_ibex_sklearn_calibration_labelencoder.rst
    - api_ibex_sklearn_decomposition_randomizedpca.rst
    - api_ibex_sklearn_naive_bayes_bernoullinb.rst
    - api_ibex_sklearn_tree_decisiontreeclassifier.rst
    - api_ibex_sklearn_multiclass_outputcodeclassifier.rst
    - api_ibex_sklearn_kernel_ridge_kernelridge.rst
    - api_ibex_sklearn_linear_model_elasticnetcv.rst
    - api_ibex_sklearn_cluster_meanshift.rst
    - api_ibex_sklearn_ensemble_randomforestclassifier.rst
    - api_ibex_sklearn_cluster_featureagglomeration.rst
    - api_ibex_sklearn_kernel_approximation_rbfsampler.rst
    - api_ibex_sklearn_tree_extratreeclassifier.rst
    - api_ibex_sklearn_linear_model_passiveaggressiveregressor.rst
    - api_ibex_sklearn_manifold_tsne.rst
    - api_ibex_sklearn_linear_model_sgdregressor.rst
    - api_ibex_sklearn_kernel_ridge_baseestimator.rst
    - api_ibex_sklearn_covariance_oas.rst
    - api_ibex_sklearn_gaussian_process_gaussianprocessregressor.rst
    - api_ibex_sklearn_preprocessing_normalizer.rst
    - api_ibex_sklearn_naive_bayes_multinomialnb.rst
    - api_ibex_sklearn_linear_model_larscv.rst
    - api_ibex_sklearn_linear_model_ridge.rst
    - api_ibex_sklearn_ensemble_randomtreesembedding.rst
    - api_ibex_sklearn_covariance_mincovdet.rst
    - api_ibex_sklearn_linear_model_logisticregressioncv.rst
    - api_ibex_sklearn_kernel_approximation_additivechi2sampler.rst
    - api_ibex_sklearn_feature_selection_selectfrommodel.rst
    - api_ibex_sklearn_ensemble_adaboostclassifier.rst
    - api_ibex_sklearn_svm_nusvc.rst
    - api_ibex_sklearn_preprocessing_robustscaler.rst
    - api_ibex_sklearn_multioutput_multioutputregressor.rst
    - api_ibex_sklearn_preprocessing_imputer.rst
    - api.rst
    - api_ibex_sklearn_linear_model_elasticnet.rst
    - api_ibex_sklearn_neighbors_kneighborsregressor.rst
    - api_ibex_sklearn_cluster_minibatchkmeans.rst
    - spelling_wordlist.txt
    - overview.rst
    - api_ibex_sklearn_linear_model_passiveaggressiveclassifier.rst
    - api_ibex_sklearn_semi_supervised_labelpropagation.rst
    - api_ibex_sklearn_mixture_gmm.rst
    - api_ibex_sklearn_decomposition_pca.rst
    - api_ibex_sklearn_naive_bayes_gaussiannb.rst
    - api_ibex_sklearn_cluster_spectralbiclustering.rst
    - api_ibex_sklearn_decomposition_truncatedsvd.rst
    - api_ibex_sklearn_preprocessing_onehotencoder.rst
    - api_ibex.rst
    - api_ibex_sklearn_pipeline__basecomposition.rst
    - feature_union.rst
    - api_ibex_sklearn_preprocessing_minmaxscaler.rst
    - examples.rst
    - api_ibex_sklearn_linear_model_lassolarscv.rst
    - api_ibex_sklearn_decomposition_incrementalpca.rst
    - api_ibex_sklearn_calibration_labelbinarizer.rst
    - sklearn.rst
    - api_ibex_sklearn_decomposition_latentdirichletallocation.rst
    - api_ibex_sklearn_neighbors_kneighborsclassifier.rst
    - api_ibex_sklearn_ensemble_baseensemble.rst
    - input_verification_and_output_processing.rst
    - api_ibex_sklearn_linear_model_orthogonalmatchingpursuit.rst
    - api_ibex_tensorflow_contrib_keras_wrappers_scikit_learn_keras_classifier.rst
    - api_ibex_sklearn_multioutput_multioutputclassifier.rst
    - api_ibex_sklearn_cluster_birch.rst
    - api_ibex_sklearn_covariance_empiricalcovariance.rst
    - api_ibex_sklearn_isotonic_isotonicregression.rst
    - api_ibex_sklearn_ensemble_extratreesclassifier.rst
    - api_ibex_sklearn_decomposition_nmf.rst
    - api_ibex_sklearn_cluster_dbscan.rst
    - api_ibex_regressor_example.rst
    - api_ibex_sklearn_cluster_affinitypropagation.rst
    - api_ibex_sklearn_ensemble_isolationforest.rst
    - api_ibex_sklearn_calibration_calibratedclassifiercv.rst
    - api_ibex_sklearn_cross_decomposition_plsregression.rst
    - api_ibex_sklearn_grid_search_basesearchcv.rst
    - api_ibex_sklearn_neural_network_bernoullirbm.rst
    - api_ibex_sklearn_svm_linearsvc.rst
    - tensorflow.rst
    - api_ibex_sklearn_svm_linearsvr.rst
    - api_ibex_sklearn_feature_extraction_dictvectorizer.rst
    - api_ibex_sklearn_neighbors_lshforest.rst
    - api_ibex_sklearn_linear_model_theilsenregressor.rst
    - api_ibex_sklearn_calibration_isotonicregression.rst
    - api.rst.jinja2
    - api_ibex_sklearn_linear_model_huberregressor.rst
    - api_ibex_sklearn_feature_selection_selectpercentile.rst
    - api_ibex_sklearn_decomposition_minibatchsparsepca.rst
    - api_ibex_sklearn_ensemble_votingclassifier.rst
    - api_ibex_sklearn_ensemble_adaboostregressor.rst
    - api_ibex_sklearn_cluster_kmeans.rst
    - api_ibex_sklearn_preprocessing_functiontransformer.rst
    - api_ibex_sklearn_decomposition_fastica.rst
    - api_ibex_sklearn_naive_bayes_basenb.rst
    - api_ibex_sklearn_gaussian_process_gaussianprocess.rst
    - api_ibex_sklearn_neural_network_mlpclassifier.rst
    - api_ibex_sklearn_pipeline_featureunion.rst
    - api_ibex_sklearn_ensemble_extratreesregressor.rst
    - api_ibex_sklearn_random_projection_gaussianrandomprojection.rst
    - api_ibex_sklearn_linear_model_multitasklassocv.rst
    - api_ibex_sklearn_ensemble_baggingregressor.rst
    - api_ibex_sklearn_linear_model_lassolarsic.rst
    - api_ibex_sklearn_decomposition_minibatchdictionarylearning.rst
    - api_ibex_sklearn_neighbors_radiusneighborsclassifier.rst
    - api_ibex_sklearn_linear_model_ransacregressor.rst
    - api_ibex_sklearn_preprocessing_quantiletransformer.rst
    - api_ibex_sklearn_decomposition_dictionarylearning.rst
    - api_ibex_sklearn_neighbors_localoutlierfactor.rst
    - api_ibex_sklearn_linear_model_sgdclassifier.rst
    - api_ibex_sklearn_decomposition_sparsepca.rst
    - api_ibex_sklearn_preprocessing_kernelcenterer.rst
    - api_ibex_sklearn_pipeline_pipeline.rst
    - api_ibex_sklearn_covariance_shrunkcovariance.rst
    - api_ibex_sklearn_semi_supervised_labelspreading.rst
    - api_ibex_sklearn_multioutput_baseestimator.rst
    - api_ibex_frame.rst
    - api_ibex_sklearn_cluster_agglomerativeclustering.rst
    - api_ibex_sklearn_kernel_approximation_baseestimator.rst
    - api_ibex_sklearn_grid_search_randomizedsearchcv.rst
    - api_ibex_sklearn_kernel_approximation_skewedchi2sampler.rst
    - api_ibex_sklearn_ensemble_baggingclassifier.rst
    - api_ibex_sklearn_linear_model_bayesianridge.rst
    - api_ibex_sklearn_discriminant_analysis_baseestimator.rst
    - api_ibex_sklearn_linear_model_multitasklasso.rst
    - api_ibex_sklearn_discriminant_analysis_quadraticdiscriminantanalysis.rst
    - api_ibex_sklearn_linear_model_lassolars.rst
    - api_ibex_sklearn_naive_bayes_basediscretenb.rst
    - index.rst
    - conf.py
    - api_ibex_sklearn_neighbors_nearestcentroid.rst
    - api_ibex_sklearn_dummy_dummyclassifier.rst
    - api_ibex_sklearn_feature_selection_rfecv.rst
    - api_ibex_sklearn_random_projection_baseestimator.rst
    - api_ibex_sklearn_linear_model_ridgeclassifier.rst
    - api_ibex_sklearn_linear_model_multitaskelasticnetcv.rst
    - frame_adapter.rst
    - api_ibex_sklearn_svm_nusvr.rst
    - api_ibex_sklearn_cluster_spectralclustering.rst
    - api_class.rst.jinja2
    - api_ibex_sklearn_random_projection_baserandomprojection.rst
    - api_ibex_sklearn_multiclass__constantpredictor.rst
    - api_ibex_sklearn_preprocessing_labelbinarizer.rst
    - api_ibex_frame_mixin.rst
    - api_ibex_sklearn_preprocessing_polynomialfeatures.rst
    - extending.rst
    - api_ibex_sklearn_model_selection_randomizedsearchcv.rst
    - api_ibex_sklearn_linear_model_orthogonalmatchingpursuitcv.rst
    - api_ibex_sklearn_cross_decomposition_cca.rst
    - api_ibex_sklearn_multioutput_multioutputestimator.rst
    - api_ibex_sklearn_random_projection_sparserandomprojection.rst
    - api_ibex_sklearn_tree_decisiontreeregressor.rst
    - api_ibex_sklearn_mixture_vbgmm.rst
    - api_ibex_sklearn_manifold_isomap.rst
    - xgboost.rst
    - api_ibex_sklearn_naive_bayes_baseestimator.rst
    - api_ibex_sklearn_multiclass_baseestimator.rst
    - api_ibex_sklearn_linear_model_perceptron.rst
    - api_ibex_sklearn_isotonic_baseestimator.rst
    - api_ibex_sklearn_discriminant_analysis_standardscaler.rst
    - api_ibex_sklearn_dummy_dummyregressor.rst
    - api_ibex_sklearn_linear_model_randomizedlogisticregression.rst
    - api_ibex_sklearn_manifold_spectralembedding.rst
    - api_ibex_sklearn_preprocessing_standardscaler.rst
    - api_ibex_sklearn_decomposition_sparsecoder.rst
    - api_ibex_sklearn_linear_model_ardregression.rst
    - api_ibex_trans.rst
    - function_transformer.rst
    - api_ibex_sklearn_neighbors_kerneldensity.rst
    - _static
      - logo.ico
      - got_frame.jpeg
      - logo.jpeg
    - api_ibex_sklearn_svm_svc.rst
    - api_ibex_sklearn_preprocessing_labelencoder.rst
    - api_ibex_sklearn_ensemble_gradientboostingregressor.rst
    - api_ibex_sklearn_tree_extratreeregressor.rst
    - api_ibex_sklearn_mixture_bayesiangaussianmixture.rst
    - api_overview.rst
    - api_ibex_sklearn_covariance_graphlassocv.rst
    - api_ibex_sklearn_cross_decomposition_plssvd.rst
    - api_ibex_sklearn_neural_network_mlpregressor.rst
    - api_ibex_sklearn_dummy_baseestimator.rst
    - api_ibex_sklearn_manifold_mds.rst
    - api_ibex_sklearn_feature_extraction_featurehasher.rst
    - api_ibex_sklearn_linear_model_randomizedlasso.rst
    - api_ibex_sklearn_model_selection_gridsearchcv.rst
    - api_ibex_sklearn_multioutput_classifierchain.rst
    - api_ibex_sklearn_linear_model_lars.rst
    - api_ibex_sklearn_decomposition_factoranalysis.rst
  - requirements.txt
- LICENSE.txt

from __future__ import absolute_import


import string
import collections
import functools
import itertools
import os
import threading

import pandas as pd
from sklearn import base
from sklearn import pipeline
from sklearn.externals import joblib
try:
    from sklearn.exceptions import NotFittedError
except ImportError:
    from sklearn.utils.validation import NotFittedError # Older Versions

from ._utils import verify_x_type, verify_y_type


# Public API of this module; names are appended right after their definitions below.
__all__ = []


def _get_iris_example_doc_preamble_(
        is_regressor,
        is_classifier,
        is_transformer,
        is_clusterer,
        indent):
    """
    Returns the opening part of a doctest-style example that loads the iris
    dataset into a ``pandas.DataFrame``.

    Arguments:
        is_regressor: Currently not used.
        is_classifier: If true, the returned example maps the ``class`` column
            to the target names and shows ``iris.head()``; otherwise a generic
            example showing ``iris[features]`` is returned.
        is_transformer: Currently not used.
        is_clusterer: Currently not used.
        indent: Currently not used; the returned text keeps its literal indentation.

    Returns:
        A string holding the example text.
    """

    if is_classifier:
        # The header row of the wide frame ends with a literal backslash (this is
        # how pandas marks wrapped output). It must be escaped: in a non-raw string
        # a bare backslash followed by a newline is a line continuation, which would
        # drop the backslash and glue the header to the first data row.
        return """
        Example:

            >>> import numpy as np
            >>> from sklearn import datasets
            >>> import pandas as pd
            >>>
            >>> iris = datasets.load_iris()
            >>> features, targets, iris = iris['feature_names'], iris['target_names'], pd.DataFrame(
            ...     np.c_[iris['data'], iris['target']],
            ...     columns=iris['feature_names']+['class'])
            >>> iris['class'] = iris['class'].map(pd.Series(targets))
            >>>
            >>> iris.head()
                            sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm) \\
            0                5.1               3.5                1.4               0.2
            1                4.9               3.0                1.4               0.2
            2                4.7               3.2                1.3               0.2
            3                4.6               3.1                1.5               0.2
            4                5.0               3.6                1.4               0.2
            <BLANKLINE>
                class
            0  setosa
            1  setosa
            2  setosa
            3  setosa
            4  setosa

        """

    return """
        Example:

            >>> import pandas as pd
            >>> import numpy as np
            >>> from ibex.sklearn import datasets
            >>> from ibex.sklearn.linear_model import LinearRegression as PdLinearRegression

            >>> iris = datasets.load_iris()
            >>> features = iris['feature_names']
            >>> iris = pd.DataFrame(
            ...     np.c_[iris['data'], iris['target']],
            ...     columns=features+['class'])

            >>> iris[features]
                            sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)
            0                5.1               3.5                1.4               0.2
            1                4.9               3.0                1.4               0.2
            2                4.7               3.2                1.3               0.2
            3                4.6               3.1                1.5               0.2
            4                5.0               3.6                1.4               0.2
            ...

    """


def _get_fit_doc(
        orig,
        name,
        est,
        kwargs,
        is_regressor,
        is_classifier,
        is_transformer,
        is_clusterer,
        has_dataframe_y):
    """
    Builds the example text for the ``fit`` method of a wrapped estimator.

    The result is the iris preamble followed by a snippet fitting
    ``pd_<orig>.<name>(<kwargs>)`` on the iris frame; if ``has_dataframe_y``
    is true, a second ``Example:`` section with the same fit call is appended.

    Arguments:
        orig: Substituted for ``$orig`` (imported from ``ibex.sklearn`` in the snippet).
        name: Substituted for ``$name`` (the attribute instantiated in the snippet).
        est: Passed to the templates; the templates do not reference it.
        kwargs: Substituted for ``$kwargs`` (text placed between the constructor parentheses).
        is_regressor: Forwarded to the preamble builder.
        is_classifier: Forwarded to the preamble builder.
        is_transformer: Forwarded to the preamble builder.
        is_clusterer: Forwarded to the preamble builder.
        has_dataframe_y: Whether to append the second example section.

    Returns:
        The assembled example string.
    """
    # One mapping serves both templates; keys the templates do not use are ignored.
    substitutions = {
        'orig': orig,
        'name': name,
        'est': est,
        'kwargs': kwargs,
        'is_regressor': is_regressor,
        'is_classifier': is_classifier,
        'is_transformer': is_transformer,
        'is_clusterer': is_clusterer}

    fit_snippet = string.Template(
    r"""
        >>>
        >>> from ibex.sklearn import $orig as pd_$orig
        >>>
        >>> prd =  pd_$orig.$name($kwargs).fit(iris[features], iris['class'])

    """)

    pieces = [
        _get_iris_example_doc_preamble_(
            is_regressor,
            is_classifier,
            is_transformer,
            is_clusterer,
            indent=0),
        fit_snippet.substitute(substitutions),
    ]

    if has_dataframe_y:
        dataframe_y_snippet = string.Template(
    r"""


    Example:

        >>> from ibex.sklearn import $orig as pd_$orig
        >>>
        >>> prd =  pd_$orig.$name($kwargs).fit(iris[features], iris['class'])

    """)
        pieces.append(dataframe_y_snippet.substitute(substitutions))

    return ''.join(pieces)


# Tmp Ami - uts, docs
def _make_pipeline_steps(objs):
    """
    Pairs each object with a step name derived from its class.

    The name is the lower-cased class name. When several objects share a
    class name, each gets a ``_<k>`` suffix, where ``k`` counts the earlier
    objects with that name (starting at 0).

    Arguments:
        objs: The step objects, in order.

    Returns:
        A list of ``(name, object)`` tuples in the order of ``objs``.
    """
    labels = [type(obj).__name__.lower() for obj in objs]
    totals = collections.Counter(labels)
    # Number of objects with a given (repeated) label that were already named.
    already_named = collections.defaultdict(int)

    steps = []
    for label, obj in zip(labels, objs):
        if totals[label] > 1:
            step_name = label + '_' + str(already_named[label])
            already_named[label] += 1
        else:
            step_name = label
        steps.append((step_name, obj))

    return steps


class FrameMixin(object):
    """
    Mixin marking a step as one that works on pandas entities rather than
    numpy entities.

    Inherit from this class to indicate that a step takes pandas entities.

    Example:

        This is a simple, illustrative "identity" transformer,
        which simply relays its input.

        >>> import pandas as pd
        >>> from sklearn import base
        >>> import ibex
        >>>
        >>> class Id(
        ...            base.BaseEstimator, # (1)
        ...            base.TransformerMixin, # (2)
        ...            ibex.FrameMixin): # (3)
        ...
        ...     def fit(self, X, y=None):
        ...         self.x_columns = X.columns # (4)
        ...         if y is not None and isinstance(y, pd.DataFrame):
        ...             self.y_columns = y.columns
        ...         return self
        ...
        ...     def transform(self, X, *args, **kwargs):
        ...         return X[self.x_columns] # (5)

        Note the following general points:

        1. We subclass :class:`sklearn.base.BaseEstimator`, as this is an estimator.

        2. We subclass :class:`sklearn.base.TransformerMixin`, as, in this case, this is specifically a transformer.

        3. We subclass :class:`ibex.FrameMixin`, as this estimator deals with ``pandas`` entities.

        4. In ``fit``, we make sure to set :py:attr:`ibex.FrameMixin.x_columns`, and, if relevant,
        :py:attr:`ibex.FrameMixin.y_columns` (if ``y`` is a :class:`pandas.DataFrame`); this will ensure that the
        transformer will "remember" the columns it should see in further calls.

        5. In ``transform``, we first use ``x_columns``. This will verify the columns of ``X``, and also reorder
        them according to the original order seen in ``fit`` (if needed).

        Suppose we define two :class:`pandas.DataFrame` objects, ``X_1`` and ``X_2``, with different columns:

        >>> import pandas as pd
        >>>
        >>> X_1 = pd.DataFrame({'a': [1, 2, 3], 'b': [3, 4, 5]})
        >>> X_2 = X_1.rename(columns={'b': 'd'})

        The following ``fit``-``transform`` combination will work:

        >>> Id().fit(X_1).transform(X_1)
        a  b
        0  1  3
        1  2  4
        2  3  5

        The following ``fit``-``transform`` combination will fail:

        >>> try:
        ...     Id().fit(X_1).transform(X_2)
        ... except KeyError:
        ...     print('caught')
        caught

        The following ``transform`` will fail, as the estimator was not fitted:

        >>> try:
        ...     from sklearn.exceptions import NotFittedError
        ... except ImportError:
        ...     from sklearn.utils.validation import NotFittedError # Older Versions
        >>> try:
        ...     Id().transform(X_2)
        ... except NotFittedError:
        ...     print('caught')
        caught

        Steps can be piped into each other:

        >>> (Id() | Id()).fit(X_1).transform(X_1)
        a  b
        0  1  3
        1  2  4
        2  3  5

        Steps can be added:

        >>> (Id() + Id()).fit(X_1).transform(X_1)
             id_0     id_1
           a  b  a  b
        0  1  3  1  3
        1  2  4  2  4
        2  3  5  3  5
    """

    @property
    def x_columns(self):
        """
        The X columns stored by the last assignment (normally done in fit).

        Assign this property in fit, and read it in the other methods.
        Reading it before any assignment raises ``NotFittedError``.

        """
        try:
            stored = self.__x_cols
        except AttributeError:
            # Nothing was stored yet, i.e. the step was never fitted.
            raise NotFittedError()
        return stored

    @x_columns.setter
    def x_columns(self, columns):
        self.__x_cols = columns

    @property
    def y_columns(self):
        """
        The y columns stored by the last assignment (normally done in fit).

        Assign this property in fit, and read it in the other methods.
        Reading it before any assignment raises ``NotFittedError``.

        .. versionadded:: 0.1.2

        """
        try:
            stored = self.__y_cols
        except AttributeError:
            # Nothing was stored yet, i.e. no y columns were recorded in fit.
            raise NotFittedError()
        return stored

    @y_columns.setter
    def y_columns(self, columns):
        self.__y_cols = columns

    def __or__(self, other):
        """
        Pipes the result of this step to other.

        If either operand is already a pipeline, its steps are spliced in,
        so the result is a single flat pipeline.

        Arguments:
            other: A different step object whose class subclasses this one.

        Returns:
            :py:class:`ibex.sklearn.pipeline.Pipeline`
        """
        left = [entry[1] for entry in self.steps] if isinstance(self, Pipeline) else [self]
        right = [entry[1] for entry in other.steps] if isinstance(other, Pipeline) else [other]

        return Pipeline(_make_pipeline_steps(left + right))

    def __add__(self, other):
        """
        Combines this step and other into a feature union.

        If either operand is already a feature union, its transformers are
        spliced in, so the result is a single flat union.

        Returns:
            :py:class:`ibex.sklearn.pipeline.FeatureUnion`
        """
        left = [entry[1] for entry in self.transformer_list] if isinstance(self, FeatureUnion) else [self]
        right = [entry[1] for entry in other.transformer_list] if isinstance(other, FeatureUnion) else [other]

        return FeatureUnion(_make_pipeline_steps(left + right))

# Export FrameMixin as part of this module's public API.
__all__ += ['FrameMixin']


def _transform(transformer, weight, X, *args, **kwargs):
    """
    Applies ``transformer.transform`` to ``X`` and optionally scales the result.

    Arguments:
        transformer: Object providing ``transform(X, *args, **kwargs)``.
        weight: Multiplicative weight for the result, or ``None`` for no scaling.
        X: The data to transform.
        *args: Extra positional arguments forwarded to ``transform``.
        **kwargs: Extra keyword arguments forwarded to ``transform``.

    Returns:
        The transformed data, multiplied by ``weight`` if it is not ``None``.
    """
    res = transformer.transform(X, *args, **kwargs)
    if weight is None:
        return res
    # Multiply out of place: the transformer may return X itself (or another
    # object it still holds), and an in-place ``*=`` would then silently
    # scale the caller's data.
    return res * weight


def _fit_transform(transformer, weight, X, y, *args, **kwargs):
    """
    Fits ``transformer`` on ``X`` and ``y``, transforms ``X``, and optionally
    scales the result.

    ``transformer.fit_transform`` is used when available; otherwise ``fit``
    is called, followed by ``transform(X)``.

    Arguments:
        transformer: Object providing ``fit_transform``, or ``fit`` and ``transform``.
        weight: Multiplicative weight for the result, or ``None`` for no scaling.
        X: The data to fit on and transform.
        y: The targets passed to fitting (may be ``None``).
        *args: Extra positional arguments forwarded to ``fit_transform`` / ``fit``.
        **kwargs: Extra keyword arguments forwarded to ``fit_transform`` / ``fit``.

    Returns:
        The transformed data, multiplied by ``weight`` if it is not ``None``.
    """
    if hasattr(transformer, 'fit_transform'):
        res = transformer.fit_transform(X, y, *args, **kwargs)
    else:
        res = transformer.fit(X, y, *args, **kwargs).transform(X)
    if weight is None:
        return res
    # Multiply out of place: the transformer may return X itself (or another
    # object it still holds), and an in-place ``*=`` would then silently
    # scale the caller's data.
    return res * weight


class FeatureUnion(pipeline.FeatureUnion, base.TransformerMixin, FrameMixin):
    """
    Concatenates results of multiple transformer objects.
    This estimator applies a list of transformer objects in parallel to the
    input data, then concatenates the results. This is useful to combine
    several feature extraction mechanisms into a single transformer.

    Arguments:

        transformer_list: list of (string, transformer) tuples.
            List of transformer objects to be applied to the data.
            The first half of each tuple is the name of the transformer.

        n_jobs: int, optional.
            Number of jobs to run in parallel (default 1).

        transformer_weights: dict, optional
            Multiplicative weights for features per transformer.
            Keys are transformer names, values the weights.

        as_index: bool, optional
            If true (the default), the columns of the result get an additional
            top level holding the name of the transformer that produced them.

    Example:

        >>> import pandas as pd
        >>> X = pd.DataFrame({'a': [1, 2, 3], 'b': [10, -3, 4]})

        >>> from ibex.sklearn import preprocessing as pd_preprocessing
        >>> from ibex.sklearn import pipeline as pd_pipeline

        >>> trn = pd_pipeline.FeatureUnion([
        ...     ('std', pd_preprocessing.StandardScaler()),
        ...     ('abs', pd_preprocessing.MaxAbsScaler())])
        >>> trn.fit_transform(X)
              std                 abs
                a         b         a    b
        0 -1.224745  1.192166  0.333333  1.0
        1  0.000000 -1.254912  0.666667 -0.3
        2  1.224745  0.062746  1.000000  0.4

        >>> from ibex import trans
        >>>
        >>> trn = pd_preprocessing.StandardScaler() + pd_preprocessing.MaxAbsScaler()
        >>> trn.fit_transform(X)
              standardscaler           maxabsscaler
                a         b         a    b
        0 -1.224745  1.192166  0.333333  1.0
        1  0.000000 -1.254912  0.666667 -0.3
        2  1.224745  0.062746  1.000000  0.4

        >>> trn = trans(pd_preprocessing.StandardScaler(), out_cols=['std_scale_a', 'std_scale_b'])
        >>> trn += trans(pd_preprocessing.MaxAbsScaler(), out_cols=['max_scale_a', 'max_scale_b'])
        >>> trn.fit_transform(X)
        functiontransformer_0             functiontransformer_1
        std_scale_a  std_scale_b  max_scale_a  max_scale_b
        0    -1.224745     1.192166     0.333333          1.0
        1     0.000000    -1.254912     0.666667         -0.3
        2     1.224745     0.062746     1.000000          0.4
    """
    def __init__(self, transformer_list, n_jobs=1, transformer_weights=None, as_index=True):
        # Keyword arguments keep this call valid even where the parent
        # constructor only accepts these options by name.
        pipeline.FeatureUnion.__init__(
            self,
            transformer_list,
            n_jobs=n_jobs,
            transformer_weights=transformer_weights)
        FrameMixin.__init__(self)
        self._as_index = as_index

    @property
    def as_index(self):
        """
        Whether the transformer names are added as a top column level.

        Exposed under the same name as the constructor argument because
        sklearn's ``get_params``/``set_params``/``clone`` look parameters up
        as attributes named exactly like the ``__init__`` arguments; the value
        itself is kept in ``_as_index``.
        """
        return self._as_index

    @as_index.setter
    def as_index(self, value):
        self._as_index = value

    # Tmp Ami - get docstrings from sklearn.
    def fit_transform(self, X, y=None, **fit_params):
        """
        Fits the transformers using ``X`` (and possibly ``y``). Transforms
        ``X`` using the transformers, uses :func:`pandas.concat`
        to horizontally concatenate the results.

        Arguments:
            X: The data to fit on and transform; checked by ``verify_x_type``.
            y: The targets, if any; checked by ``verify_y_type``.
            **fit_params: Forwarded to the fitting call of each transformer.

        Returns:

            The horizontally concatenated results of the transformers.
        """
        verify_x_type(X)
        verify_y_type(y)

        Xts = joblib.Parallel(n_jobs=self.n_jobs)(
            joblib.delayed(_fit_transform)(trans, weight, X, y, **fit_params) for _, trans, weight in self._iter())
        return self.__concat(Xts)

    def transform(self, X, *args, **kwargs):
        """
        Transforms ``X`` using the transformers, uses :func:`pandas.concat`
        to horizontally concatenate the results.

        Arguments:
            X: The data to transform; checked by ``verify_x_type``.
            *args: Forwarded to ``transform`` of each transformer.
            **kwargs: Forwarded to ``transform`` of each transformer.

        Returns:

            The horizontally concatenated results of the transformers.
        """
        verify_x_type(X)

        Xts = joblib.Parallel(n_jobs=self.n_jobs)(
            joblib.delayed(_transform)(trans, weight, X, *args, **kwargs) for _, trans, weight in self._iter())
        return self.__concat(Xts)

    def _iter(self):
        """
        Generates ``(name, transformer, weight)`` triplets for the entries of
        ``transformer_list``; ``weight`` is ``None`` for names that have no
        entry in ``transformer_weights``.
        """
        weights = self.transformer_weights
        if weights is None:
            weights = {}
        return ((name, trans, weights.get(name, None)) for name, trans in self.transformer_list)

    def __concat(self, Xts):
        """
        Concatenates the per-transformer results column-wise and relabels the
        columns with a :class:`pandas.MultiIndex`.
        """
        conc = pd.concat(Xts, axis=1)

        # Express every column label as a tuple, then right-pad the tuples with
        # empty strings so that all of them have the same number of levels.
        tups = [c if isinstance(c, tuple) else (c, ) for c in conc.columns]
        max_tup_len = max(len(t) for t in tups)
        tups = [t + ('', ) * (max_tup_len - len(t)) for t in tups]

        if self._as_index:
            # Prefix each label with the name of the transformer that produced
            # it, repeating the name once per column of that transformer's result.
            names = [name for (name, _, _) in self._iter()]
            tup_heads = itertools.chain.from_iterable(
                (name, ) * len(Xt.columns) for (name, Xt) in zip(names, Xts))
            tups = [(h, ) + t for h, t in zip(tup_heads, tups)]

        conc.columns = pd.MultiIndex.from_tuples(tups)

        return conc


# Set the class name explicitly.
FeatureUnion.__name__ = 'FeatureUnion'

# Names of methods whose metadata is copied from sklearn's FeatureUnion.
_wrapped = [
    'get_feature_names',
    'get_params',
    'set_params',
]

# Copy name/docstring metadata from sklearn's method onto the attribute of the
# same name looked up on ibex's FeatureUnion. An AttributeError (a missing
# method, or an attribute that cannot be updated) is deliberately ignored, so
# this is best-effort only.
# NOTE(review): none of these methods is overridden in ibex's FeatureUnion, so
# both lookups presumably resolve to the same inherited object - confirm whether
# this loop still has any effect.
for wrap in _wrapped:
    try:
        functools.update_wrapper(getattr(FeatureUnion, wrap), getattr(pipeline.FeatureUnion, wrap))
    except AttributeError:
        pass


# Export FeatureUnion as part of this module's public API.
__all__ += ['FeatureUnion']


class Pipeline(pipeline.Pipeline, FrameMixin):
    """
    A :class:`sklearn.pipeline.Pipeline` that is also a :class:`FrameMixin`.

    Adds no behavior of its own. It is the type built by
    ``FrameMixin.__or__``, and the type whose ``steps`` that operator
    flattens when one of its operands is already a pipeline.
    """
    pass



class InOpChecker(object):
    """
    Tracks, separately for each thread, which estimators are currently
    "in an operation".

    Estimators are tracked by ``id``, so they need not be hashable.

    Arguments:
        f_name: A file name; the hash of its absolute path is used to name the
            per-thread storage slot.
    """

    def __init__(self, f_name):
        self.__flag = '_ibex_adapter_in_op_%s' % hash(os.path.abspath(f_name))
        # A single thread-local object must be kept for the lifetime of the
        # checker: a ``threading.local()`` created on the spot is always empty,
        # so looking the flag up on it can never find previously stored state.
        self.__local = threading.local()

    def __ids(self):
        """Returns the id-set of the calling thread, creating it on first use."""
        ids = getattr(self.__local, self.__flag, None)
        if ids is None:
            ids = set()
            setattr(self.__local, self.__flag, ids)
        return ids

    def __contains__(self, est):
        """Returns whether ``est`` was added (and not removed) by the calling thread."""
        return id(est) in self.__ids()

    def add(self, est):
        """Marks ``est`` as in-operation for the calling thread."""
        self.__ids().add(id(est))

    def remove(self, est):
        """
        Unmarks ``est`` for the calling thread.

        Raises:
            KeyError: If ``est`` is not currently marked in the calling thread.
        """
        self.__ids().remove(id(est))


# Export Pipeline (the class defined above, before InOpChecker) as part of this module's public API.
__all__ += ['Pipeline']