Python sklearn.utils.validation.column_or_1d() Examples

The following are 21 code examples of sklearn.utils.validation.column_or_1d(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module sklearn.utils.validation, or try the search function.
Example #1
Source File: utils.py    From pmdarima with MIT License 6 votes vote down vote up
def is_constant(x):
    """Check whether every element of ``x`` equals its first element.

    Parameters
    ----------
    x : array-like, shape=(n_samples,)
        The vector to inspect. It is passed through ``column_or_1d``
        before the comparison is made.

    Returns
    -------
    constant : bool-like
        The result of ``.all()`` over the element-wise comparison of the
        vector with its first element.

    Examples
    --------
    >>> import numpy as np
    >>> x = np.array([1, 2, 3])
    >>> y = np.ones(3)
    >>> [is_constant(x), is_constant(y)]
    [False, True]
    """
    values = column_or_1d(x)  # type: np.ndarray
    # The first element is the reference value; indexing it requires a
    # non-empty vector.
    reference = values[0]
    matches = values == reference
    return matches.all()
Example #2
Source File: Estimator.py    From tbats with MIT License 6 votes vote down vote up
def _validate(self, y):
        """Validate the input series and disable Box-Cox for non-positive data.

        ``y`` is converted to a finite float64 copy via ``check_array`` and
        flattened with ``c1d``. Any exception raised while doing so is handed
        to the context's exception handler as an ``InputArgsException``; if
        that handler call returns, ``False`` is returned.

        When the validated series contains a value ``<= 0``,
        ``self.use_box_cox`` is set to ``False``. A warning is emitted first
        if it had been explicitly set to ``True``.

        Returns the validated array (or ``False`` as described above).
        """
        try:
            checked = check_array(y, ensure_2d=False, force_all_finite=True, ensure_min_samples=1,
                                  copy=True, dtype=np.float64)
            y = c1d(checked)  # type: np.ndarray
        except Exception as validation_exception:
            self.context.get_exception_handler().exception(
                "y series is invalid", error.InputArgsException, previous_exception=validation_exception
            )
            return False

        # Strictly positive data needs no adjustment.
        if not np.any(y <= 0):
            return y

        if self.use_box_cox is True:
            # Only warn when the user explicitly forced the transformation.
            self.context.get_exception_handler().warn(
                "Box-Cox transformation (use_box_cox) was forced to True "
                "but there are negative values in input series. "
                "Setting use_box_cox to False.",
                error.InputArgsWarning
            )
        self.use_box_cox = False
        return y
Example #3
Source File: transformations.py    From keras-pandas with MIT License 6 votes vote down vote up
def fit(self, y):
        """Fit the label encoder, reserving an extra ``'UNK'`` class.

        Parameters
        ----------
        y : array-like of shape (n_samples,)
            Target values.

        Returns
        -------
        self : returns an instance of self.
        """
        labels = column_or_1d(y, warn=True)
        # Append the sentinel so that 'UNK' is always one of the classes.
        labels_with_sentinel = numpy.append(labels, ['UNK'])
        self.classes_ = numpy.unique(labels_with_sentinel)
        return self
Example #4
Source File: transformations.py    From keras-pandas with MIT License 6 votes vote down vote up
def fit_transform(self, y, **kwargs):
        """Fit label encoder and return encoded labels

        An ``'UNK'`` sentinel is appended to the labels before fitting so that
        it is always part of ``self.classes_``. The code of that sentinel is
        removed from the returned array, so the output has exactly one entry
        per input sample.

        Parameters
        ----------
        y : array-like of shape [n_samples]
            Target values.
        **kwargs :
            Accepted but not used by this method.

        Returns
        -------
        y : array-like of shape [n_samples]
            Position of each input label within ``self.classes_``.
        """
        y = column_or_1d(y, warn=True)
        y = numpy.append(y, ['UNK'])
        self.classes_, encoded = numpy.unique(y, return_inverse=True)
        # The last code belongs to the appended 'UNK' sentinel, not to a
        # sample; returning it would make the output one element too long.
        return encoded[:-1]
Example #5
Source File: transformations.py    From keras-pandas with MIT License 6 votes vote down vote up
def transform(self, y):
        """Encode labels, mapping values absent from ``classes_`` to ``'UNK'``.

        Parameters
        ----------
        y : array-like of shape [n_samples]
            Target values.

        Returns
        -------
        y : array-like of shape [n_samples]
            Result of ``numpy.searchsorted`` of the (substituted) labels in
            ``self.classes_``.

        Raises
        ------
        ValueError
            If, after substitution, some label is still not in
            ``self.classes_``.
        """
        check_is_fitted(self, 'classes_')
        y = column_or_1d(y, warn=True)
        # Replace every label that was not seen during fitting by 'UNK'.
        y = numpy.array([label if label in self.classes_ else 'UNK' for label in y])

        observed = numpy.unique(y)
        n_known = len(numpy.intersect1d(observed, self.classes_))
        if n_known < len(observed):
            unseen = numpy.setdiff1d(observed, self.classes_)
            raise ValueError("y contains new labels: %s" % str(unseen))
        return numpy.searchsorted(self.classes_, y)
Example #6
Source File: ConditionMortalityPredictor.py    From CDSS with GNU General Public License v3.0 6 votes vote down vote up
def _select_features(self):
        """Select features by recursive elimination and prune both matrices.

        The number of features requested is 1% of the training columns
        (truncated to an int). Columns whose rank exceeds that number are
        appended to ``self._eliminated_features``, the ranks are stored on
        ``self._feature_ranks``, and the train and test matrices are replaced
        by their transformed versions.
        """
        selector = FeatureSelector(
            algorithm=FeatureSelector.RECURSIVE_ELIMINATION,
            problem=FeatureSelector.CLASSIFICATION,
        )

        selector.set_input_matrix(self._X_train, column_or_1d(self._y_train))
        n_to_select = int(0.01*len(self._X_train.columns.values))
        selector.select(k=n_to_select)

        # Record eliminated features before the matrices are transformed,
        # while column positions still line up with the ranks.
        self._feature_ranks = selector.compute_ranks()
        for position in range(len(self._feature_ranks)):
            rank = self._feature_ranks[position]
            if rank > n_to_select:
                self._eliminated_features.append(self._X_train.columns[position])

        self._X_train = selector.transform_matrix(self._X_train)
        self._X_test = selector.transform_matrix(self._X_test)
Example #7
Source File: SupervisedLearningPipeline.py    From CDSS with GNU General Public License v3.0 6 votes vote down vote up
def _train_predictor(self, problem, classes=None, hyperparams=None):
        """Build the predictor for ``problem`` and train it on the training set.

        Parameters
        ----------
        problem :
            Compared against ``SupervisedLearningPipeline.CLASSIFICATION`` and
            ``SupervisedLearningPipeline.REGRESSION``.
        classes :
            Passed through to the classifier constructor (classification only).
        hyperparams : dict
            Must contain an ``'algorithm'`` entry for both problem kinds. For a
            ``'bifurcated-...'`` algorithm the prefix is stripped in place, so
            the caller's dict is modified.

        Returns
        -------
        status
            Whatever the predictor's ``train`` method returns.
        """
        if problem == SupervisedLearningPipeline.CLASSIFICATION:
            if 'bifurcated' in hyperparams['algorithm']:
                learning_class = BifurcatedSupervisedClassifier
                # Strip 'bifurcated-' from algorithm for SupervisedClassifier.
                hyperparams['algorithm'] = '-'.join(hyperparams['algorithm'].split('-')[1:])
            else:
                learning_class = SupervisedClassifier

            self._predictor = learning_class(classes, hyperparams)
        elif problem == SupervisedLearningPipeline.REGRESSION:
            learning_class = Regressor
            # ``algorithm`` was never defined in this scope, which raised a
            # NameError; the value lives in the hyperparameter dict.
            self._predictor = learning_class(algorithm=hyperparams['algorithm'])
        status = self._predictor.train(self._X_train, column_or_1d(self._y_train),
                                       groups = self._patIds_train)

        return status
Example #8
Source File: TestClassifierAnalyzer.py    From CDSS with GNU General Public License v3.0 6 votes vote down vote up
def setUp(self):
        """Create two analyzers: one over a ListPredictor, one over a trained classifier."""
        log.level = logging.ERROR

        # Simple predictor on the 10-row case, used for the non-ROC analyses.
        small_X = RANDOM_10_TEST_CASE['X']
        small_y = RANDOM_10_TEST_CASE['y']
        self._list_classifier = ListPredictor([0, 1])
        self._lc_analyzer = ClassifierAnalyzer(self._list_classifier, small_X, small_y)

        # Trained classifier on the 100-row case, with a reproducible split.
        large_X = RANDOM_100_TEST_CASE['X']
        large_y = RANDOM_100_TEST_CASE['y']
        X_train, X_test, y_train, y_test = train_test_split(large_X, large_y, random_state=123456789)

        hyperparams = {
            'algorithm': SupervisedClassifier.REGRESS_AND_ROUND,
            'random_state': 123456789,
        }
        self._ml_classifier = SupervisedClassifier([0, 1], hyperparams)
        self._ml_classifier.train(X_train, column_or_1d(y_train))
        self._ml_analyzer = ClassifierAnalyzer(self._ml_classifier, X_test, y_test)
Example #9
Source File: array.py    From pmdarima with MIT License 5 votes vote down vote up
def check_endog(y, dtype=DTYPE, copy=True, force_all_finite=False):
    """Validate ``y`` with sklearn's ``check_array`` and flatten it to 1d.

    Parameters
    ----------
    y : array-like, shape=(n_samples,)
        The 1d endogenous array.

    dtype : string, type or None (default=DTYPE)
        Data type of result, forwarded to ``check_array``.

    copy : bool, optional (default=True)
        Whether a forced copy will be triggered, forwarded to
        ``check_array``.

    force_all_finite : bool, optional (default=False)
        Forwarded to ``check_array``:

        - True: Force all values of array to be finite.
        - False: accept both np.inf and np.nan in array.

    Returns
    -------
    y : np.ndarray, shape=(n_samples,)
        A 1d numpy ndarray
    """
    validated = check_array(
        y,
        ensure_2d=False,
        force_all_finite=force_all_finite,
        copy=copy,
        dtype=dtype,
    )
    return column_or_1d(validated)  # type: np.ndarray
Example #10
Source File: BoxCox.py    From tbats with MIT License 5 votes vote down vote up
def boxcox(y, lam=None, seasonal_periods=None, bounds=(-1, 2)):
    """Apply the Box-Cox transformation to ``y``.

    ``y`` is validated as a finite, non-empty float64 vector. When ``lam`` is
    ``None`` it is obtained from ``find_box_cox_lambda`` using
    ``seasonal_periods`` and ``bounds``.

    Raises ``error.InputArgsException`` when ``lam <= 0`` and ``y`` contains a
    value ``<= 0``. Returns ``log(y)`` when ``lam`` is close to zero, and
    ``(sign(y) * |y| ** lam - 1) / lam`` otherwise.
    """
    validated = check_array(y, ensure_2d=False, force_all_finite=True, ensure_min_samples=1,
                            copy=False, dtype=np.float64)
    y = c1d(validated)  # type: np.ndarray

    if lam is None:
        lam = find_box_cox_lambda(y, seasonal_periods=seasonal_periods, bounds=bounds)

    if lam <= 0:
        if np.any(y <= 0):
            raise error.InputArgsException('y must have only positive values for box-cox transformation.')

    if np.isclose(0.0, lam):
        return np.log(y)

    signed_power = np.sign(y) * (np.abs(y) ** lam)
    return (signed_power - 1) / lam
Example #11
Source File: BoxCox.py    From tbats with MIT License 5 votes vote down vote up
def find_box_cox_lambda(y, seasonal_periods=None, bounds=(-1, 2)):
    """Return the Box-Cox lambda chosen by ``Guerrero.find_lambda`` for ``y``.

    ``y`` is first validated as a finite, non-empty float64 vector;
    ``seasonal_periods`` and ``bounds`` are forwarded unchanged.
    """
    validated = check_array(y, ensure_2d=False, force_all_finite=True, ensure_min_samples=1,
                            copy=False, dtype=np.float64)
    y = c1d(validated)  # type: np.ndarray

    return Guerrero().find_lambda(y, seasonal_periods=seasonal_periods, bounds=bounds)
Example #12
Source File: Estimator.py    From tbats with MIT License 5 votes vote down vote up
def _normalize_seasonal_periods_to_type(self, seasonal_periods, dtype):
        """Validates seasonal periods and normalizes them

        Normalization ensures periods are of proper type, unique and sorted.
        """
        if seasonal_periods is not None:
            try:
                seasonal_periods = c1d(check_array(seasonal_periods, ensure_2d=False, force_all_finite=True,
                                                   ensure_min_samples=0,
                                                   copy=True, dtype=dtype))
            except Exception as validation_exception:
                self.context.get_exception_handler().exception("seasonal_periods definition is invalid",
                                                               error.InputArgsException,
                                                               previous_exception=validation_exception)

            seasonal_periods = np.unique(seasonal_periods)
            if len(seasonal_periods[np.where(seasonal_periods <= 1)]) > 0:
                self.context.get_exception_handler().warn(
                    "All seasonal periods should be values greater than 1. "
                    "Ignoring all seasonal period values that do not meet this condition.",
                    error.InputArgsWarning
                )
            seasonal_periods = seasonal_periods[np.where(seasonal_periods > 1)]
            seasonal_periods.sort()
            if len(seasonal_periods) == 0:
                seasonal_periods = None
        return seasonal_periods 
Example #13
Source File: utils.py    From AIF360 with Apache License 2.0 5 votes vote down vote up
def check_inputs(X, y, sample_weight=None, ensure_2d=True):
    """Input validation for debiasing algorithms.

    Optionally checks that X is 2D, converts y and sample_weight to 1D,
    verifies that all three have consistent length, and substitutes an array
    of ones when sample_weight is ``None``.

    Args:
        X (array-like): Input data.
        y (array-like, shape = (n_samples,)): Target values.
        sample_weight (array-like, optional): Sample weights.
        ensure_2d (bool, optional): Whether to raise a ValueError if X is not
            2D.

    Returns:
        tuple:

            * **X** (`array-like`) -- Validated X. Unchanged.

            * **y** (`array-like`) -- Validated y. Possibly converted to 1D if
              not a :class:`pandas.Series`.
            * **sample_weight** (`array-like`) -- Validated sample_weight. If no
              sample_weight is provided, returns an array of ones with one
              entry per row of X.

    Raises:
        ValueError: If ``ensure_2d`` is set and ``X.ndim`` is not 2.
    """
    if ensure_2d and X.ndim != 2:
        message = "Expected X to be 2D, got ndim == {} instead.".format(X.ndim)
        raise ValueError(message)

    # A Series is kept as-is so it is not cast to a plain ndarray.
    if not isinstance(y, pd.Series):
        y = column_or_1d(y)

    if sample_weight is None:
        sample_weight = np.ones(X.shape[0])
    else:
        sample_weight = column_or_1d(sample_weight)

    check_consistent_length(X, y, sample_weight)
    return X, y, sample_weight
Example #14
Source File: SupervisedClassifier.py    From CDSS with GNU General Public License v3.0 5 votes vote down vote up
def _maybe_reshape_y(self, y):
        """Flatten ``y`` with ``column_or_1d`` when it has a second dimension.

        Reading ``y.shape[1]`` is used purely as a probe: when it raises
        ``IndexError`` the input is returned untouched.
        """
        try:
            _ = y.shape[1]  # probe only; the value itself is not needed
            y = column_or_1d(y)
            log.debug('Reshaped y to 1d.')
        except IndexError:
            log.debug('Did not need to reshape y to 1d.')
        return y
Example #15
Source File: encoders.py    From sagemaker-scikit-learn-extension with Apache License 2.0 5 votes vote down vote up
def inverse_transform(self, y):
        """Transform labels back to original encoding.

        If ``self.fill_unseen_labels`` is ``True``, use ``self.fill_label_value`` for unseen values.

        Parameters
        ----------
        y : numpy array of shape [n_samples]
            Encoded label values.

        Returns
        -------
        y_decoded : list of length n_samples
                    Label values. An empty numpy array is returned for empty input.

        Raises
        ------
        ValueError
            If ``y`` cannot be converted to integers, or if it contains codes
            outside ``0 .. n_classes - 1`` while ``self.fill_unseen_labels`` is
            falsy.
        """
        check_is_fitted(self, "classes_")
        y = column_or_1d(y, warn=True)

        if y.dtype.kind not in ("i", "u"):
            try:
                # ``np.float`` / ``np.int`` were aliases of the builtins and no
                # longer exist in current NumPy; the builtins behave the same.
                y = y.astype(float).astype(int)
            except ValueError:
                raise ValueError("`y` contains values not convertible to integer.")

        # inverse transform of empty array is empty array
        if _num_samples(y) == 0:
            return np.array([])

        n_classes = len(self.classes_)
        labels = np.arange(n_classes)
        diff = np.setdiff1d(y, labels)

        # Test the size explicitly: the truth value of an array with more
        # than one element is ambiguous and raises.
        if diff.size > 0 and not self.fill_unseen_labels:
            raise ValueError("y contains previously unseen labels: %s" % str(diff))

        # A range check is equivalent to membership in ``labels`` for integer
        # codes and avoids scanning the label array for every sample.
        y_decoded = [self.classes_[idx] if 0 <= idx < n_classes else self.fill_label_value for idx in y]
        return y_decoded
Example #16
Source File: encoders.py    From sagemaker-scikit-learn-extension with Apache License 2.0 5 votes vote down vote up
def transform(self, y):
        """Encode labels as positions within ``self.classes_``.

        When ``self.fill_unseen_labels`` is truthy, labels that are not in
        ``self.classes_`` are encoded as ``self.fill_encoded_label_value``,
        falling back to ``len(self.classes_)`` when that attribute is falsy.
        Otherwise encoding is delegated to ``_encode``.

        Parameters
        ----------
        y : array-like of shape [n_samples]
            Label values.

        Returns
        -------
        y_encoded : array-like of shape [n_samples]
                    Encoded label values.
        """
        check_is_fitted(self, "classes_")
        y = column_or_1d(y, warn=True)

        # transform of empty array is empty array
        if _num_samples(y) == 0:
            return np.array([])

        if not self.fill_unseen_labels:
            _, y_encoded = _encode(y, uniques=self.classes_, encode=True)
            return y_encoded

        _, seen_mask = _encode_check_unknown(y, self.classes_, return_mask=True)
        y_encoded = np.searchsorted(self.classes_, y)
        # NOTE(review): ``or`` also replaces a configured value of 0 by
        # len(self.classes_) -- confirm this is intended.
        replacement = self.fill_encoded_label_value or len(self.classes_)
        y_encoded[~seen_mask] = replacement
        return y_encoded
Example #17
Source File: encoders.py    From sagemaker-scikit-learn-extension with Apache License 2.0 5 votes vote down vote up
def fit(self, y):
        """Fit label encoder.

        ``self.classes_`` is set to the result of
        ``self._check_labels_and_sort()`` when that is truthy, and to
        ``_encode(y)`` otherwise.

        Parameters
        ----------
        y : array-like of shape (n_samples,)
            Label values.

        Returns
        -------
        self : RobustLabelEncoder.
        """
        y = column_or_1d(y, warn=True)
        preset_labels = self._check_labels_and_sort()
        self.classes_ = preset_labels or _encode(y)
        return self
Example #18
Source File: array.py    From pmdarima with MIT License 5 votes vote down vote up
def as_series(x):
    """Cast as pandas Series.

    A ``pd.Series`` is returned as-is (same object, index preserved). Any
    other input is flattened with ``column_or_1d`` and wrapped in a new
    Series, whose index cannot be set through this function.

    Parameters
    ----------
    x : array-like, shape=(n_samples,)
        The 1d array to cast.

    Examples
    --------
    >>> as_series([1, 2, 3])
    0    1
    1    2
    2    3
    dtype: int64

    >>> as_series(as_series((1, 2, 3)))
    0    1
    1    2
    2    3
    dtype: int64

    >>> import pandas as pd
    >>> as_series(pd.Series([4, 5, 6], index=['a', 'b', 'c']))
    a    4
    b    5
    c    6
    dtype: int64

    Returns
    -------
    s : pd.Series
        A pandas Series object.
    """
    if not isinstance(x, pd.Series):
        x = pd.Series(column_or_1d(x))
    return x
Example #19
Source File: SupervisedLearningPipeline.py    From CDSS with GNU General Public License v3.0 4 votes vote down vote up
def _select_features(self, problem, percent_features_to_select, algorithm, features_to_keep=None):
        """Select features on the training data and prune the train/test matrices.

        The number of features requested is ``percent_features_to_select``
        times the number of training columns, truncated to an int. Columns
        ranked above that number are appended to
        ``self._eliminated_features`` unless they are listed in
        ``features_to_keep``. Both matrices are then transformed by the
        selector, and any kept feature missing from the transformed matrix is
        merged back in on the index.
        """
        selector = FeatureSelector(problem=problem, algorithm=algorithm, random_state=self._random_state)
        selector.set_input_matrix(self._X_train, column_or_1d(self._y_train))
        n_to_select = int(percent_features_to_select*len(self._X_train.columns.values))

        if features_to_keep is None:
            features_to_keep = []

        selector.select(k=n_to_select)

        # Record eliminated features before the matrices are transformed;
        # a feature the caller asked to keep is never reported as eliminated.
        ranks = selector.compute_ranks()
        for position in range(len(ranks)):
            if ranks[position] > n_to_select and self._X_train.columns[position] not in features_to_keep:
                self._eliminated_features.append(self._X_train.columns[position])

        # Hack: the selector knows nothing about kept features, so copy their
        # columns up front and merge back only those the transform dropped.
        saved_train = self._X_train[features_to_keep].copy()
        log.debug('kept_X_train_feature.shape: %s' % str(saved_train.shape))
        self._X_train = selector.transform_matrix(self._X_train)
        for name in features_to_keep:
            if name not in self._X_train:
                self._X_train = self._X_train.merge(saved_train[[name]], left_index=True, right_index=True)

        saved_test = self._X_test[features_to_keep].copy()
        log.debug('kept_X_test_feature.shape: %s' % str(saved_test.shape))
        self._X_test = selector.transform_matrix(self._X_test)
        for name in features_to_keep:
            if name not in self._X_test:
                self._X_test = self._X_test.merge(saved_test[[name]], left_index=True, right_index=True)

        if not features_to_keep:
            # NOTE(review): both matrices were already transformed above, so
            # this applies transform_matrix a second time -- confirm intended.
            self._X_train = selector.transform_matrix(self._X_train)
            self._X_test = selector.transform_matrix(self._X_test)
Example #20
Source File: SupervisedLearner.py    From CDSS with GNU General Public License v3.0 4 votes vote down vote up
def run(self):
        """Train a random-forest classifier, write test predictions, print metrics.

        The input matrix is split into train and test rows. Features are
        imputed, removed and selected through a pipeline fitted on the
        training rows. The predicted probabilities for the test rows are
        written to the organizer's output file next to the actual labels, and
        AUC plus confusion metrics at threshold 0.5 are printed.
        """
        organizer = Syst.FileOrganizerLocal(working_folderpath=self.working_folderpath)

        train_rows, test_rows = Utils.split_rows(self.input_matrix)
        X_train_raw, y_train = Utils.split_Xy(train_rows, ylabel=self.ylabel)

        pipeline = Pipeline(
            memory=None,  # file_organizer.cached_pipeline_filepath,
            steps=[
                ('impute_features', Clas.FeatureImputer()),
                ('remove_features', Clas.FeatureRemover()),
                ('select_features', Clas.Select_Features()),
            ],
        )
        X_train = pipeline.fit_transform(X_train_raw, y_train)

        hyperparams = {
            'algorithm': 'random-forest',
            'hyperparam_strategy': SupervisedClassifier.EXHAUSTIVE_SEARCH,
            'max_iter': 1024,
        }
        predictor = SupervisedClassifier(classes=[0, 1], hyperparams=hyperparams)
        predictor.train(X_train, column_or_1d(y_train))

        X_test_raw, y_test = Utils.split_Xy(test_rows, ylabel=self.ylabel)
        X_test = pipeline.transform(X_test_raw)
        # Column 1 of the probability matrix is used as the prediction score.
        positive_proba = predictor.predict_probability(X_test)[:, 1]

        results = pd.DataFrame({'actual': y_test,
                                'predict': positive_proba})
        results.to_csv(organizer.get_output_filepath())

        # TODO
        from scripts.LabTestAnalysis.lab_statistics.stats_utils import get_confusion_metrics
        from sklearn.metrics import roc_auc_score

        AUC = roc_auc_score(y_test, positive_proba)

        metrics = get_confusion_metrics(actual_labels=y_test.values,
                                        predict_probas=positive_proba,
                                        threshold=0.5)
        sensitivity, specificity, LR_p, LR_n, PPV, NPV = metrics
        summary = "AUC: %s, sensitivity: %s, specificity: %s, LR_p: %s, LR_n: %s, PPV: %s, NPV: %s:. " \
            % (AUC, sensitivity, specificity, LR_p, LR_n, PPV, NPV)
        print(summary)
Example #21
Source File: Model.py    From tbats with MIT License 4 votes vote down vote up
def _fit_to_observations(self, y, starting_x):
        """Fits model with starting x to time series"""
        self.warnings = []
        self.is_fitted = False

        if self.validate_input:
            try:
                y = c1d(check_array(y, ensure_2d=False, force_all_finite=True, ensure_min_samples=1,
                                    copy=True, dtype=np.float64))  # type: np.ndarray
            except Exception as validation_exception:
                self.context.get_exception_handler().exception("y series is invalid",
                                                               error.InputArgsException,
                                                               previous_exception=validation_exception)
        self.y = y
        yw = self._boxcox(y)

        matrix_builder = self.matrix
        w = matrix_builder.make_w_vector()
        g = matrix_builder.make_g_vector()
        F = matrix_builder.make_F_matrix()

        # initialize matrices
        yw_hat = np.asarray([0.0] * len(y))
        # x = np.matrix(np.zeros((len(params.x0), len(yw) + 1)))
        x = starting_x

        with warnings.catch_warnings():
            warnings.filterwarnings('error')
            try:
                for t in range(0, len(y)):
                    yw_hat[t] = w @ x
                    e = yw[t] - yw_hat[t]
                    x = F @ x + g * e
            except RuntimeWarning:
                # calculation issues, values close to max float value
                self.add_warning('Numeric calculation issues detected. Model is not usable.')
                self.is_fitted = False
                return self

        # store fit results
        self.x_last = x
        self.resid_boxcox = yw - yw_hat
        try:
            self.y_hat = self._inv_boxcox(yw_hat)
        except RuntimeWarning:
            self.add_warning('Box-Cox related numeric calculation issues detected. Model is not usable.')
            self.is_fitted = False
            return self

        self.resid = self.y - self.y_hat

        self.is_fitted = True
        self.aic = self.calculate_aic()

        return self