Python sklearn.utils.column_or_1d() Examples

The following are 30 code examples of sklearn.utils.column_or_1d(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module sklearn.utils, or try the search function.
Example #1
Source File: test_utils.py    From twitter-stock-recommendation with MIT License 6 votes vote down vote up
def test_column_or_1d():
    """Check ``column_or_1d`` on a table of labelled target examples.

    Examples labelled binary, multiclass or continuous must come back equal
    to ``np.ravel`` of the input; every other label must raise ``ValueError``.
    """
    # Labels whose examples are expected to be flattened instead of rejected.
    flattenable = ("binary", "multiclass", "continuous")

    cases = [
        ("binary", ["spam", "egg", "spam"]),
        ("binary", [0, 1, 0, 1]),
        ("continuous", np.arange(10) / 20.),
        ("multiclass", [1, 2, 3]),
        ("multiclass", [0, 1, 2, 2, 0]),
        ("multiclass", [[1], [2], [3]]),
        ("multilabel-indicator", [[0, 1, 0], [0, 0, 1]]),
        ("multiclass-multioutput", [[1, 2, 3]]),
        ("multiclass-multioutput", [[1, 1], [2, 2], [3, 1]]),
        ("multiclass-multioutput", [[5, 1], [4, 2], [3, 1]]),
        ("multiclass-multioutput", [[1, 2, 3]]),
        ("continuous-multioutput", np.arange(30).reshape((-1, 3))),
    ]

    for kind, target in cases:
        if kind not in flattenable:
            assert_raises(ValueError, column_or_1d, target)
            continue
        assert_array_equal(column_or_1d(target), np.ravel(target))
Example #2
Source File: __init__.py    From sklearn2pmml with GNU Affero General Public License v3.0 6 votes vote down vote up
def fit(self, X, y = None):
		"""Record the distinct values and/or value statistics of a single column.

		``X`` is validated with ``column_or_1d``; ``y`` is not used. Nothing is
		recorded when ``self._empty_fit()`` is true. Returns ``self``.
		"""
		X = column_or_1d(X, warn = True)
		if self._empty_fit():
			return self
		if self.dtype is not None:
			X = cast(X, self.dtype)
		missing = self._missing_value_mask(X)
		# Distinct values (and their frequencies) among the non-missing entries.
		values, counts = numpy.unique(X[~missing], return_counts = True)
		if self.with_data:
			has_replacement = self.missing_value_replacement is not None
			if has_replacement and numpy.any(missing):
				# The replacement value becomes part of the recorded values
				# whenever at least one entry is missing.
				extended = numpy.append(values, self.missing_value_replacement)
				self.data_ = numpy.unique(extended)
			else:
				self.data_ = values
		if self.with_statistics:
			self.counts_ = _count(missing)
			self.discr_stats_ = (values, counts)
		return self
Example #3
Source File: calibration.py    From carl with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def predict(self, T):
        """Calibrate data.

        The result is ``pdf1 / (pdf0 + pdf1)``, where ``pdf0`` and ``pdf1``
        are the densities returned by ``self.calibrator0`` and
        ``self.calibrator1``. Entries whose denominator is exactly zero are
        set to 0.5.

        Parameters
        ----------
        * `T` [array-like, shape=(n_samples,)]:
            Data to calibrate.

        Returns
        -------
        * `Tt` [array, shape=(n_samples,)]:
            Calibrated data.
        """
        T = column_or_1d(T).reshape(-1, 1)

        # Evaluate each density exactly once; the numerator is reused in the
        # denominator instead of calling calibrator1.pdf a second time.
        pdf1 = self.calibrator1.pdf(T)
        pdf0 = self.calibrator0.pdf(T)
        den = pdf0 + pdf1

        p = pdf1 / den
        # Both densities vanish here, so neither hypothesis is favoured.
        p[den == 0] = 0.5

        return p
Example #4
Source File: calibration.py    From carl with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def predict(self, T):
        """Calibrate data.

        The result is ``pdf1 / (pdf0 + pdf1)``, where ``pdf0`` and ``pdf1``
        are the densities returned by ``self.calibrator0`` and
        ``self.calibrator1``. Entries whose denominator is exactly zero are
        set to 0.5.

        Parameters
        ----------
        * `T` [array-like, shape=(n_samples,)]:
            Data to calibrate.

        Returns
        -------
        * `Tt` [array, shape=(n_samples,)]:
            Calibrated data.
        """
        T = column_or_1d(T).reshape(-1, 1)

        # Evaluate each density exactly once; the numerator is reused in the
        # denominator instead of calling calibrator1.pdf a second time.
        pdf1 = self.calibrator1.pdf(T)
        pdf0 = self.calibrator0.pdf(T)
        den = pdf0 + pdf1

        p = pdf1 / den
        # Both densities vanish here, so neither hypothesis is favoured.
        p[den == 0] = 0.5

        return p
Example #5
Source File: label.py    From sparkit-learn with Apache License 2.0 6 votes vote down vote up
def fit(self, y):
        """Fit label encoder.

        The distinct labels of every element of ``y`` are computed separately
        and then merged pairwise into ``self.classes_``.

        Parameters
        ----------
        y : ArrayRDD (n_samples,)
            Target values.

        Returns
        -------
        self : returns an instance of self.
        """

        def _distinct_labels(part):
            # Validate one element of the RDD and reduce it to its unique labels.
            labels = column_or_1d(part, warn=True)
            _check_numpy_unicode_bug(labels)
            return np.unique(labels)

        def _merge_labels(left, right):
            # Union of two unique-label arrays.
            combined = np.concatenate((left, right))
            return np.unique(combined)

        self.classes_ = y.map(_distinct_labels).reduce(_merge_labels)
        return self
Example #6
Source File: labels.py    From pumpp with ISC License 6 votes vote down vote up
def inverse_transform(self, y):
        """Map encoded labels back to the original classes.

        Parameters
        ----------
        y : numpy array of shape [n_samples]
            Encoded target values (indices into ``self.classes_``).

        Returns
        -------
        y : numpy array of shape [n_samples]
            ``self.classes_`` indexed by ``y``; an empty array for empty input.

        Raises
        ------
        ValueError
            If ``y`` holds a value outside ``range(len(self.classes_))``.
        """
        check_is_fitted(self, 'classes_')
        y = column_or_1d(y, warn=True)

        # Nothing to decode: empty input maps to an empty array.
        if _num_samples(y) == 0:
            return np.array([])

        valid_codes = np.arange(len(self.classes_))
        unknown = np.setdiff1d(y, valid_codes)
        if len(unknown) > 0:
            message = "y contains previously unseen labels: %s" % str(unknown)
            raise ValueError(message)

        codes = np.asarray(y)
        return self.classes_[codes]
Example #7
Source File: labels.py    From pumpp with ISC License 6 votes vote down vote up
def transform(self, y):
        """Encode labels against the fitted ``self.classes_``.

        Parameters
        ----------
        y : array-like of shape [n_samples]
            Target values.

        Returns
        -------
        y : array-like of shape [n_samples]
            Encoded values as produced by ``_encode``; an empty array for
            empty input.
        """
        check_is_fitted(self, 'classes_')
        y = column_or_1d(y, warn=True)

        if _num_samples(y) != 0:
            _uniques, encoded = _encode(y, uniques=self.classes_, encode=True)
            return encoded

        # Empty input: there is nothing to encode.
        return np.array([])
Example #8
Source File: patsy_adaptor.py    From patsylearn with GNU General Public License v2.0 6 votes vote down vote up
def fit(self, data, y=None):
        """Fit the scikit-learn model using the formula.

        Builds the design matrices from ``self.formula`` and ``data``, stores
        their design info and the feature names on the instance, then fits a
        clone of ``self.estimator``.

        Parameters
        ----------
        data : dict-like (pandas dataframe)
            Input data. Contains features and possible labels.
            Column names need to match variables in formula.
        y : ignored
            Not used by this method.

        Returns
        -------
        self
        """
        # NOTE: the environment capture must stay in this frame; ``reference``
        # is passed through unchanged to ``EvalEnvironment.capture``.
        env = EvalEnvironment.capture(self.eval_env, reference=1)
        model_formula = _drop_intercept(self.formula, self.add_intercept)
        lhs, rhs = dmatrices(
            model_formula, data, eval_env=env, NA_action=self.NA_action)

        self.design_y_ = lhs.design_info
        rhs_info = rhs.design_info
        self.design_X_ = rhs_info
        self.feature_names_ = rhs_info.column_names

        # Flatten the target to 1d so sklearn does not emit a warning.
        target = column_or_1d(lhs)
        model = clone(self.estimator)
        self.estimator_ = model.fit(rhs, target)
        return self
Example #9
Source File: data.py    From pyod with BSD 2-Clause "Simplified" License 6 votes vote down vote up
def evaluate_print(clf_name, y, y_pred):
    """Print ROC AUC and precision @ rank n for a detector's scores.

    Both metrics are rounded to four decimals before printing.

    Parameters
    ----------
    clf_name : str
        The name of the detector.

    y : list or numpy array of shape (n_samples,)
        The ground truth. Binary (0: inliers, 1: outliers).

    y_pred : list or numpy array of shape (n_samples,)
        The raw outlier scores as returned by a fitted model.

    """
    y = column_or_1d(y)
    y_pred = column_or_1d(y_pred)
    check_consistent_length(y, y_pred)

    roc = np.round(roc_auc_score(y, y_pred), decimals=4)
    prn = np.round(precision_n_scores(y, y_pred), decimals=4)

    template = '{clf_name} ROC:{roc}, precision @ rank n:{prn}'
    print(template.format(clf_name=clf_name, roc=roc, prn=prn))
Example #10
Source File: test_utils.py    From Mastering-Elasticsearch-7.0 with MIT License 6 votes vote down vote up
def test_column_or_1d():
    """Check ``column_or_1d`` on a table of labelled target examples.

    Examples labelled binary, multiclass or continuous must come back equal
    to ``np.ravel`` of the input; every other label must raise ``ValueError``.
    """
    # Labels whose examples are expected to be flattened instead of rejected.
    flattenable = ("binary", "multiclass", "continuous")

    cases = [
        ("binary", ["spam", "egg", "spam"]),
        ("binary", [0, 1, 0, 1]),
        ("continuous", np.arange(10) / 20.),
        ("multiclass", [1, 2, 3]),
        ("multiclass", [0, 1, 2, 2, 0]),
        ("multiclass", [[1], [2], [3]]),
        ("multilabel-indicator", [[0, 1, 0], [0, 0, 1]]),
        ("multiclass-multioutput", [[1, 2, 3]]),
        ("multiclass-multioutput", [[1, 1], [2, 2], [3, 1]]),
        ("multiclass-multioutput", [[5, 1], [4, 2], [3, 1]]),
        ("multiclass-multioutput", [[1, 2, 3]]),
        ("continuous-multioutput", np.arange(30).reshape((-1, 3))),
    ]

    for kind, target in cases:
        if kind not in flattenable:
            assert_raises(ValueError, column_or_1d, target)
            continue
        assert_array_equal(column_or_1d(target), np.ravel(target))
Example #11
Source File: __init__.py    From sklearn2pmml with GNU Affero General Public License v3.0 5 votes vote down vote up
def fit(self, X, y = None):
		"""Store the distinct non-null values of a single column in ``classes_``.

		``y`` is not used. Returns ``self``.
		"""
		X = column_or_1d(X, warn = True)
		present = ~pandas.isnull(X)
		self.classes_ = numpy.unique(X[present])
		return self
Example #12
Source File: base.py    From combo with BSD 2-Clause "Simplified" License 5 votes vote down vote up
def _set_weights(self, weights):
        """Internal function to set estimator weights.

        With ``weights=None`` every estimator gets weight 1. Otherwise the
        given weights are stored as a row vector and rescaled so that they
        sum to the number of base estimators.

        Parameters
        ----------
        weights : numpy array of shape (n_estimators,)
            Estimator weights. May be used after the alignment.

        Returns
        -------
        self

        Raises
        ------
        AssertionError
            If the number of weights differs from ``n_base_estimators_``.
        """

        if weights is None:
            self.weights = np.ones([1, self.n_base_estimators_])
            return self

        self.weights = column_or_1d(weights).reshape(1, len(weights))
        assert self.weights.shape[1] == self.n_base_estimators_, (
            "number of weights must equal the number of base estimators")

        # Rescale so the weights sum to the number of estimators, i.e. the
        # mean weight is 1 (same scale as the unweighted default above).
        adjust_factor = self.weights.shape[1] / np.sum(weights)
        self.weights = self.weights * adjust_factor
        return self
Example #13
Source File: utils.py    From auto_ml with MIT License 5 votes vote down vote up
def transform(self, y):
        """Encode ``y`` and return the code of its first element.

        Labels not yet present in ``self.classes_`` are appended to it, so
        codes handed out earlier keep their meaning.

        Parameters
        ----------
        y : array-like of shape (n_samples,)
            Labels to encode.

        Returns
        -------
        Index into ``self.classes_`` of the first element of ``y``.
        """
        y = column_or_1d(y, warn=True)

        classes = np.unique(y)
        if len(np.intersect1d(classes, self.classes_)) < len(classes):
            diff = np.setdiff1d(classes, self.classes_)
            self.classes_ = np.hstack((self.classes_, diff))

        # Appending unseen labels can leave classes_ unsorted, and a plain
        # searchsorted on an unsorted array returns wrong positions. Search
        # through a sorting permutation and map back to real indices.
        order = np.argsort(self.classes_)
        positions = np.searchsorted(self.classes_, y, sorter=order)
        return order[positions][0]
Example #14
Source File: scipy.py    From sklearn2pmml with GNU Affero General Public License v3.0 5 votes vote down vote up
def transform(self, X):
		"""Validate ``X`` as a single column and pass it to ``self.bspline``."""
		column = column_or_1d(X, warn = True)
		return self.bspline(column)
Example #15
Source File: scipy.py    From sklearn2pmml with GNU Affero General Public License v3.0 5 votes vote down vote up
def fit(self, X, y = None):
		"""Validate that ``X`` is a single column; nothing is stored.

		``y`` is not used. Returns ``self``.
		"""
		# Called only for its validation; the flattened result is not needed.
		column_or_1d(X, warn = True)
		return self
Example #16
Source File: __init__.py    From sklearn2pmml with GNU Affero General Public License v3.0 5 votes vote down vote up
def fit(self, X, y = None):
		"""Validate that ``X`` is a single column; nothing is stored.

		``y`` is not used. Returns ``self``.
		"""
		# Called only for its validation; the flattened result is not needed.
		column_or_1d(X, warn = True)
		return self
Example #17
Source File: __init__.py    From sklearn2pmml with GNU Affero General Public License v3.0 5 votes vote down vote up
def transform(self, X):
		"""Bin the values of a single column with ``pandas.cut``.

		The binning options are read from ``self.bins``, ``self.right``,
		``self.labels`` and ``self.include_lowest``. A ``Categorical`` result
		is converted to a NumPy array before being passed to ``_col2d``.
		"""
		X = column_or_1d(X, warn = True)
		cut_options = dict(bins = self.bins, right = self.right, labels = self.labels, include_lowest = self.include_lowest)
		binned = pandas.cut(X, **cut_options)
		if isinstance(binned, Categorical):
			binned = numpy.asarray(binned)
		return _col2d(binned)
Example #18
Source File: __init__.py    From sklearn2pmml with GNU Affero General Public License v3.0 5 votes vote down vote up
def fit(self, X, y = None):
		"""Validate that ``X`` is a single column; nothing is stored.

		``y`` is not used. Returns ``self``.
		"""
		# Called only for its validation; the flattened result is not needed.
		column_or_1d(X, warn = True)
		return self
Example #19
Source File: __init__.py    From sklearn2pmml with GNU Affero General Public License v3.0 5 votes vote down vote up
def transform(self, X):
		"""Replace every value of a single column via ``self._transform_dict()``.

		A value that is not a key of the mapping raises ``KeyError``.
		"""
		X = column_or_1d(X, warn = True)
		mapping = self._transform_dict()

		def lookup(key):
			return mapping[key]

		if not hasattr(X, "apply"):
			# No ``apply`` method available: map the values one by one.
			Xt = numpy.array([lookup(value) for value in X])
		else:
			Xt = X.apply(lookup)
		return _col2d(Xt)
Example #20
Source File: __init__.py    From sklearn2pmml with GNU Affero General Public License v3.0 5 votes vote down vote up
def transform(self, X):
		"""Encode a single column as a 0/1 indicator matrix.

		Row ``i`` gets a 1 in the column given by the position of ``X[i]`` in
		``self.classes_``; rows holding a null value stay all-zero. The result
		is a CSR matrix when ``self.sparse_output`` is true, a dense array
		otherwise.

		Raises ``ValueError`` (from ``list.index``) for a non-null value that
		is not in ``self.classes_``.
		"""
		X = column_or_1d(X, warn = True)
		index = list(self.classes_)
		shape = (len(X), len(index))
		# ``numpy.int`` was only an alias of the builtin ``int`` and has been
		# removed from NumPy; the builtin yields the same dtype.
		if self.sparse_output:
			Xt = lil_matrix(shape, dtype = int)
		else:
			Xt = numpy.zeros(shape, dtype = int)
		for i, v in enumerate(X):
			if not pandas.isnull(v):
				Xt[i, index.index(v)] = 1
		if self.sparse_output:
			Xt = Xt.tocsr()
		return Xt
Example #21
Source File: __init__.py    From sklearn2pmml with GNU Affero General Public License v3.0 5 votes vote down vote up
def transform(self, X):
		"""Replace every value of a single column by its position in ``classes_``.

		Null values are replaced by ``self.missing_values``. A non-null value
		that is not in ``self.classes_`` raises ``ValueError`` (from
		``list.index``).
		"""
		X = column_or_1d(X, warn = True)
		index = list(self.classes_)

		def encode(value):
			if pandas.isnull(value):
				return self.missing_values
			return index.index(value)

		Xt = numpy.array([encode(v) for v in X])
		return _col2d(Xt)
Example #22
Source File: __init__.py    From sklearn2pmml with GNU Affero General Public License v3.0 5 votes vote down vote up
def transform(self, X):
		"""Slice every value of a single column to ``[self.begin:self.end]``."""
		X = column_or_1d(X, warn = True)

		def substring(value):
			return value[self.begin:self.end]

		sliced = eval_rows(X, substring)
		return _col2d(sliced)
Example #23
Source File: __init__.py    From sklearn2pmml with GNU Affero General Public License v3.0 5 votes vote down vote up
def fit(self, X, y = None):
		"""Validate that ``X`` is a single column; nothing is stored.

		``y`` is not used. Returns ``self``.
		"""
		# Called only for its validation; the flattened result is not needed.
		column_or_1d(X, warn = True)
		return self
Example #24
Source File: __init__.py    From sklearn2pmml with GNU Affero General Public License v3.0 5 votes vote down vote up
def transform(self, X):
		"""Apply a regex substitution to every value of a single column.

		Matches of ``self.pattern`` are replaced with ``self.replacement``.
		"""
		X = column_or_1d(X, warn = True)
		engine = _regex_engine(self.pattern)

		def substitute(value):
			return engine.sub(self.replacement, value)

		replaced = eval_rows(X, substitute)
		return _col2d(replaced)
Example #25
Source File: __init__.py    From sklearn2pmml with GNU Affero General Public License v3.0 5 votes vote down vote up
def fit(self, X, y = None):
		"""Validate that ``X`` is a single column; nothing is stored.

		``y`` is not used. Returns ``self``.
		"""
		# Called only for its validation; the flattened result is not needed.
		column_or_1d(X, warn = True)
		return self
Example #26
Source File: __init__.py    From sklearn2pmml with GNU Affero General Public License v3.0 5 votes vote down vote up
def transform(self, X):
		"""Flag, per value of a single column, whether ``self.pattern`` is found.

		Each value maps to ``True`` when the regex engine's ``search`` finds a
		match and ``False`` otherwise.
		"""
		X = column_or_1d(X, warn = True)
		engine = _regex_engine(self.pattern)

		def matches(value):
			return bool(engine.search(value))

		flags = eval_rows(X, matches)
		return _col2d(flags)
Example #27
Source File: encode.py    From skutil with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def transform(self, y):
        """Encode ``y`` with the fitted classes.

        Labels found in ``self.classes_`` are replaced by their
        ``np.searchsorted`` position; all other labels are replaced by the
        value returned from ``_get_unseen()``.

        Parameters
        ----------

        y : array_like, shape=(n_samples,)
            The array to encode

        Returns
        -------

        e : array_like, shape=(n_samples,)
            The encoded array

        Raises
        ------

        ValueError
            If the number of distinct labels in ``y`` is not below the
            unseen-marker value.
        """
        check_is_fitted(self, 'classes_')
        y = column_or_1d(y, warn=True)

        distinct = np.unique(y)
        _check_numpy_unicode_bug(distinct)

        # The unseen marker doubles as the limit on distinct levels.
        unseen = _get_unseen()
        too_many = len(distinct) >= unseen
        if too_many:
            raise ValueError('Too many factor levels in feature. Max is %i' % unseen)

        codes = []
        for label in y:
            if label in self.classes_:
                codes.append(np.searchsorted(self.classes_, label))
            else:
                codes.append(unseen)

        return np.array(codes)
Example #28
Source File: utility.py    From pyod with BSD 2-Clause "Simplified" License 5 votes vote down vote up
def score_to_label(pred_scores, outliers_fraction=0.1):
    """Turn raw outlier scores into binary labels (0 or 1).

    A score is labelled 1 when it is strictly greater than the
    ``100 * (1 - outliers_fraction)`` percentile of all scores.

    Parameters
    ----------
    pred_scores : list or numpy array of shape (n_samples,)
        Raw outlier scores. Outliers are assumed to have larger values.

    outliers_fraction : float in (0,1)
        Fraction of observations expected to be outliers.

    Returns
    -------
    outlier_labels : numpy array of shape (n_samples,)
        Integer array with 1 for scores above the threshold and 0 otherwise.
    """
    # check input values
    pred_scores = column_or_1d(pred_scores)
    check_parameter(outliers_fraction, 0, 1)

    cutoff_rank = 100 * (1 - outliers_fraction)
    threshold = percentile(pred_scores, cutoff_rank)
    above_threshold = pred_scores > threshold
    return above_threshold.astype('int')
Example #29
Source File: utility.py    From pyod with BSD 2-Clause "Simplified" License 5 votes vote down vote up
def precision_n_scores(y, y_pred, n=None):
    """Compute precision @ rank n.

    The raw scores are first converted to binary labels by ``get_label_n``;
    the precision of those labels against ``y`` is returned.

    Parameters
    ----------
    y : list or numpy array of shape (n_samples,)
        The ground truth. Binary (0: inliers, 1: outliers).

    y_pred : list or numpy array of shape (n_samples,)
        The raw outlier scores as returned by a fitted model.

    n : int, optional (default=None)
        The number of outliers. if not defined, infer using ground truth.

    Returns
    -------
    precision_at_rank_n : float
        Precision at rank n score.

    """

    # binarise the raw decision scores before any format enforcement
    predicted_labels = get_label_n(y, y_pred, n)

    # both inputs to the metric must be 1d
    truth = column_or_1d(y)
    predicted_labels = column_or_1d(predicted_labels)

    return precision_score(truth, predicted_labels)
Example #30
Source File: utility.py    From pyod with BSD 2-Clause "Simplified" License 5 votes vote down vote up
def invert_order(scores, method='multiplication'):
    """Invert the ordering of a list of values.

    The smallest value becomes the largest in the inverted list. This is
    useful while combining multiple detectors since their score order could
    be different.

    Parameters
    ----------
    scores : list, array or numpy array with shape (n_samples,)
        The list of values to be inverted

    method : str, optional (default='multiplication')
        Methods used for order inversion. Valid methods are:

        - 'multiplication': multiply by -1
        - 'subtraction': max(scores) - scores

    Returns
    -------
    inverted_scores : numpy array of shape (n_samples,)
        The inverted list

    Examples
    --------
    >>> scores1 = [0.1, 0.3, 0.5, 0.7, 0.2, 0.1]
    >>> invert_order(scores1)
    array([-0.1, -0.3, -0.5, -0.7, -0.2, -0.1])
    >>> invert_order(scores1, method='subtraction')
    array([0.6, 0.4, 0.2, 0. , 0.5, 0.6])
    """

    scores = column_or_1d(scores)

    if method == 'multiplication':
        negated = scores.ravel() * -1
        return negated

    # NOTE(review): only the two documented methods are handled in the code
    # visible here - confirm what should happen for any other value.
    if method == 'subtraction':
        highest = scores.max()
        return (highest - scores).ravel()