Python sklearn.utils.check_array() Examples

The following are 30 code examples of sklearn.utils.check_array(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module sklearn.utils , or try the search function .
Example #1
Source File: zca.py    From zca with GNU General Public License v3.0 7 votes vote down vote up
def fit(self, X, y=None):
        """Compute the mean, whitening and dewhitening matrices.

        Parameters
        ----------
        X : array-like with shape [n_samples, n_features]
            The data used to compute the mean, whitening and dewhitening
            matrices.
        """
        X = check_array(X, accept_sparse=None, copy=self.copy,
                        ensure_2d=True)
        X = as_float_array(X, copy=self.copy)
        self.mean_ = X.mean(axis=0)
        X_ = X - self.mean_
        cov = np.dot(X_.T, X_) / (X_.shape[0]-1)
        U, S, _ = linalg.svd(cov)
        s = np.sqrt(S.clip(self.regularization))
        s_inv = np.diag(1./s)
        s = np.diag(s)
        self.whiten_ = np.dot(np.dot(U, s_inv), U.T)
        self.dewhiten_ = np.dot(np.dot(U, s), U.T)
        return self 
Example #2
Source File: binning.py    From pygbm with MIT License 6 votes vote down vote up
def fit(self, X, y=None):
        """Fit data X by computing the binning thresholds.

        Parameters
        ----------
        X: array-like
            The data to bin

        Returns
        -------
        self : object
        """
        X = check_array(X)
        self.numerical_thresholds_ = _find_binning_thresholds(
            X, self.max_bins, subsample=self.subsample,
            random_state=self.random_state)

        self.n_bins_per_feature_ = np.array(
            [thresholds.shape[0] + 1
             for thresholds in self.numerical_thresholds_],
            dtype=np.uint32
        )

        return self 
Example #3
Source File: utils.py    From tedana with GNU Lesser General Public License v2.1 6 votes vote down vote up
def andb(arrs):
    """
    Sums arrays in `arrs`

    Parameters
    ----------
    arrs : :obj:`list`
        List of boolean or integer arrays to be summed

    Returns
    -------
    result : :obj:`numpy.ndarray`
        Integer array of summed `arrs`
    """

    # coerce to integer and ensure all arrays are the same shape
    arrs = [check_array(arr, dtype=int, ensure_2d=False, allow_nd=True) for arr in arrs]
    if not np.all([arr1.shape == arr2.shape for arr1 in arrs for arr2 in arrs]):
        raise ValueError('All input arrays must have same shape.')

    # sum across arrays
    result = np.sum(arrs, axis=0)

    return result 
Example #4
Source File: _fastfood.py    From scikit-learn-extra with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def transform(self, X):
        """Apply the approximate feature map to X.

        Parameters
        ----------
        X : {array-like}, shape (n_samples, n_features)
            New data, where n_samples in the number of samples
            and n_features is the number of features.

        Returns
        -------
        X_new : array-like, shape (n_samples, n_components)
        """
        X = check_array(X, dtype=np.float64)
        X_padded = self._pad_with_zeros(X)
        HGPHBX = self._apply_approximate_gaussian_matrix(
            self._B, self._G, self._P, X_padded
        )
        VX = self._scale_transformed_data(self._S, HGPHBX)
        return self._phi(VX) 
Example #5
Source File: _k_medoids.py    From scikit-learn-extra with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def transform(self, X):
        """Transforms X to cluster-distance space.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape (n_query, n_features), \
                or (n_query, n_indexed) if metric == 'precomputed'
            Data to transform.

        Returns
        -------
        X_new : {array-like, sparse matrix}, shape=(n_query, n_clusters)
            X transformed in the new space of distances to cluster centers.
        """
        X = check_array(X, accept_sparse=["csr", "csc"])

        if self.metric == "precomputed":
            check_is_fitted(self, "medoid_indices_")
            return X[:, self.medoid_indices_]
        else:
            check_is_fitted(self, "cluster_centers_")

            Y = self.cluster_centers_
            return pairwise_distances(X, Y=Y, metric=self.metric) 
Example #6
Source File: mnist.py    From mlens with MIT License 6 votes vote down vote up
def load_data(dtype=np.float32, order='F'):
    """Load the data, then cache and memmap the train/test split"""
    ######################################################################
    # Load dataset
    safe_print("Loading dataset...")
    data = fetch_mldata('MNIST original')
    X = check_array(data['data'], dtype=dtype, order=order)
    y = data["target"]

    # Normalize features
    X = X / 255

    # Create train-test split (as [Joachims, 2006])
    safe_print("Creating train-test split...")
    n_train = 60000
    X_train = X[:n_train]
    y_train = y[:n_train]
    X_test = X[n_train:]
    y_test = y[n_train:]

    return X_train, X_test, y_train, y_test 
Example #7
Source File: so_gaal.py    From pyod with BSD 2-Clause "Simplified" License 6 votes vote down vote up
def decision_function(self, X):
        """Predict raw anomaly score of X using the fitted detector.

        The anomaly score of an input sample is computed based on different
        detector algorithms. For consistency, outliers are assigned with
        larger anomaly scores.

        Parameters
        ----------
        X : numpy array of shape (n_samples, n_features)
            The training input samples. Sparse matrices are accepted only
            if they are supported by the base estimator.

        Returns
        -------
        anomaly_scores : numpy array of shape (n_samples,)
            The anomaly score of the input samples.
        """
        check_is_fitted(self, ['discriminator'])
        X = check_array(X)
        pred_scores = self.discriminator.predict(X)
        return pred_scores 
Example #8
Source File: lmdd.py    From pyod with BSD 2-Clause "Simplified" License 6 votes vote down vote up
def fit(self, X, y=None):
        """Fit detector. y is ignored in unsupervised methods.

        Parameters
        ----------
        X : numpy array of shape (n_samples, n_features)
            The input samples.

        y : Ignored
            Not used, present for API consistency by convention.

        Returns
        -------
        self : object
            Fitted estimator.
        """
        X = check_array(X)
        self._set_n_classes(y)
        self.decision_scores_ = self.decision_function(X)
        self._process_decision_scores()
        return self 
Example #9
Source File: sod.py    From pyod with BSD 2-Clause "Simplified" License 6 votes vote down vote up
def fit(self, X, y=None):
        """Fit detector. y is ignored in unsupervised methods.

        Parameters
        ----------
        X : numpy array of shape (n_samples, n_features)
            The input samples.

        y : Ignored
            Not used, present for API consistency by convention.

        Returns
        -------
        self : object
            Fitted estimator.
        """

        # validate inputs X and y (optional)
        X = check_array(X)
        self._set_n_classes(y)
        self.decision_scores_ = self.decision_function(X)
        self._process_decision_scores()

        return self 
Example #10
Source File: mo_gaal.py    From pyod with BSD 2-Clause "Simplified" License 6 votes vote down vote up
def decision_function(self, X):
        """Predict raw anomaly score of X using the fitted detector.

        The anomaly score of an input sample is computed based on different
        detector algorithms. For consistency, outliers are assigned with
        larger anomaly scores.

        Parameters
        ----------
        X : numpy array of shape (n_samples, n_features)
            The training input samples. Sparse matrices are accepted only
            if they are supported by the base estimator.

        Returns
        -------
        anomaly_scores : numpy array of shape (n_samples,)
            The anomaly score of the input samples.
        """
        check_is_fitted(self, ['discriminator'])
        X = check_array(X)
        pred_scores = self.discriminator.predict(X)
        return pred_scores 
Example #11
Source File: test_validation.py    From Mastering-Elasticsearch-7.0 with MIT License 6 votes vote down vote up
def test_ordering():
    # Check that ordering is enforced correctly by validation utilities.
    # We need to check each validation utility, because a 'copy' without
    # 'order=K' will kill the ordering.
    X = np.ones((10, 5))
    for A in X, X.T:
        for copy in (True, False):
            B = check_array(A, order='C', copy=copy)
            assert B.flags['C_CONTIGUOUS']
            B = check_array(A, order='F', copy=copy)
            assert B.flags['F_CONTIGUOUS']
            if copy:
                assert A is not B

    X = sp.csr_matrix(X)
    X.data = X.data[::-1]
    assert not X.data.flags['C_CONTIGUOUS'] 
Example #12
Source File: test_validation.py    From Mastering-Elasticsearch-7.0 with MIT License 6 votes vote down vote up
def test_check_array_accept_sparse_type_exception():
    X = [[1, 2], [3, 4]]
    X_csr = sp.csr_matrix(X)
    invalid_type = SVR()

    msg = ("A sparse matrix was passed, but dense data is required. "
           "Use X.toarray() to convert to a dense numpy array.")
    assert_raise_message(TypeError, msg,
                         check_array, X_csr, accept_sparse=False)

    msg = ("Parameter 'accept_sparse' should be a string, "
           "boolean or list of strings. You provided 'accept_sparse={}'.")
    assert_raise_message(ValueError, msg.format(invalid_type),
                         check_array, X_csr, accept_sparse=invalid_type)

    msg = ("When providing 'accept_sparse' as a tuple or list, "
           "it must contain at least one string value.")
    assert_raise_message(ValueError, msg.format([]),
                         check_array, X_csr, accept_sparse=[])
    assert_raise_message(ValueError, msg.format(()),
                         check_array, X_csr, accept_sparse=())

    assert_raise_message(TypeError, "SVR",
                         check_array, X_csr, accept_sparse=[invalid_type]) 
Example #13
Source File: test_coordinate_descent.py    From Mastering-Elasticsearch-7.0 with MIT License 6 votes vote down vote up
def test_check_input_false():
    X, y, _, _ = build_dataset(n_samples=20, n_features=10)
    X = check_array(X, order='F', dtype='float64')
    y = check_array(X, order='F', dtype='float64')
    clf = ElasticNet(selection='cyclic', tol=1e-8)
    # Check that no error is raised if data is provided in the right format
    clf.fit(X, y, check_input=False)
    # With check_input=False, an exhaustive check is not made on y but its
    # dtype is still cast in _preprocess_data to X's dtype. So the test should
    # pass anyway
    X = check_array(X, order='F', dtype='float32')
    clf.fit(X, y, check_input=False)
    # With no input checking, providing X in C order should result in false
    # computation
    X = check_array(X, order='C', dtype='float64')
    assert_raises(ValueError, clf.fit, X, y, check_input=False) 
Example #14
Source File: forest_embedding.py    From RandomForestClustering with MIT License 6 votes vote down vote up
def fit_transform(self, X, y=None, sample_weight=None):
        X = check_array(X, accept_sparse=['csc'], ensure_2d=False)

        if sp.issparse(X):
            # Pre-sort indices to avoid that each individual tree of the
            # ensemble sorts the indices.
            X.sort_indices()

        X_, y_ = generate_discriminative_dataset(X)

        super(RandomForestEmbedding, self).fit(X_, y_,
                                               sample_weight=sample_weight)

        self.one_hot_encoder_ = OneHotEncoder(sparse=True)
        if self.sparse_output:
            return self.one_hot_encoder_.fit_transform(self.apply(X))
        return self.apply(X) 
Example #15
Source File: k_medoids.py    From RandomForestClustering with MIT License 6 votes vote down vote up
def predict(self, X):

        check_is_fitted(self, "cluster_centers_")

        # Check that the array is good and attempt to convert it to
        # Numpy array if possible
        X = check_array(X)

        # Apply distance metric wrt. cluster centers (medoids)
        D = self.distance_func(X, Y=self.cluster_centers_)

        # Assign data points to clusters based on
        # which cluster assignment yields
        # the smallest distance
        labels = np.argmin(D, axis=1)

        return labels 
Example #16
Source File: cycle.py    From xam with MIT License 6 votes vote down vote up
def transform(self, X, y=None):

        # scikit-learn checks
        X = check_array(X)

        if X.shape[1] != len(self.maximums_):
            raise ValueError("X has different shape than during fitting. "
                             "Expected %d, got %d." % (len(self.maximums_), X.shape[1]))

        return np.vstack((
            np.array([
                np.cos(2 * np.pi * x / (maximum + 1))
                for x, maximum in zip(X.T, self.maximums_)
            ]),
            np.array([
                np.sin(2 * np.pi * x / (maximum + 1))
                for x, maximum in zip(X.T, self.maximums_)
            ])
        )).T 
Example #17
Source File: equal_width.py    From xam with MIT License 6 votes vote down vote up
def fit(self, X, y=None, **fit_params):
        """Choose equally spaces cut points."""

        # scikit-learn checks
        X = check_array(X)

        self.cut_points_ = [0] * X.shape[1]

        for i, x in enumerate(X.T):
            x_min = np.min(x)
            x_max = np.max(x)

            if x_min == x_max:
                self.cut_points_[i] = np.array([x_min])
            else:
                step = (x_max - x_min) / self.n_bins
                self.cut_points_[i] = np.arange(start=x_min+step, stop=x_max, step=step).tolist()

        return self 
Example #18
Source File: linear_regression.py    From differential-privacy-library with MIT License 5 votes vote down vote up
def _preprocess_data(X, y, fit_intercept, epsilon=1.0, bounds_X=None, bounds_y=None, copy=True, check_input=True,
                     **unused_args):
    warn_unused_args(unused_args)

    if check_input:
        X = check_array(X, copy=copy, accept_sparse=False, dtype=FLOAT_DTYPES)
    elif copy:
        X = X.copy(order='K')

    y = np.asarray(y, dtype=X.dtype)
    X_scale = np.ones(X.shape[1], dtype=X.dtype)

    if fit_intercept:
        bounds_X = check_bounds(bounds_X, X.shape[1])
        bounds_y = check_bounds(bounds_y, y.shape[1] if y.ndim > 1 else 1)

        X = clip_to_bounds(X, bounds_X)
        y = clip_to_bounds(y, bounds_y)

        X_offset = mean(X, axis=0, bounds=bounds_X, epsilon=epsilon, accountant=BudgetAccountant())
        X -= X_offset
        y_offset = mean(y, axis=0, bounds=bounds_y, epsilon=epsilon, accountant=BudgetAccountant())
        y = y - y_offset
    else:
        X_offset = np.zeros(X.shape[1], dtype=X.dtype)
        if y.ndim == 1:
            y_offset = X.dtype.type(0)
        else:
            y_offset = np.zeros(y.shape[1], dtype=X.dtype)

    return X, y, X_offset, y_offset, X_scale


# noinspection PyPep8Naming,PyAttributeOutsideInit 
Example #19
Source File: instruments.py    From CalibrationNN with GNU General Public License v3.0 5 votes vote down vote up
def fit(self, X, y=None):
        if self.validate:
            check_array(X, self.accept_sparse)
        return self 
Example #20
Source File: instruments.py    From CalibrationNN with GNU General Public License v3.0 5 votes vote down vote up
def transform(self, X, y=None):
        if self.validate:
            X = check_array(X, self.accept_sparse)
        if self.func is None:
            return X
        return self.func(X) 
Example #21
Source File: instruments.py    From CalibrationNN with GNU General Public License v3.0 5 votes vote down vote up
def inverse_transform(self, X, y=None):
        if self.validate:
            X = check_array(X, self.accept_sparse)            
        if self.inv_func is None:
            return X
        return self.inv_func(X) 
Example #22
Source File: mdsp.py    From libact with BSD 2-Clause "Simplified" License 5 votes vote down vote up
def fit_transform(self, X, y=None, init=None):
        """
        Fit the data from X, and returns the embedded coordinates

        Parameters
        ----------
        X : array, shape=[n_samples, n_features], or [n_samples, n_samples] \
                if dissimilarity='precomputed'
            Input data.

        init : {None or ndarray, shape (n_samples,)}, optional
            If None, randomly chooses the initial configuration
            if ndarray, initialize the SMACOF algorithm with this array.

        """
        X = check_array(X)
        if X.shape[0] == X.shape[1] and self.dissimilarity != "precomputed":
            warnings.warn("The MDS API has changed. ``fit`` now constructs an"
                          " dissimilarity matrix from data. To use a custom "
                          "dissimilarity matrix, set "
                          "``dissimilarity=precomputed``.")

        if self.dissimilarity == "precomputed":
            self.dissimilarity_matrix_ = X
        elif self.dissimilarity == "euclidean":
            self.dissimilarity_matrix_ = euclidean_distances(X)
        else:
            raise ValueError("Proximity must be 'precomputed' or 'euclidean'."
                             " Got %s instead" % str(self.dissimilarity))

        self.embedding_, self.stress_, self.n_iter_ = smacof_p(
            self.dissimilarity_matrix_, self.n_uq, metric=self.metric,
            n_components=self.n_components, init=init, n_init=self.n_init,
            n_jobs=self.n_jobs, max_iter=self.max_iter, verbose=self.verbose,
            eps=self.eps, random_state=self.random_state,
            return_n_iter=True)

        return self.embedding_ 
Example #23
Source File: dqrutl.py    From skutil with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def get_coef(self, X):
        qr, qraux = self.qr, self.qraux
        n, p = qr.shape

        # sanity check
        assert isinstance(qr, np.ndarray), 'internal error: QR should be a np.ndarray but got %s' % type(qr)
        assert isinstance(qraux, np.ndarray), 'internal error: qraux should be a np.ndarray but got %s' % type(qraux)

        # validate input array
        X = check_array(X, dtype='numeric', copy=True, order='F')
        nx, ny = X.shape
        if nx != n:
            raise ValueError('qr and X must have same number of rows')

        # check on size
        _validate_matrix_size(n, p)

        # get the rank of the decomposition
        k = self.rank

        # get ix vector
        # if p > n:
        #   ix = np.ones(n + (p - n)) * np.nan
        #   ix[:n] = np.arange(n) # i.e., array([0,1,2,nan,nan,nan])
        # else:
        #   ix = np.arange(n)

        # set up the structures to alter
        coef, info = (np.zeros((k, ny), dtype=np.double, order='F'),
                      np.zeros(1, dtype=np.int, order='F'))

        # call the fortran module IN PLACE
        _safecall(dqrsl.dqrcf, qr, n, k, qraux, X, ny, coef, 0)

        # post-processing
        # if k < p:
        #   cf = np.ones((p,ny)) * np.nan
        #   cf[self.pivot[np.arange(k)], :] = coef
        return coef if not k < p else coef[self.pivot[np.arange(k)], :] 
Example #24
Source File: stability_selection.py    From stability-selection with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def transform(self, X, threshold=None):
        """Reduce X to the selected features.

        Parameters
        ----------
        X : array of shape [n_samples, n_features]
            The input samples.

        threshold: float.
            Threshold defining the minimum cutoff value for the
            stability scores.

        Returns
        -------
        X_r : array of shape [n_samples, n_selected_features]
            The input samples with only the selected features.
        """
        X = check_array(X, accept_sparse='csr')
        mask = self.get_support(threshold=threshold)

        check_is_fitted(self, 'stability_scores_')

        if len(mask) != X.shape[1]:
            raise ValueError("X has a different shape than during fitting.")

        if not mask.any():
            warn("No features were selected: either the data is"
                 " too noisy or the selection test too strict.",
                 UserWarning)
            return np.empty(0).reshape((X.shape[0], 0))

        return X[:, safe_mask(X, mask)] 
Example #25
Source File: _k_medoids.py    From scikit-learn-extra with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def predict(self, X):
        """Predict the closest cluster for each sample in X.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape (n_query, n_features), \
                or (n_query, n_indexed) if metric == 'precomputed'
            New data to predict.

        Returns
        -------
        labels : array, shape = (n_query,)
            Index of the cluster each sample belongs to.
        """
        X = check_array(X, accept_sparse=["csr", "csc"])

        if self.metric == "precomputed":
            check_is_fitted(self, "medoid_indices_")
            return np.argmin(X[:, self.medoid_indices_], axis=1)
        else:
            check_is_fitted(self, "cluster_centers_")

            # Return data points to clusters based on which cluster assignment
            # yields the smallest distance
            return pairwise_distances_argmin(
                X, Y=self.cluster_centers_, metric=self.metric
            ) 
Example #26
Source File: abod.py    From pyod with BSD 2-Clause "Simplified" License 5 votes vote down vote up
def fit(self, X, y=None):
        """Fit detector. y is ignored in unsupervised methods.

        Parameters
        ----------
        X : numpy array of shape (n_samples, n_features)
            The input samples.

        y : Ignored
            Not used, present for API consistency by convention.

        Returns
        -------
        self : object
            Fitted estimator.
        """
        # validate inputs X and y (optional)
        X = check_array(X)
        self._set_n_classes(y)

        self.X_train_ = X
        self.n_train_ = X.shape[0]
        self.decision_scores_ = np.zeros([self.n_train_, 1])

        if self.method == 'fast':
            self._fit_fast()
        elif self.method == 'default':
            self._fit_default()
        else:
            raise ValueError(self.method, "is not a valid method")

        # flip the scores
        self.decision_scores_ = self.decision_scores_.ravel() * -1
        self._process_decision_scores()
        return self 
Example #27
Source File: abod.py    From pyod with BSD 2-Clause "Simplified" License 5 votes vote down vote up
def decision_function(self, X):
        """Predict raw anomaly score of X using the fitted detector.

        The anomaly score of an input sample is computed based on different
        detector algorithms. For consistency, outliers are assigned with
        larger anomaly scores.

        Parameters
        ----------
        X : numpy array of shape (n_samples, n_features)
            The training input samples. Sparse matrices are accepted only
            if they are supported by the base estimator.

        Returns
        -------
        anomaly_scores : numpy array of shape (n_samples,)
            The anomaly score of the input samples.
        """

        check_is_fitted(self, ['X_train_', 'n_train_', 'decision_scores_',
                               'threshold_', 'labels_'])
        X = check_array(X)

        if self.method == 'fast':  # fast ABOD
            # outliers have higher outlier scores
            return self._decision_function_fast(X) * -1
        else:  # default ABOD
            return self._decision_function_default(X) * -1 
Example #28
Source File: sos.py    From pyod with BSD 2-Clause "Simplified" License 5 votes vote down vote up
def fit(self, X, y=None):
        """Fit detector. y is ignored in unsupervised methods.

        Parameters
        ----------
        X : numpy array of shape (n_samples, n_features)
            The input samples.

        y : Ignored
            Not used, present for API consistency by convention.

        Returns
        -------
        self : object
            Fitted estimator.
        """
        X = check_array(X)
        self._set_n_classes(y)
        D = self._x2d(X)
        A = self._d2a(D)
        B = self._a2b(A)
        O = self._b2o(B)
        # Invert decision_scores_. Outliers comes with higher outlier scores
        self.decision_scores_ = O
        self._process_decision_scores()
        return self 
Example #29
Source File: hbos.py    From pyod with BSD 2-Clause "Simplified" License 5 votes vote down vote up
def decision_function(self, X):
        """Predict raw anomaly score of X using the fitted detector.

        The anomaly score of an input sample is computed based on different
        detector algorithms. For consistency, outliers are assigned with
        larger anomaly scores.

        Parameters
        ----------
        X : numpy array of shape (n_samples, n_features)
            The training input samples. Sparse matrices are accepted only
            if they are supported by the base estimator.

        Returns
        -------
        anomaly_scores : numpy array of shape (n_samples,)
            The anomaly score of the input samples.
        """
        check_is_fitted(self, ['hist_', 'bin_edges_'])
        X = check_array(X)

        # outlier_scores = self._calculate_outlier_scores(X)
        outlier_scores = _calculate_outlier_scores(X, self.bin_edges_,
                                                   self.hist_,
                                                   self.n_bins,
                                                   self.alpha, self.tol)
        return invert_order(np.sum(outlier_scores, axis=1)) 
Example #30
Source File: loci.py    From pyod with BSD 2-Clause "Simplified" License 5 votes vote down vote up
def fit(self, X, y=None):
        """Fit the model using X as training data.
        
        Parameters
        ----------
        X : array, shape (n_samples, n_features)
            Training data.

        y : Ignored
            Not used, present for API consistency by convention.
            
        Returns
        -------
        self : object

        """
        X = check_array(X)
        self._set_n_classes(y)
        outlier_scores = self._calculate_decision_score(X)
        self.decision_scores_ = np.array(outlier_scores)
        self.labels_ = (self.decision_scores_ > self.threshold_).astype(
            'int').ravel()

        # calculate for predict_proba()

        self._mu = np.mean(self.decision_scores_)
        self._sigma = np.std(self.decision_scores_)
        return self