Python sklearn.utils.check_array() Examples
The following are 30 code examples of sklearn.utils.check_array().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module sklearn.utils, or try the search function.

Example #1
Source File: zca.py From zca with GNU General Public License v3.0 | 7 votes |
def fit(self, X, y=None):
    """Compute the mean, whitening and dewhitening matrices.

    Parameters
    ----------
    X : array-like with shape [n_samples, n_features]
        The data used to compute the mean, whitening and dewhitening
        matrices.
    y : ignored
        Not used by this method.

    Returns
    -------
    self : object
        The estimator, with ``mean_``, ``whiten_`` and ``dewhiten_`` set.
    """
    # ``accept_sparse=None`` was deprecated in scikit-learn 0.19 and is no
    # longer a valid value; ``False`` is the documented way to require dense
    # input and yields the proper "dense data is required" error.
    X = check_array(X, accept_sparse=False, copy=self.copy, ensure_2d=True)
    X = as_float_array(X, copy=self.copy)

    self.mean_ = X.mean(axis=0)
    X_ = X - self.mean_

    # Covariance of the centred data, using an (n_samples - 1) denominator.
    cov = np.dot(X_.T, X_) / (X_.shape[0] - 1)
    U, S, _ = linalg.svd(cov)

    # Clip the singular values from below at ``self.regularization`` before
    # taking the square root, so the inverse below stays bounded.
    s = np.sqrt(S.clip(self.regularization))
    s_inv = np.diag(1. / s)
    s = np.diag(s)

    self.whiten_ = np.dot(np.dot(U, s_inv), U.T)
    self.dewhiten_ = np.dot(np.dot(U, s), U.T)
    return self
Example #2
Source File: binning.py From pygbm with MIT License | 6 votes |
def fit(self, X, y=None):
    """Learn the binning thresholds from X.

    Parameters
    ----------
    X : array-like
        The data to bin.
    y : ignored
        Not used by this method.

    Returns
    -------
    self : object
        The estimator, with ``numerical_thresholds_`` and
        ``n_bins_per_feature_`` set.
    """
    X = check_array(X)

    thresholds_per_feature = _find_binning_thresholds(
        X,
        self.max_bins,
        subsample=self.subsample,
        random_state=self.random_state,
    )
    self.numerical_thresholds_ = thresholds_per_feature

    # Each entry contributes one more bin than it has thresholds.
    bin_counts = [cuts.shape[0] + 1 for cuts in thresholds_per_feature]
    self.n_bins_per_feature_ = np.array(bin_counts, dtype=np.uint32)
    return self
Example #3
Source File: utils.py From tedana with GNU Lesser General Public License v2.1 | 6 votes |
def andb(arrs):
    """
    Sums arrays in `arrs`

    Parameters
    ----------
    arrs : :obj:`list`
        List of boolean or integer arrays to be summed

    Returns
    -------
    result : :obj:`numpy.ndarray`
        Integer array of summed `arrs`

    Raises
    ------
    ValueError
        If the arrays do not all share the same shape.
    """
    # coerce to integer and ensure all arrays are the same shape
    arrs = [check_array(arr, dtype=int, ensure_2d=False, allow_nd=True)
            for arr in arrs]

    # Collect the distinct shapes in a single pass; comparing every pair of
    # arrays would be quadratic in the number of inputs for the same answer.
    if len({arr.shape for arr in arrs}) > 1:
        raise ValueError('All input arrays must have same shape.')

    # sum across arrays
    result = np.sum(arrs, axis=0)
    return result
Example #4
Source File: _fastfood.py From scikit-learn-extra with BSD 3-Clause "New" or "Revised" License | 6 votes |
def transform(self, X):
    """Map X through the approximate feature map.

    Parameters
    ----------
    X : {array-like}, shape (n_samples, n_features)
        New data, where n_samples is the number of samples and
        n_features is the number of features.

    Returns
    -------
    X_new : array-like, shape (n_samples, n_components)
    """
    X = check_array(X, dtype=np.float64)

    # Pipeline: pad -> apply the stored B/G/P matrices -> scale by S -> phi.
    padded = self._pad_with_zeros(X)
    projected = self._apply_approximate_gaussian_matrix(
        self._B, self._G, self._P, padded
    )
    scaled = self._scale_transformed_data(self._S, projected)
    return self._phi(scaled)
Example #5
Source File: _k_medoids.py From scikit-learn-extra with BSD 3-Clause "New" or "Revised" License | 6 votes |
def transform(self, X):
    """Express X in cluster-distance space.

    Parameters
    ----------
    X : {array-like, sparse matrix}
        Data to transform, of shape (n_query, n_features), or
        (n_query, n_indexed) if metric == 'precomputed'.

    Returns
    -------
    X_new : {array-like, sparse matrix}, shape=(n_query, n_clusters)
        X transformed in the new space of distances to cluster centers.
    """
    X = check_array(X, accept_sparse=["csr", "csc"])

    if self.metric != "precomputed":
        check_is_fitted(self, "cluster_centers_")
        centers = self.cluster_centers_
        return pairwise_distances(X, Y=centers, metric=self.metric)

    # Precomputed input: simply select the columns indexed by the medoids.
    check_is_fitted(self, "medoid_indices_")
    return X[:, self.medoid_indices_]
Example #6
Source File: mnist.py From mlens with MIT License | 6 votes |
def load_data(dtype=np.float32, order='F'):
    """Load MNIST, scale the features and return a train/test split.

    Parameters
    ----------
    dtype : data-type, default np.float32
        Passed to ``check_array`` for the feature matrix.
    order : str, default 'F'
        Memory layout passed to ``check_array`` for the feature matrix.

    Returns
    -------
    X_train, X_test, y_train, y_test
        The first 60000 rows form the training split, the rest the test
        split.
    """
    # NOTE(review): ``fetch_mldata`` may no longer exist in recent
    # scikit-learn releases -- confirm against the pinned version.
    safe_print("Loading dataset...")
    mnist = fetch_mldata('MNIST original')
    features = check_array(mnist['data'], dtype=dtype, order=order)
    labels = mnist["target"]

    # Normalize features
    features = features / 255

    # Train-test split (as [Joachims, 2006])
    safe_print("Creating train-test split...")
    n_train = 60000
    return (
        features[:n_train],
        features[n_train:],
        labels[:n_train],
        labels[n_train:],
    )
Example #7
Source File: so_gaal.py From pyod with BSD 2-Clause "Simplified" License | 6 votes |
def decision_function(self, X):
    """Return the raw anomaly score of each sample in X.

    Scores come from the fitted discriminator's ``predict``. For
    consistency, outliers are assigned larger anomaly scores.

    Parameters
    ----------
    X : numpy array of shape (n_samples, n_features)
        The input samples.

    Returns
    -------
    anomaly_scores : numpy array of shape (n_samples,)
        The anomaly score of the input samples.
    """
    check_is_fitted(self, ['discriminator'])
    validated = check_array(X)
    return self.discriminator.predict(validated)
Example #8
Source File: lmdd.py From pyod with BSD 2-Clause "Simplified" License | 6 votes |
def fit(self, X, y=None):
    """Fit the detector on X; y is ignored in unsupervised methods.

    Parameters
    ----------
    X : numpy array of shape (n_samples, n_features)
        The input samples.

    y : Ignored
        Not used, present for API consistency by convention.

    Returns
    -------
    self : object
        Fitted estimator.
    """
    X = check_array(X)
    self._set_n_classes(y)

    # Score the training data itself, then post-process those scores.
    training_scores = self.decision_function(X)
    self.decision_scores_ = training_scores
    self._process_decision_scores()
    return self
Example #9
Source File: sod.py From pyod with BSD 2-Clause "Simplified" License | 6 votes |
def fit(self, X, y=None):
    """Fit the detector on X; y is ignored in unsupervised methods.

    Parameters
    ----------
    X : numpy array of shape (n_samples, n_features)
        The input samples.

    y : Ignored
        Not used, present for API consistency by convention.

    Returns
    -------
    self : object
        Fitted estimator.
    """
    # Validate X; y is only forwarded to ``_set_n_classes``.
    X = check_array(X)
    self._set_n_classes(y)

    training_scores = self.decision_function(X)
    self.decision_scores_ = training_scores
    self._process_decision_scores()
    return self
Example #10
Source File: mo_gaal.py From pyod with BSD 2-Clause "Simplified" License | 6 votes |
def decision_function(self, X):
    """Return the raw anomaly score of each sample in X.

    Scores come from the fitted discriminator's ``predict``. For
    consistency, outliers are assigned larger anomaly scores.

    Parameters
    ----------
    X : numpy array of shape (n_samples, n_features)
        The input samples.

    Returns
    -------
    anomaly_scores : numpy array of shape (n_samples,)
        The anomaly score of the input samples.
    """
    check_is_fitted(self, ['discriminator'])
    validated = check_array(X)
    return self.discriminator.predict(validated)
Example #11
Source File: test_validation.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_ordering():
    """Check that validation utilities enforce the requested ordering.

    Each validation utility has to be checked, because a 'copy' without
    'order=K' will kill the ordering.
    """
    X = np.ones((10, 5))
    layouts = (('C', 'C_CONTIGUOUS'), ('F', 'F_CONTIGUOUS'))

    for A in (X, X.T):
        for copy in (True, False):
            for order, flag in layouts:
                B = check_array(A, order=order, copy=copy)
                assert B.flags[flag]
            # B is the F-ordered result here, as in the last check above.
            if copy:
                assert A is not B

    X = sp.csr_matrix(X)
    X.data = X.data[::-1]
    assert not X.data.flags['C_CONTIGUOUS']
Example #12
Source File: test_validation.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_check_array_accept_sparse_type_exception():
    """Check the errors check_array raises for bad ``accept_sparse`` values."""
    X_csr = sp.csr_matrix([[1, 2], [3, 4]])
    invalid_type = SVR()

    # Sparse input while sparse is not accepted.
    msg = ("A sparse matrix was passed, but dense data is required. "
           "Use X.toarray() to convert to a dense numpy array.")
    assert_raise_message(TypeError, msg,
                         check_array, X_csr, accept_sparse=False)

    # ``accept_sparse`` given as an object of an unsupported type.
    msg = ("Parameter 'accept_sparse' should be a string, "
           "boolean or list of strings. You provided 'accept_sparse={}'.")
    assert_raise_message(ValueError, msg.format(invalid_type),
                         check_array, X_csr, accept_sparse=invalid_type)

    # Empty list and empty tuple are both rejected with the same message.
    msg = ("When providing 'accept_sparse' as a tuple or list, "
           "it must contain at least one string value.")
    for empty in ([], ()):
        assert_raise_message(ValueError, msg.format(empty),
                             check_array, X_csr, accept_sparse=empty)

    # A list whose element is not a string.
    assert_raise_message(TypeError, "SVR",
                         check_array, X_csr, accept_sparse=[invalid_type])
Example #13
Source File: test_coordinate_descent.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_check_input_false():
    """Exercise ElasticNet.fit with ``check_input=False``."""
    X, y, _, _ = build_dataset(n_samples=20, n_features=10)
    X = check_array(X, order='F', dtype='float64')
    # NOTE(review): y is built from X here, not from the y returned by
    # build_dataset -- looks unintended, but kept as-is; confirm before
    # changing since the assertions below may rely on it.
    y = check_array(X, order='F', dtype='float64')
    model = ElasticNet(selection='cyclic', tol=1e-8)

    # Check that no error is raised if data is provided in the right format
    model.fit(X, y, check_input=False)

    # With check_input=False, an exhaustive check is not made on y but its
    # dtype is still cast in _preprocess_data to X's dtype. So the test should
    # pass anyway
    X = check_array(X, order='F', dtype='float32')
    model.fit(X, y, check_input=False)

    # With no input checking, providing X in C order should result in false
    # computation
    X = check_array(X, order='C', dtype='float64')
    assert_raises(ValueError, model.fit, X, y, check_input=False)
Example #14
Source File: forest_embedding.py From RandomForestClustering with MIT License | 6 votes |
def fit_transform(self, X, y=None, sample_weight=None):
    """Fit the forest on a dataset generated from X and return ``apply(X)``.

    Parameters
    ----------
    X : array-like or CSC sparse matrix
        Input data; validated with ``ensure_2d=False``.
    y : ignored
        Not used; targets come from ``generate_discriminative_dataset``.
    sample_weight : optional
        Forwarded to the parent class ``fit``.

    Returns
    -------
    The result of ``self.apply(X)``, passed through a freshly fitted
    ``OneHotEncoder`` when ``self.sparse_output`` is truthy.
    """
    X = check_array(X, accept_sparse=['csc'], ensure_2d=False)

    # Pre-sort indices to avoid that each individual tree of the
    # ensemble sorts the indices.
    if sp.issparse(X):
        X.sort_indices()

    X_, y_ = generate_discriminative_dataset(X)
    super(RandomForestEmbedding, self).fit(X_, y_,
                                           sample_weight=sample_weight)

    self.one_hot_encoder_ = OneHotEncoder(sparse=True)
    applied = self.apply(X)
    if not self.sparse_output:
        return applied
    return self.one_hot_encoder_.fit_transform(applied)
Example #15
Source File: k_medoids.py From RandomForestClustering with MIT License | 6 votes |
def predict(self, X):
    """Assign each sample in X to the cluster center at smallest distance.

    Parameters
    ----------
    X : array-like
        Data to label; converted with ``check_array``.

    Returns
    -------
    labels : ndarray
        For each row of the distance matrix, the index of its minimum.
    """
    check_is_fitted(self, "cluster_centers_")

    # Validate the input and convert it to a NumPy array if possible.
    X = check_array(X)

    # Distances from every sample to every cluster center (medoid).
    distances = self.distance_func(X, Y=self.cluster_centers_)
    return np.argmin(distances, axis=1)
Example #16
Source File: cycle.py From xam with MIT License | 6 votes |
def transform(self, X, y=None):
    """Encode each column of X as a cosine and a sine component.

    Every column ``x`` with fitted maximum ``m`` becomes
    ``cos(2*pi*x / (m + 1))`` and ``sin(2*pi*x / (m + 1))``. The output
    holds all cosine columns first, followed by all sine columns.

    Raises
    ------
    ValueError
        If X does not have ``len(self.maximums_)`` columns.
    """
    # scikit-learn checks
    X = check_array(X)

    n_expected = len(self.maximums_)
    n_received = X.shape[1]
    if n_received != n_expected:
        raise ValueError("X has different shape than during fitting. "
                         "Expected %d, got %d." % (n_expected, n_received))

    angles = [
        2 * np.pi * column / (maximum + 1)
        for column, maximum in zip(X.T, self.maximums_)
    ]
    cos_rows = np.array([np.cos(angle) for angle in angles])
    sin_rows = np.array([np.sin(angle) for angle in angles])
    return np.vstack((cos_rows, sin_rows)).T
Example #17
Source File: equal_width.py From xam with MIT License | 6 votes |
def fit(self, X, y=None, **fit_params):
    """Choose equally spaced cut points for every column of X.

    For each column, the range [min, max] is split into ``self.n_bins``
    intervals of equal width and the interior boundaries are stored in
    ``self.cut_points_``. A constant column gets its single value as the
    only cut point.

    Parameters
    ----------
    X : array-like
        Input data; validated with ``check_array``.
    y : ignored
    **fit_params : ignored

    Returns
    -------
    self : object
    """
    # scikit-learn checks
    X = check_array(X)

    self.cut_points_ = [0] * X.shape[1]

    for i, x in enumerate(X.T):
        x_min = np.min(x)
        x_max = np.max(x)

        if x_min == x_max:
            self.cut_points_[i] = np.array([x_min])
        else:
            # linspace fixes the number of points, so rounding can never add
            # or drop a cut point the way a float-step ``np.arange`` can.
            # Dropping both endpoints leaves the n_bins - 1 inner boundaries.
            edges = np.linspace(x_min, x_max, self.n_bins + 1)
            self.cut_points_[i] = edges[1:-1].tolist()

    return self
Example #18
Source File: linear_regression.py From differential-privacy-library with MIT License | 5 votes |
def _preprocess_data(X, y, fit_intercept, epsilon=1.0, bounds_X=None, bounds_y=None, copy=True, check_input=True,
                     **unused_args):
    """Validate X and y and, if requested, centre them around their means.

    Returns
    -------
    X, y, X_offset, y_offset, X_scale
        ``X_scale`` is always a vector of ones. With ``fit_intercept`` the
        offsets come from ``mean`` (called with ``bounds`` and ``epsilon``)
        and are subtracted from the clipped data; otherwise they are zeros.
    """
    warn_unused_args(unused_args)

    if check_input:
        X = check_array(X, copy=copy, accept_sparse=False, dtype=FLOAT_DTYPES)
    elif copy:
        X = X.copy(order='K')

    y = np.asarray(y, dtype=X.dtype)
    n_features = X.shape[1]
    X_scale = np.ones(n_features, dtype=X.dtype)

    if not fit_intercept:
        # No centring: offsets are zeros shaped like the would-be means.
        X_offset = np.zeros(n_features, dtype=X.dtype)
        y_offset = X.dtype.type(0) if y.ndim == 1 else np.zeros(y.shape[1], dtype=X.dtype)
        return X, y, X_offset, y_offset, X_scale

    bounds_X = check_bounds(bounds_X, n_features)
    bounds_y = check_bounds(bounds_y, y.shape[1] if y.ndim > 1 else 1)

    X = clip_to_bounds(X, bounds_X)
    y = clip_to_bounds(y, bounds_y)

    # Each mean gets its own fresh BudgetAccountant instance.
    X_offset = mean(X, axis=0, bounds=bounds_X, epsilon=epsilon, accountant=BudgetAccountant())
    X -= X_offset  # in place, unlike y below
    y_offset = mean(y, axis=0, bounds=bounds_y, epsilon=epsilon, accountant=BudgetAccountant())
    y = y - y_offset

    return X, y, X_offset, y_offset, X_scale
# noinspection PyPep8Naming,PyAttributeOutsideInit
Example #19
Source File: instruments.py From CalibrationNN with GNU General Public License v3.0 | 5 votes |
def fit(self, X, y=None):
    """Optionally validate X and return self.

    When ``self.validate`` is truthy, X is run through ``check_array``
    (its result is discarded). Nothing else is done in this method.
    """
    should_validate = self.validate
    if should_validate:
        check_array(X, accept_sparse=self.accept_sparse)
    return self
Example #20
Source File: instruments.py From CalibrationNN with GNU General Public License v3.0 | 5 votes |
def transform(self, X, y=None):
    """Apply ``self.func`` to X, or return X unchanged if no func is set.

    X is first passed through ``check_array`` when ``self.validate`` is
    truthy. ``y`` is unused.
    """
    if self.validate:
        X = check_array(X, accept_sparse=self.accept_sparse)

    forward = self.func
    return X if forward is None else forward(X)
Example #21
Source File: instruments.py From CalibrationNN with GNU General Public License v3.0 | 5 votes |
def inverse_transform(self, X, y=None):
    """Apply ``self.inv_func`` to X, or return X unchanged if it is None.

    X is first passed through ``check_array`` when ``self.validate`` is
    truthy. ``y`` is unused.
    """
    if self.validate:
        X = check_array(X, accept_sparse=self.accept_sparse)

    backward = self.inv_func
    return X if backward is None else backward(X)
Example #22
Source File: mdsp.py From libact with BSD 2-Clause "Simplified" License | 5 votes |
def fit_transform(self, X, y=None, init=None):
    """
    Fit the data from X and return the embedded coordinates.

    Parameters
    ----------
    X : array, shape=[n_samples, n_features], or [n_samples, n_samples]
        if dissimilarity='precomputed'
        Input data.

    y : ignored

    init : {None or ndarray, shape (n_samples,)}, optional
        Forwarded to ``smacof_p`` as the initial configuration.

    Returns
    -------
    embedding : the value stored in ``self.embedding_``.

    Raises
    ------
    ValueError
        If ``self.dissimilarity`` is neither 'precomputed' nor 'euclidean'.
    """
    X = check_array(X)

    precomputed = self.dissimilarity == "precomputed"

    # A square X that is not declared precomputed is probably a mistake.
    if X.shape[0] == X.shape[1] and not precomputed:
        warnings.warn("The MDS API has changed. ``fit`` now constructs an"
                      " dissimilarity matrix from data. To use a custom "
                      "dissimilarity matrix, set "
                      "``dissimilarity=precomputed``.")

    if precomputed:
        self.dissimilarity_matrix_ = X
    elif self.dissimilarity == "euclidean":
        self.dissimilarity_matrix_ = euclidean_distances(X)
    else:
        raise ValueError("Proximity must be 'precomputed' or 'euclidean'."
                         " Got %s instead" % str(self.dissimilarity))

    embedding, stress, n_iter = smacof_p(
        self.dissimilarity_matrix_,
        self.n_uq,
        metric=self.metric,
        n_components=self.n_components,
        init=init,
        n_init=self.n_init,
        n_jobs=self.n_jobs,
        max_iter=self.max_iter,
        verbose=self.verbose,
        eps=self.eps,
        random_state=self.random_state,
        return_n_iter=True,
    )
    self.embedding_ = embedding
    self.stress_ = stress
    self.n_iter_ = n_iter

    return self.embedding_
Example #23
Source File: dqrutl.py From skutil with BSD 3-Clause "New" or "Revised" License | 5 votes |
def get_coef(self, X):
    """Compute coefficients for X from the stored QR decomposition.

    Parameters
    ----------
    X : array-like
        Right-hand side; copied and converted to a Fortran-ordered numeric
        array. Must have as many rows as ``self.qr``.

    Returns
    -------
    coef : np.ndarray
        Array filled in place by ``dqrsl.dqrcf``, with ``self.rank`` rows.
        When the rank is smaller than the number of columns of ``self.qr``,
        the rows are reordered through ``self.pivot``.

    Raises
    ------
    ValueError
        If X and ``self.qr`` have a different number of rows.
    """
    qr, qraux = self.qr, self.qraux

    # sanity check (done before ``qr.shape`` is touched)
    assert isinstance(qr, np.ndarray), 'internal error: QR should be a np.ndarray but got %s' % type(qr)
    assert isinstance(qraux, np.ndarray), 'internal error: qraux should be a np.ndarray but got %s' % type(qraux)
    n, p = qr.shape

    # validate input array
    X = check_array(X, dtype='numeric', copy=True, order='F')
    nx, ny = X.shape
    if nx != n:
        raise ValueError('qr and X must have same number of rows')

    # check on size
    _validate_matrix_size(n, p)

    # get the rank of the decomposition
    k = self.rank

    # Output buffer for the Fortran routine. The former ``info`` array was
    # never used and relied on ``np.int``, which NumPy has removed.
    coef = np.zeros((k, ny), dtype=np.double, order='F')

    # call the fortran module IN PLACE
    _safecall(dqrsl.dqrcf, qr, n, k, qraux, X, ny, coef, 0)

    # post-processing: reorder rows through the pivot when rank-deficient
    return coef if not k < p else coef[self.pivot[np.arange(k)], :]
Example #24
Source File: stability_selection.py From stability-selection with BSD 3-Clause "New" or "Revised" License | 5 votes |
def transform(self, X, threshold=None):
    """Reduce X to the selected features.

    Parameters
    ----------
    X : array of shape [n_samples, n_features]
        The input samples.

    threshold: float.
        Threshold defining the minimum cutoff value for the stability
        scores.

    Returns
    -------
    X_r : array of shape [n_samples, n_selected_features]
        The input samples with only the selected features.

    Raises
    ------
    ValueError
        If the support mask length differs from the number of columns of X.
    """
    X = check_array(X, accept_sparse='csr')

    # Verify the estimator is fitted *before* asking for the support mask,
    # which is derived from the fitted state.
    check_is_fitted(self, 'stability_scores_')
    mask = self.get_support(threshold=threshold)

    if len(mask) != X.shape[1]:
        raise ValueError("X has a different shape than during fitting.")

    if not mask.any():
        warn("No features were selected: either the data is"
             " too noisy or the selection test too strict.",
             UserWarning)
        # Empty result that still has one row per input sample.
        return np.empty(0).reshape((X.shape[0], 0))

    return X[:, safe_mask(X, mask)]
Example #25
Source File: _k_medoids.py From scikit-learn-extra with BSD 3-Clause "New" or "Revised" License | 5 votes |
def predict(self, X):
    """Predict the closest cluster for each sample in X.

    Parameters
    ----------
    X : {array-like, sparse matrix}
        New data to predict, of shape (n_query, n_features), or
        (n_query, n_indexed) if metric == 'precomputed'.

    Returns
    -------
    labels : array, shape = (n_query,)
        Index of the cluster each sample belongs to.
    """
    X = check_array(X, accept_sparse=["csr", "csc"])

    if self.metric != "precomputed":
        check_is_fitted(self, "cluster_centers_")
        # Label each sample with the center at the smallest distance.
        return pairwise_distances_argmin(
            X, Y=self.cluster_centers_, metric=self.metric
        )

    # Precomputed input: pick the smallest entry among the medoid columns.
    check_is_fitted(self, "medoid_indices_")
    medoid_columns = X[:, self.medoid_indices_]
    return np.argmin(medoid_columns, axis=1)
Example #26
Source File: abod.py From pyod with BSD 2-Clause "Simplified" License | 5 votes |
def fit(self, X, y=None):
    """Fit detector. y is ignored in unsupervised methods.

    Parameters
    ----------
    X : numpy array of shape (n_samples, n_features)
        The input samples.

    y : Ignored
        Not used, present for API consistency by convention.

    Returns
    -------
    self : object
        Fitted estimator.

    Raises
    ------
    ValueError
        If ``self.method`` is neither 'fast' nor 'default'.
    """
    # validate inputs X and y (optional)
    X = check_array(X)
    self._set_n_classes(y)

    self.X_train_ = X
    self.n_train_ = X.shape[0]
    self.decision_scores_ = np.zeros([self.n_train_, 1])

    if self.method == 'fast':
        self._fit_fast()
    elif self.method == 'default':
        self._fit_default()
    else:
        # One formatted message; passing two arguments to ValueError made
        # the error print as a tuple.
        raise ValueError(
            "{} is not a valid method".format(self.method))

    # flip the scores
    self.decision_scores_ = self.decision_scores_.ravel() * -1
    self._process_decision_scores()
    return self
Example #27
Source File: abod.py From pyod with BSD 2-Clause "Simplified" License | 5 votes |
def decision_function(self, X):
    """Return the raw anomaly score of each sample in X.

    The score is the negated output of the fast or default scoring
    routine, chosen by ``self.method``. For consistency, outliers are
    assigned larger anomaly scores.

    Parameters
    ----------
    X : numpy array of shape (n_samples, n_features)
        The input samples.

    Returns
    -------
    anomaly_scores : numpy array of shape (n_samples,)
        The anomaly score of the input samples.
    """
    check_is_fitted(self, ['X_train_', 'n_train_', 'decision_scores_',
                           'threshold_', 'labels_'])
    X = check_array(X)

    # Anything other than 'fast' falls back to the default ABOD routine.
    if self.method == 'fast':
        scorer = self._decision_function_fast
    else:
        scorer = self._decision_function_default

    # Negate so that outliers end up with the higher scores.
    return scorer(X) * -1
Example #28
Source File: sos.py From pyod with BSD 2-Clause "Simplified" License | 5 votes |
def fit(self, X, y=None):
    """Fit the detector on X; y is ignored in unsupervised methods.

    Parameters
    ----------
    X : numpy array of shape (n_samples, n_features)
        The input samples.

    y : Ignored
        Not used, present for API consistency by convention.

    Returns
    -------
    self : object
        Fitted estimator.
    """
    X = check_array(X)
    self._set_n_classes(y)

    # Chain of helpers: X -> D -> A -> B -> O.
    d_stage = self._x2d(X)
    a_stage = self._d2a(d_stage)
    b_stage = self._a2b(a_stage)
    outlier_stage = self._b2o(b_stage)

    # The final stage is stored as-is as the decision scores.
    self.decision_scores_ = outlier_stage
    self._process_decision_scores()
    return self
Example #29
Source File: hbos.py From pyod with BSD 2-Clause "Simplified" License | 5 votes |
def decision_function(self, X):
    """Return the raw anomaly score of each sample in X.

    Scores from ``_calculate_outlier_scores`` are summed along axis 1 and
    passed through ``invert_order``. For consistency, outliers are
    assigned larger anomaly scores.

    Parameters
    ----------
    X : numpy array of shape (n_samples, n_features)
        The input samples.

    Returns
    -------
    anomaly_scores : numpy array of shape (n_samples,)
        The anomaly score of the input samples.
    """
    check_is_fitted(self, ['hist_', 'bin_edges_'])
    X = check_array(X)

    scores = _calculate_outlier_scores(
        X, self.bin_edges_, self.hist_, self.n_bins, self.alpha, self.tol)
    summed = np.sum(scores, axis=1)
    return invert_order(summed)
Example #30
Source File: loci.py From pyod with BSD 2-Clause "Simplified" License | 5 votes |
def fit(self, X, y=None):
    """Fit the model using X as training data.

    Parameters
    ----------
    X : array, shape (n_samples, n_features)
        Training data.

    y : Ignored
        Not used, present for API consistency by convention.

    Returns
    -------
    self : object
    """
    X = check_array(X)
    self._set_n_classes(y)

    raw_scores = self._calculate_decision_score(X)
    self.decision_scores_ = np.array(raw_scores)

    # NOTE(review): ``threshold_`` is read here but not assigned in this
    # method -- presumably set elsewhere beforehand; confirm.
    above_threshold = self.decision_scores_ > self.threshold_
    self.labels_ = above_threshold.astype('int').ravel()

    # calculate for predict_proba()
    self._mu = np.mean(self.decision_scores_)
    self._sigma = np.std(self.decision_scores_)
    return self