Python sklearn.utils.validation.FLOAT_DTYPES Examples

The following are 29 code examples that use the constant sklearn.utils.validation.FLOAT_DTYPES. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module sklearn.utils.validation, or try the search function.
Example #1
Source File: naive_bayes.py    From scikit-lego with MIT License 6 votes vote down vote up
def predict_proba(self, X: np.ndarray):
        """
        Return normalised per-class scores for every row of X.

        For each class, the ``score_samples`` outputs of its per-column
        mixtures are summed; the totals are then exponentiated and normalised
        so that every row sums to one.

        :param X: array-like, shape=(n_samples, n_columns) data to score.
        :return: array, shape=(n_samples, n_classes); column order follows ``self.classes_``.
        :raises ValueError: if X does not have ``self.num_fit_cols_`` columns.
        """
        check_is_fitted(self, ["gmms_", "classes_", "num_fit_cols_"])
        X = check_array(X, estimator=self, dtype=FLOAT_DTYPES)
        if self.num_fit_cols_ != X.shape[1]:
            raise ValueError(
                f"number of columns {X.shape[1]} does not match fit size {self.num_fit_cols_}"
            )
        log_probs = np.zeros((X.shape[0], len(self.classes_)))
        for k, v in self.gmms_.items():
            # Pick the matching position as a plain scalar; calling int() on
            # the 2-d result of np.argwhere relies on a deprecated conversion.
            class_idx = int(np.flatnonzero(self.classes_ == k)[0])
            log_probs[:, class_idx] = np.array(
                [
                    m.score_samples(np.expand_dims(X[:, idx], 1))
                    for idx, m in enumerate(v)
                ]
            ).sum(axis=0)
        # Subtract the row-wise maximum before exponentiating: the ratio is
        # unchanged, but very negative scores no longer underflow to 0/0.
        log_probs -= log_probs.max(axis=1, keepdims=True)
        likelihood = np.exp(log_probs)
        return likelihood / likelihood.sum(axis=1, keepdims=True)
Example #2
Source File: umap_reconstruction.py    From scikit-lego with MIT License 6 votes vote down vote up
def fit(self, X, y=None):
        """
        Fit the underlying UMAP model using X as training data.

        :param X: array-like, shape=(n_samples, n_columns) training data.
        :param y: optional targets; forwarded unchanged to ``umap.UMAP.fit``.
        :return: Returns an instance of self.
        :raises ValueError: if ``n_components`` is below two or ``threshold`` is ``None``.
        """
        X = check_array(X, estimator=self, dtype=FLOAT_DTYPES)
        if self.n_components < 2:
            raise ValueError("Number of components must be at least two.")
        # Test for None explicitly so the check matches its error message;
        # a plain truthiness test would also reject a threshold of 0.
        if self.threshold is None:
            raise ValueError("The `threshold` value cannot be `None`.")

        self.umap_ = umap.UMAP(
            n_components=self.n_components,
            n_neighbors=self.n_neighbors,
            min_dist=self.min_dist,
            metric=self.metric,
            random_state=self.random_state,
        )
        self.umap_.fit(X, y)
        self.offset_ = -self.threshold
        return self
Example #3
Source File: bayesian_gmm_classifier.py    From scikit-lego with MIT License 5 votes vote down vote up
def predict_proba(self, X):
        """
        Return normalised per-class scores for every row of X.

        Each class mixture scores the samples with ``score_samples``; the
        scores are exponentiated and normalised so every row sums to one.

        :param X: array-like, shape=(n_samples, n_columns) data to score.
        :return: array, shape=(n_samples, n_classes); column order follows ``self.classes_``.
        """
        X = check_array(X, estimator=self, dtype=FLOAT_DTYPES)
        check_is_fitted(self, ["gmms_", "classes_"])
        log_scores = np.zeros((X.shape[0], self.classes_.shape[0]))
        for idx, c in enumerate(self.classes_):
            log_scores[:, idx] = self.gmms_[c].score_samples(X)
        # Shift by the row maximum so exp() cannot underflow every entry of a
        # row to zero (which would give 0/0 = NaN); the ratio is unaffected.
        log_scores -= log_scores.max(axis=1, keepdims=True)
        weights = np.exp(log_scores)
        return weights / weights.sum(axis=1, keepdims=True)
Example #4
Source File: equal_groups.py    From Same-Size-K-Means with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def _check_test_data(self, X):
        """
        Validate X and confirm it has as many features as the fitted centers.

        :param X: array-like or CSR sparse matrix to validate.
        :return: the validated (possibly converted) X.
        :raises ValueError: if the number of columns differs from
            ``self.cluster_centers_.shape[1]``.
        """
        # NOTE(review): `warn_on_dtype` was deprecated and later removed from
        # scikit-learn's check_array -- confirm against the pinned version.
        X = check_array(
            X, accept_sparse='csr', dtype=FLOAT_DTYPES, warn_on_dtype=True
        )
        _, n_features = X.shape
        expected_n_features = self.cluster_centers_.shape[1]
        if n_features != expected_n_features:
            message = (
                "Incorrect number of features. "
                "Got %d features, expected %d"
                % (n_features, expected_n_features)
            )
            raise ValueError(message)

        return X
Example #5
Source File: naive_bayes.py    From scikit-lego with MIT License 5 votes vote down vote up
def predict(self, X):
        """
        Return, for every row of X, the class whose ``predict_proba`` column is largest.

        :param X: array-like data accepted by ``check_array``.
        :return: array of labels taken from ``self.classes_``.
        """
        check_is_fitted(self, ["gmms_", "classes_", "num_fit_cols_"])
        X = check_array(X, estimator=self, dtype=FLOAT_DTYPES)
        probabilities = self.predict_proba(X)
        best_column = probabilities.argmax(axis=1)
        return self.classes_[best_column]
Example #6
Source File: naive_bayes.py    From scikit-lego with MIT License 5 votes vote down vote up
def fit(self, X: np.ndarray, y: np.ndarray) -> "BayesianGaussianMixtureNB":
        """
        Fit one Bayesian Gaussian mixture per class and per column of X.

        :param X: array-like, shape=(n_samples, n_columns) training data.
        :param y: array-like, shape=(n_samples, ) training labels.
        :return: Returns an instance of self.
        """
        # check_X_y (default ensure_2d=True) only returns a 2-d X, so no
        # 1-d reshaping is required afterwards.
        X, y = check_X_y(X, y, estimator=self, dtype=FLOAT_DTYPES)

        self.gmms_ = {}
        self.classes_ = unique_labels(y)
        self.num_fit_cols_ = X.shape[1]
        for c in self.classes_:
            # Build the class mask once and reuse it for both X and y.
            class_mask = y == c
            subset_x, subset_y = X[class_mask], y[class_mask]
            # One single-column mixture per feature, stored in column order.
            self.gmms_[c] = [
                BayesianGaussianMixture(
                    n_components=self.n_components,
                    covariance_type=self.covariance_type,
                    tol=self.tol,
                    reg_covar=self.reg_covar,
                    max_iter=self.max_iter,
                    n_init=self.n_init,
                    init_params=self.init_params,
                    weight_concentration_prior_type=self.weight_concentration_prior_type,
                    weight_concentration_prior=self.weight_concentration_prior,
                    mean_precision_prior=self.mean_precision_prior,
                    mean_prior=self.mean_prior,
                    degrees_of_freedom_prior=self.degrees_of_freedom_prior,
                    covariance_prior=self.covariance_prior,
                    random_state=self.random_state,
                    warm_start=self.warm_start,
                    verbose=self.verbose,
                    verbose_interval=self.verbose_interval,
                ).fit(subset_x[:, i].reshape(-1, 1), subset_y)
                for i in range(X.shape[1])
            ]
        return self
Example #7
Source File: naive_bayes.py    From scikit-lego with MIT License 5 votes vote down vote up
def predict(self, X):
        """
        Return, for every row of X, the class whose ``predict_proba`` column is largest.

        :param X: array-like data accepted by ``check_array``.
        :return: array of labels taken from ``self.classes_``.
        """
        check_is_fitted(self, ["gmms_", "classes_"])
        X = check_array(X, estimator=self, dtype=FLOAT_DTYPES)
        class_scores = self.predict_proba(X)
        winners = class_scores.argmax(axis=1)
        return self.classes_[winners]
Example #8
Source File: naive_bayes.py    From scikit-lego with MIT License 5 votes vote down vote up
def fit(self, X: np.ndarray, y: np.ndarray) -> "GaussianMixtureNB":
        """
        Fit one Gaussian mixture per class and per column of X.

        :param X: array-like, shape=(n_samples, n_columns) training data.
        :param y: array-like, shape=(n_samples, ) training labels.
        :return: Returns an instance of self.
        """
        # check_X_y (default ensure_2d=True) only returns a 2-d X, so no
        # 1-d reshaping is required afterwards.
        X, y = check_X_y(X, y, estimator=self, dtype=FLOAT_DTYPES)

        self.gmms_ = {}
        self.classes_ = unique_labels(y)
        self.num_fit_cols_ = X.shape[1]
        for c in self.classes_:
            # Build the class mask once and reuse it for both X and y.
            class_mask = y == c
            subset_x, subset_y = X[class_mask], y[class_mask]
            # One single-column mixture per feature, stored in column order.
            self.gmms_[c] = [
                GaussianMixture(
                    n_components=self.n_components,
                    covariance_type=self.covariance_type,
                    tol=self.tol,
                    reg_covar=self.reg_covar,
                    max_iter=self.max_iter,
                    n_init=self.n_init,
                    init_params=self.init_params,
                    weights_init=self.weights_init,
                    means_init=self.means_init,
                    precisions_init=self.precisions_init,
                    random_state=self.random_state,
                    warm_start=self.warm_start,
                ).fit(subset_x[:, i].reshape(-1, 1), subset_y)
                for i in range(X.shape[1])
            ]
        return self
Example #9
Source File: randomadder.py    From scikit-lego with MIT License 5 votes vote down vote up
def transform_train(self, X):
        """
        Return X plus normally distributed noise.

        The noise is drawn with mean 0 and scale ``self.noise`` from a random
        state built out of ``self.random_state``.

        :param X: array-like data accepted by ``check_array``.
        :return: array with the same shape as the validated X.
        """
        rng = check_random_state(self.random_state)
        check_is_fitted(self, ["dim_"])

        X = check_array(X, estimator=self, dtype=FLOAT_DTYPES)

        noise = rng.normal(0, self.noise, size=X.shape)
        return X + noise
Example #10
Source File: randomadder.py    From scikit-lego with MIT License 5 votes vote down vote up
def fit(self, X, y):
        """
        Run the parent ``fit`` and record the number of columns of X in ``dim_``.

        :param X: array-like training data.
        :param y: array-like targets, validated together with X.
        :return: Returns an instance of self.
        """
        super().fit(X, y)
        # Only the validated X is needed afterwards; y is checked and dropped.
        validated_X, _ = check_X_y(X, y, estimator=self, dtype=FLOAT_DTYPES)
        self.dim_ = validated_X.shape[1]

        return self
Example #11
Source File: columncapper.py    From scikit-lego with MIT License 5 votes vote down vote up
def transform(self, X):
        """
        Clip every column of ``X`` to the limits stored in ``self.quantiles_``.

        :type X: pandas.DataFrame or numpy.ndarray
        :param X: The column(s) for which the capping limit(s) will be applied.

        :rtype: numpy.ndarray
        :returns: ``X`` values with capped limits.

        :raises:
            ``ValueError`` if the number of columns from ``X`` differs from the
            number of columns when fitting
        """
        check_is_fitted(self, "quantiles_")
        X = check_array(
            X,
            estimator=self,
            dtype=FLOAT_DTYPES,
            force_all_finite=False,
            copy=self.copy,
        )

        if X.shape[1] != self.n_columns_:
            raise ValueError(
                "X must have the same number of columns in fit and transform"
            )

        if self.discard_infs:
            # Replace +inf and -inf in place with NaN.
            np.putmask(X, np.isinf(X), np.nan)

        # Row 0 of quantiles_ holds the lower limits, row 1 the upper limits.
        lower_limits = self.quantiles_[0, :]
        upper_limits = self.quantiles_[1, :]
        return np.maximum(np.minimum(X, upper_limits), lower_limits)
Example #12
Source File: umap_reconstruction.py    From scikit-lego with MIT License 5 votes vote down vote up
def predict(self, X):
        """
        Predict if a point is an outlier.

        Rows for which ``self.difference(X)`` exceeds ``self.threshold`` are
        labelled -1; all other rows are labelled 1.

        :param X: array-like, shape=(n_samples, n_columns) data to label.
        :return: array, shape=(n_samples,) the predicted data. 1 for inliers, -1 for outliers.
        """
        X = check_array(X, estimator=self, dtype=FLOAT_DTYPES)
        check_is_fitted(self, ["umap_", "offset_"])
        result = np.ones(X.shape[0])
        result[self.difference(X) > self.threshold] = -1
        # `np.int` was only an alias for the builtin and was removed in
        # NumPy 1.24; the builtin `int` gives the same dtype.
        return result.astype(int)
Example #13
Source File: umap_reconstruction.py    From scikit-lego with MIT License 5 votes vote down vote up
def transform(self, X):
        """
        Validate X and pass it through the fitted ``umap_`` model's ``transform``.

        :param X: array-like data accepted by ``check_array``.
        :return: whatever ``self.umap_.transform`` returns for the validated X.
        """
        X = check_array(X, estimator=self, dtype=FLOAT_DTYPES)
        check_is_fitted(self, ["umap_", "offset_"])
        embedded = self.umap_.transform(X)
        return embedded
Example #14
Source File: pca_reconstruction.py    From scikit-lego with MIT License 5 votes vote down vote up
def predict(self, X):
        """
        Predict if a point is an outlier.

        Rows for which ``self.difference(X)`` exceeds ``self.threshold`` are
        labelled -1; all other rows are labelled 1.

        :param X: array-like, shape=(n_samples, n_columns) data to label.
        :return: array, shape=(n_samples,) the predicted data. 1 for inliers, -1 for outliers.
        """
        X = check_array(X, estimator=self, dtype=FLOAT_DTYPES)
        check_is_fitted(self, ["pca_", "offset_"])
        result = np.ones(X.shape[0])
        result[self.difference(X) > self.threshold] = -1
        # `np.int` was only an alias for the builtin and was removed in
        # NumPy 1.24; the builtin `int` gives the same dtype.
        return result.astype(int)
Example #15
Source File: pca_reconstruction.py    From scikit-lego with MIT License 5 votes vote down vote up
def transform(self, X):
        """
        Validate X and pass it through the fitted ``pca_`` model's ``transform``.

        :param X: array-like data accepted by ``check_array``.
        :return: whatever ``self.pca_.transform`` returns for the validated X.
        """
        X = check_array(X, estimator=self, dtype=FLOAT_DTYPES)
        check_is_fitted(self, ["pca_", "offset_"])
        projected = self.pca_.transform(X)
        return projected
Example #16
Source File: linear_regression.py    From differential-privacy-library with MIT License 5 votes vote down vote up
def _preprocess_data(X, y, fit_intercept, epsilon=1.0, bounds_X=None, bounds_y=None, copy=True, check_input=True,
                     **unused_args):
    """
    Validate X and y and, when an intercept is fitted, clip and centre them.

    With ``fit_intercept`` set, X and y are clipped to their bounds and the
    column means returned by ``mean(...)`` (each called with ``epsilon`` and
    a fresh ``BudgetAccountant``) are subtracted. Otherwise zero offsets are
    returned and the data is left uncentred.

    :param X: feature data; validated with ``check_array`` when ``check_input`` is set.
    :param y: target data; converted to an array with X's dtype.
    :param fit_intercept: whether to clip and centre the data.
    :param epsilon: value forwarded to both ``mean`` calls.
    :param bounds_X: bounds for X, normalised through ``check_bounds``.
    :param bounds_y: bounds for y, normalised through ``check_bounds``.
    :param copy: whether X is copied before being modified.
    :param check_input: whether X is validated with ``check_array``.
    :param unused_args: extra keyword arguments, reported via ``warn_unused_args``.
    :return: tuple ``(X, y, X_offset, y_offset, X_scale)``; ``X_scale`` is always all ones.
    """
    warn_unused_args(unused_args)

    if check_input:
        X = check_array(X, copy=copy, accept_sparse=False, dtype=FLOAT_DTYPES)
    elif copy:
        X = X.copy(order='K')

    y = np.asarray(y, dtype=X.dtype)
    n_features = X.shape[1]
    X_scale = np.ones(n_features, dtype=X.dtype)

    if not fit_intercept:
        # No centring: offsets are zeros shaped like one row of X / y.
        X_offset = np.zeros(n_features, dtype=X.dtype)
        y_offset = X.dtype.type(0) if y.ndim == 1 else np.zeros(y.shape[1], dtype=X.dtype)
        return X, y, X_offset, y_offset, X_scale

    n_targets = y.shape[1] if y.ndim > 1 else 1
    bounds_X = check_bounds(bounds_X, n_features)
    bounds_y = check_bounds(bounds_y, n_targets)

    X = clip_to_bounds(X, bounds_X)
    y = clip_to_bounds(y, bounds_y)

    X_offset = mean(X, axis=0, bounds=bounds_X, epsilon=epsilon, accountant=BudgetAccountant())
    X -= X_offset
    y_offset = mean(y, axis=0, bounds=bounds_y, epsilon=epsilon, accountant=BudgetAccountant())
    y = y - y_offset

    return X, y, X_offset, y_offset, X_scale


# noinspection PyPep8Naming,PyAttributeOutsideInit 
Example #17
Source File: bayesian_gmm_classifier.py    From scikit-lego with MIT License 5 votes vote down vote up
def predict(self, X):
        """
        Return, for every row of X, the class whose ``predict_proba`` column is largest.

        :param X: array-like data accepted by ``check_array``.
        :return: array of labels taken from ``self.classes_``.
        """
        check_is_fitted(self, ["gmms_", "classes_"])
        X = check_array(X, estimator=self, dtype=FLOAT_DTYPES)
        top_class = self.predict_proba(X).argmax(axis=1)
        return self.classes_[top_class]
Example #18
Source File: bayesian_gmm_classifier.py    From scikit-lego with MIT License 5 votes vote down vote up
def fit(self, X: np.ndarray, y: np.ndarray) -> "BayesianGMMClassifier":
        """
        Fit one Bayesian Gaussian mixture per class on all columns of X.

        :param X: array-like, shape=(n_samples, n_columns) training data.
        :param y: array-like, shape=(n_samples, ) training labels.
        :return: Returns an instance of self.
        """
        # check_X_y (default ensure_2d=True) only returns a 2-d X, so no
        # 1-d reshaping is required afterwards.
        X, y = check_X_y(X, y, estimator=self, dtype=FLOAT_DTYPES)

        self.gmms_ = {}
        self.classes_ = unique_labels(y)
        for c in self.classes_:
            # Build the class mask once and reuse it for both X and y.
            class_mask = y == c
            subset_x, subset_y = X[class_mask], y[class_mask]
            mixture = BayesianGaussianMixture(
                n_components=self.n_components,
                covariance_type=self.covariance_type,
                tol=self.tol,
                reg_covar=self.reg_covar,
                max_iter=self.max_iter,
                n_init=self.n_init,
                init_params=self.init_params,
                weight_concentration_prior_type=self.weight_concentration_prior_type,
                weight_concentration_prior=self.weight_concentration_prior,
                mean_precision_prior=self.mean_precision_prior,
                mean_prior=self.mean_prior,
                degrees_of_freedom_prior=self.degrees_of_freedom_prior,
                covariance_prior=self.covariance_prior,
                random_state=self.random_state,
                warm_start=self.warm_start,
                verbose=self.verbose,
                verbose_interval=self.verbose_interval,
            )
            self.gmms_[c] = mixture.fit(subset_x, subset_y)
        return self
Example #19
Source File: bayesian_gmm_detector.py    From scikit-lego with MIT License 5 votes vote down vote up
def score_samples(self, X):
        """
        Return the negated ``score_samples`` output of the fitted mixture.

        :param X: array-like data accepted by ``check_array``.
        :return: array holding ``-self.gmm_.score_samples(X)``.
        """
        X = check_array(X, estimator=self, dtype=FLOAT_DTYPES)
        check_is_fitted(self, ["gmm_", "likelihood_threshold_"])
        if X.ndim == 1:
            X = np.expand_dims(X, 1)

        return -self.gmm_.score_samples(X)
Example #20
Source File: gmm_classifier.py    From scikit-lego with MIT License 5 votes vote down vote up
def predict_proba(self, X):
        """
        Return normalised per-class scores for every row of X.

        Each class mixture scores the samples with ``score_samples``; the
        scores are exponentiated and normalised so every row sums to one.

        :param X: array-like, shape=(n_samples, n_columns) data to score.
        :return: array, shape=(n_samples, n_classes); column order follows ``self.classes_``.
        """
        X = check_array(X, estimator=self, dtype=FLOAT_DTYPES)
        check_is_fitted(self, ["gmms_", "classes_"])
        log_scores = np.zeros((X.shape[0], self.classes_.shape[0]))
        for idx, c in enumerate(self.classes_):
            log_scores[:, idx] = self.gmms_[c].score_samples(X)
        # Shift by the row maximum so exp() cannot underflow every entry of a
        # row to zero (which would give 0/0 = NaN); the ratio is unaffected.
        log_scores -= log_scores.max(axis=1, keepdims=True)
        weights = np.exp(log_scores)
        return weights / weights.sum(axis=1, keepdims=True)
Example #21
Source File: gmm_classifier.py    From scikit-lego with MIT License 5 votes vote down vote up
def fit(self, X: np.ndarray, y: np.ndarray) -> "GMMClassifier":
        """
        Fit one Gaussian mixture per class on all columns of X.

        :param X: array-like, shape=(n_samples, n_columns) training data.
        :param y: array-like, shape=(n_samples, ) training labels.
        :return: Returns an instance of self.
        """
        # check_X_y (default ensure_2d=True) only returns a 2-d X, so no
        # 1-d reshaping is required afterwards.
        X, y = check_X_y(X, y, estimator=self, dtype=FLOAT_DTYPES)

        self.gmms_ = {}
        self.classes_ = unique_labels(y)
        for c in self.classes_:
            # Build the class mask once and reuse it for both X and y.
            class_mask = y == c
            subset_x, subset_y = X[class_mask], y[class_mask]
            mixture = GaussianMixture(
                n_components=self.n_components,
                covariance_type=self.covariance_type,
                tol=self.tol,
                reg_covar=self.reg_covar,
                max_iter=self.max_iter,
                n_init=self.n_init,
                init_params=self.init_params,
                weights_init=self.weights_init,
                means_init=self.means_init,
                precisions_init=self.precisions_init,
                random_state=self.random_state,
                warm_start=self.warm_start,
                verbose=self.verbose,
                verbose_interval=self.verbose_interval,
            )
            self.gmms_[c] = mixture.fit(subset_x, subset_y)
        return self
Example #22
Source File: gmm_outlier_detector.py    From scikit-lego with MIT License 5 votes vote down vote up
def score_samples(self, X):
        """
        Return the negated ``score_samples`` output of the fitted mixture.

        :param X: array-like data accepted by ``check_array``.
        :return: array holding ``-self.gmm_.score_samples(X)``.
        """
        X = check_array(X, estimator=self, dtype=FLOAT_DTYPES)
        check_is_fitted(self, ["gmm_", "likelihood_threshold_"])
        if X.ndim == 1:
            X = np.expand_dims(X, 1)

        mixture_scores = self.gmm_.score_samples(X)
        return -mixture_scores
Example #23
Source File: neighbors.py    From scikit-lego with MIT License 5 votes vote down vote up
def predict(self, X):
        """
        Predict class labels for samples in X.

        The label is the entry of ``self.classes_`` whose ``predict_proba``
        column is largest for the sample.

        :param X: array_like, shape (n_samples, n_features)
        :return: array, shape (n_samples)
        """
        check_is_fitted(self)
        X = check_array(X, estimator=self, dtype=FLOAT_DTYPES)

        posterior = self.predict_proba(X)
        winners = np.argmax(posterior, axis=1)
        return self.classes_[winners]
Example #24
Source File: neighbors.py    From scikit-lego with MIT License 5 votes vote down vote up
def predict_proba(self, X):
        """
        Probability estimates.

        The returned estimates for all classes are in the same order found in the `.classes_` attribute.
        For every class the stored log prior is added to the ``score_samples``
        output of that class's model; the sums are exponentiated and
        normalised per sample.

        :param X: array-like of shape (n_samples, n_features)
        :return: array-like of shape (n_samples, n_classes)
            Returns the probability of the sample for each class in the model,
            where classes are ordered as they are in self.classes_.
        """
        check_is_fitted(self)
        X = check_array(X, estimator=self, dtype=FLOAT_DTYPES)

        log_prior = np.array(
            [self.priors_logp_[target_label] for target_label in self.classes_]
        )

        log_likelihood = np.array(
            [
                self.models_[target_label].score_samples(X)
                for target_label in self.classes_
            ]
        ).T

        log_joint = log_likelihood + log_prior
        # Subtract the row maximum before exponentiating: the normalised
        # result is the same, but very negative values no longer underflow
        # to an all-zero row (which would divide 0 by 0).
        log_joint -= log_joint.max(axis=1, keepdims=True)
        joint = np.exp(log_joint)
        evidence = joint.sum(axis=1, keepdims=True)
        posterior = joint / evidence
        return posterior
Example #25
Source File: neighbors.py    From scikit-lego with MIT License 5 votes vote down vote up
def fit(self, X: np.ndarray, y: np.ndarray):
        """
        Fit one kernel density model and one log prior per class.

        :param X: array-like, shape=(n_samples, n_features)
        :param y: array-like, shape=(n_samples)
        :return: Returns an instance of self
        """
        X, y = check_X_y(X, y, estimator=self, dtype=FLOAT_DTYPES)

        self.classes_ = unique_labels(y)
        self.models_, self.priors_logp_ = {}, {}
        n_total = len(X)
        for target_label in self.classes_:
            x_subset = X[y == target_label]

            # Density model fitted on the rows of this class only.
            # NOTE(review): the attribute is spelled `breath_first` here --
            # confirm it matches the name set in __init__.
            density = KernelDensity(
                bandwidth=self.bandwidth,
                kernel=self.kernel,
                algorithm=self.algorithm,
                metric=self.metric,
                atol=self.atol,
                rtol=self.rtol,
                breadth_first=self.breath_first,
                leaf_size=self.leaf_size,
                metric_params=self.metric_params,
            )
            self.models_[target_label] = density.fit(x_subset)

            # Log of the fraction of training rows that belong to this class.
            self.priors_logp_[target_label] = np.log(len(x_subset) / n_total)

        return self
Example #26
Source File: data.py    From sagemaker-scikit-learn-extension with Apache License 2.0 5 votes vote down vote up
def fit(self, X, y=None):
        """Fit RobustStandardScaler to X.

        A ``StandardScaler`` is fitted on X. Mean-centring is switched off
        for sparse input. For dense input, the fitted mean of every column
        whose fraction of non-zero entries is at most 0.3 is reset to zero.

        Parameters
        ----------
        X : array-like, shape [n_samples, n_features]
            The data to standardize.

        Returns
        -------
        self : RobustStandardScaler
        """
        X = check_array(
            X,
            accept_sparse=("csr", "csc"),
            estimator=self,
            dtype=FLOAT_DTYPES,
            force_all_finite="allow-nan",
        )

        # Centre only dense input.
        with_mean = not issparse(X)

        self.scaler_ = StandardScaler(with_mean=with_mean, with_std=True, copy=self.copy)
        self.scaler_.fit(X)

        if self.scaler_.with_mean:
            nonzero_fraction = np.count_nonzero(X, axis=0) / X.shape[0]
            # 1 keeps the fitted column mean, 0 replaces it with zero.
            nnz_mean_mask = np.where(nonzero_fraction > 0.3, 1, 0)
            self.scaler_.mean_ = self.scaler_.mean_ * nnz_mean_mask

        return self
Example #27
Source File: iterative_imputer.py    From ME-Net with MIT License 5 votes vote down vote up
def _validate_input(self, X):
        """Validate ``self.strategy`` and convert X with ``check_array``.

        Parameters
        ----------
        X : array-like or CSC sparse matrix
            Data to validate.

        Returns
        -------
        X : the validated array.

        Raises
        ------
        ValueError
            If the strategy is unknown, if X cannot be converted to a numeric
            array for a numeric strategy, or if the resulting dtype kind is
            not one of ``i``, ``u``, ``f`` or ``O``.
        """
        allowed_strategies = ["mean", "median", "most_frequent", "constant"]
        if self.strategy not in allowed_strategies:
            raise ValueError("Can only use these strategies: {0} "
                             " got strategy={1}".format(allowed_strategies,
                                                        self.strategy))

        if self.strategy in ("most_frequent", "constant"):
            dtype = None
        else:
            dtype = FLOAT_DTYPES

        if not is_scalar_nan(self.missing_values):
            force_all_finite = True
        else:
            force_all_finite = False # "allow-nan"

        try:
            X = check_array(X, accept_sparse='csc', dtype=dtype,
                            force_all_finite=force_all_finite, copy=self.copy)
        except ValueError as ve:
            if "could not convert" in str(ve):
                # X is still the caller's object here and may have no
                # `.dtype` (e.g. a list); fall back to its type name so an
                # AttributeError does not hide the real problem.
                dtype_kind = getattr(getattr(X, "dtype", None), "kind",
                                     type(X).__name__)
                raise ValueError("Cannot use {0} strategy with non-numeric "
                                 "data. Received datatype :{1}."
                                 "".format(self.strategy, dtype_kind)) from ve
            raise

        _check_inputs_dtype(X, self.missing_values)
        if X.dtype.kind not in ("i", "u", "f", "O"):
            raise ValueError("_SimpleImputer does not support data with dtype "
                             "{0}. Please provide either a numeric array (with"
                             " a floating point or integer dtype) or "
                             "categorical data represented either as an array "
                             "with integer dtype or an array of string values "
                             "with an object dtype.".format(X.dtype))

        return X
Example #28
Source File: text_transformers.py    From cdQA with Apache License 2.0 5 votes vote down vote up
def transform(self, X=None, copy=True, is_query=False):
        """
        Return the stored document matrix, or transform a query matrix.

        Parameters
        ----------
        X : sparse matrix, [n_samples, n_features]
            document-term query matrix; only used when ``is_query`` is True
        copy : boolean, optional (default=True)
            forwarded to ``check_array`` when validating X
        is_query : boolean (default=False)
            whether to transform a query or the documents database

        Returns
        -------
        vectors : sparse matrix, [n_samples, n_features]
            the transformed query when ``is_query`` is True, otherwise
            ``self._doc_matrix``

        Raises
        ------
        ValueError
            If the query has a different number of features than
            ``self._doc_matrix``.
        """
        if is_query:
            X = check_array(X, accept_sparse="csr", dtype=FLOAT_DTYPES, copy=copy)
            if not sp.issparse(X):
                X = sp.csr_matrix(X, dtype=np.float64)

            n_samples, n_features = X.shape

            expected_n_features = self._doc_matrix.shape[1]
            if n_features != expected_n_features:
                raise ValueError(
                    "Input has n_features=%d while the model"
                    " has been trained with n_features=%d"
                    % (n_features, expected_n_features)
                )

            if self.use_idf:
                # `msg` is keyword-only in current scikit-learn; the keyword
                # form is also accepted by older releases.
                check_is_fitted(self, "_idf_diag", msg="idf vector is not fitted")
                # Scale each feature column by the matching diagonal entry.
                X = sp.csr_matrix(X.toarray() * self._idf_diag.diagonal())

            return X

        else:
            return self._doc_matrix
Example #29
Source File: iterative_imputer.py    From ME-Net with MIT License 4 votes vote down vote up
def _initial_imputation(self, X):
        """Perform initial imputation for input X.

        The initial imputer is created and fitted on first use and reused
        afterwards.

        Parameters
        ----------
        X : ndarray, shape (n_samples, n_features)
            Input data, where "n_samples" is the number of samples and
            "n_features" is the number of features.

        Returns
        -------
        Xt : ndarray
            The validated input restricted to the columns whose initial
            imputer statistic is not NaN.

        X_filled : ndarray
            Output of the initial imputer for the validated input.

        mask_missing_values : ndarray
            Missing-value indicator matrix of the validated input, restricted
            to the same columns as ``Xt``.
        """
        # TODO: change False to "allow-nan"
        force_all_finite = not is_scalar_nan(self.missing_values)

        X = check_array(
            X, dtype=FLOAT_DTYPES, order="F", force_all_finite=force_all_finite
        )
        _check_inputs_dtype(X, self.missing_values)

        mask_missing_values = _get_mask(X, self.missing_values)
        if self.initial_imputer_ is not None:
            X_filled = self.initial_imputer_.transform(X)
        else:
            self.initial_imputer_ = _SimpleImputer(
                missing_values=self.missing_values,
                strategy=self.initial_strategy,
            )
            X_filled = self.initial_imputer_.fit_transform(X)

        # Indices of the columns for which the imputer has a usable statistic.
        has_statistic = np.logical_not(np.isnan(self.initial_imputer_.statistics_))
        valid_mask = np.flatnonzero(has_statistic)

        return X[:, valid_mask], X_filled, mask_missing_values[:, valid_mask]