#
#*******************************************************************************
# Copyright 2014-2020 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#******************************************************************************/

import numpy as np
import numbers

from sklearn import decomposition
from sklearn.utils import check_array

from sklearn.decomposition._pca import PCA as PCA_original
from sklearn.decomposition._pca import (_infer_dimension_, svd_flip)

from sklearn.utils.validation import check_is_fitted
from sklearn.utils.extmath import stable_cumsum
from scipy.sparse import issparse

import daal4py
from .._utils import getFPType, method_uses_sklearn, method_uses_daal
import logging


def _daal4py_svd(X):
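    """Compute the thin SVD of X using daal4py.

    Returns ``(U, s, V)`` laid out the same way as
    ``np.linalg.svd(X, full_matrices=False)``.
    """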
    X = check_array(X, dtype=[np.float64, np.float32])
    X_fptype = getFPType(X)
    alg = daal4py.svd(
        fptype=X_fptype,
        method='defaultDense',
        leftSingularMatrix='requiredInPackedForm',
        rightSingularMatrix='requiredInPackedForm'
    )
    res = alg.compute(X)
    s = res.singularValues
    U = res.leftSingularMatrix
    V = res.rightSingularMatrix
    return U, np.ravel(s), V


def _validate_n_components(n_components, n_samples, n_features):
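    """Validate ``n_components`` against the data shape.

    Mirrors the checks performed by scikit-learn's full-SVD PCA solver.
    """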
    if n_components == 'mle':
        if n_samples < n_features:
            raise ValueError("n_components='mle' is only supported "
                             "if n_samples >= n_features")
    elif not 0 <= n_components <= min(n_samples, n_features):
        raise ValueError("n_components=%r must be between 0 and "
                         "min(n_samples, n_features)=%r with "
                         "svd_solver='full'"
                         % (n_components, min(n_samples, n_features)))
    elif n_components >= 1:
        if not isinstance(n_components, (numbers.Integral, np.integer)):
            raise ValueError("n_components=%r must be of type int "
                             "when greater than or equal to 1, "
                             "was of type=%r"
                             % (n_components, type(n_components)))


def _process_n_components_None(self_n_components, self_svd_solver, X_shape):
    # Handle n_components==None
    if self_n_components is None:
        if self_svd_solver != 'arpack':
            n_components = min(X_shape)
        else:
            n_components = min(X_shape) - 1
    else:
        n_components = self_n_components

    return n_components


def _n_components_from_fraction(explained_variance_ratio, frac):
    # number of components for which the cumulative explained
    # variance fraction exceeds the desired threshold.
    # side='right' ensures that the cumulative explained variance of the
    # selected components is strictly greater than the fraction passed
    # as n_components. More discussion in issue: #15669
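    # For example, with cumulative ratios [0.6, 0.9, 1.0] and frac=0.9,
    # searchsorted(..., side='right') returns 2, so 3 components are kept
    # and the retained variance (1.0) strictly exceeds the requested 0.9.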
    ratio_cumsum = stable_cumsum(explained_variance_ratio)
    n_components = np.searchsorted(ratio_cumsum, frac,
                                   side='right') + 1
    return n_components
    

def _fit_full(self, X, n_components):
    """Fit the model by computing full SVD on X"""
    n_samples, n_features = X.shape

    _validate_n_components(n_components, n_samples, n_features)

    # Center data
    self.mean_ = np.mean(X, axis=0)
    X -= self.mean_

    if X.shape[0] > X.shape[1] and (X.dtype == np.float64 or X.dtype == np.float32):
        U, S, V = _daal4py_svd(X)
    else:
        U, S, V = np.linalg.svd(X, full_matrices=False)
    # flip eigenvectors' sign to enforce deterministic output
    U, V = svd_flip(U, V)

    components_ = V

    # Get variance explained by singular values
    explained_variance_ = (S ** 2) / (n_samples - 1)
    total_var = explained_variance_.sum()
    explained_variance_ratio_ = explained_variance_ / total_var

    # Postprocess the number of components required
    if n_components == 'mle':
        n_components = \
            _infer_dimension_(explained_variance_, n_samples, n_features)
    elif 0 < n_components < 1.0:
        n_components = _n_components_from_fraction(
            explained_variance_ratio_, n_components)

    # Compute noise covariance using Probabilistic PCA model
    # The sigma2 maximum likelihood (cf. eq. 12.46)
    if n_components < min(n_features, n_samples):
        self.noise_variance_ = explained_variance_[n_components:].mean()
    else:
        self.noise_variance_ = 0.

    self.n_samples_, self.n_features_ = n_samples, n_features
    self.components_ = components_[:n_components]
    self.n_components_ = n_components
    self.explained_variance_ = explained_variance_[:n_components]
    self.explained_variance_ratio_ = \
        explained_variance_ratio_[:n_components]
    self.singular_values_ = S[:n_components]

    return U, S, V


_fit_full_copy = _fit_full

class PCA_prev(PCA_original):
    __doc__ = PCA_original.__doc__

    def __init__(self, n_components=None, copy=True, whiten=False,
                 svd_solver='auto', tol=0.0, iterated_power='auto',
                 random_state=None):
        self.n_components = n_components
        self.copy = copy
        self.whiten = whiten
        self.svd_solver = svd_solver
        self.tol = tol
        self.iterated_power = iterated_power
        self.random_state = random_state

    def _fit_full(self, X, n_components):
        return _fit_full_copy(self, X, n_components)


class PCA(PCA_original):
    def __init__(self, n_components=None, copy=True, whiten=False,
                 svd_solver='auto', tol=0.0, iterated_power='auto',
                 random_state=None):
        self.n_components = n_components
        self.copy = copy
        self.whiten = whiten
        self.svd_solver = svd_solver
        self.tol = tol
        self.iterated_power = iterated_power
        self.random_state = random_state


    def _fit_daal4py(self, X, n_components):
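        """Fit PCA using daal4py's 'svdDense' method.

        X is centered (but not scaled) by a zscore normalization
        sub-algorithm; means, variances, eigenvalues and eigenvectors
        are computed in a single pass over the data.
        """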
        n_samples, n_features = X.shape
        n_sf_min = min(n_samples, n_features)

        _validate_n_components(n_components, n_samples, n_features)

        if n_components == 'mle':
            daal_n_components = n_features
        elif n_components < 1:
            daal_n_components = n_sf_min
        else:
            daal_n_components = n_components

        fpType = getFPType(X)
        centering_algo = daal4py.normalization_zscore(
            fptype=fpType, doScale=False)
        pca_alg = daal4py.pca(
            fptype=fpType,
            method='svdDense',
            normalization=centering_algo,
            resultsToCompute='mean|variance|eigenvalue',
            isDeterministic=True,
            nComponents=daal_n_components
        )
        pca_res = pca_alg.compute(X)

        self.mean_ = pca_res.means.ravel()
        variances_ = pca_res.variances.ravel()
        components_ = pca_res.eigenvectors
        explained_variance_ = pca_res.eigenvalues.ravel()
        total_var = explained_variance_.sum()
        explained_variance_ratio_ = explained_variance_ / total_var

        if n_components == 'mle':
            n_components = \
                _infer_dimension_(explained_variance_, n_samples, n_features)
        elif 0 < n_components < 1.0:
            n_components = _n_components_from_fraction(
                explained_variance_ratio_, n_components)

        # Compute noise covariance using Probabilistic PCA model
        # The sigma2 maximum likelihood (cf. eq. 12.46)
        if n_components < n_sf_min:
            if explained_variance_.shape[0] == n_sf_min:
                self.noise_variance_ = explained_variance_[n_components:].mean()
            else:
                resid_var_ = variances_.sum()
                resid_var_ -= explained_variance_[:n_components].sum()
                self.noise_variance_ = resid_var_ / (n_sf_min - n_components)
        else:
            self.noise_variance_ = 0.

        self.n_samples_, self.n_features_ = n_samples, n_features
        self.components_ = components_[:n_components]
        self.n_components_ = n_components
        self.explained_variance_ = explained_variance_[:n_components]
        self.explained_variance_ratio_ = \
            explained_variance_ratio_[:n_components]
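        # Recover singular values from the eigenvalues of the covariance
        # matrix: s_i = sqrt((n_samples - 1) * explained_variance_[i])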
        self.singular_values_ = np.sqrt((n_samples - 1) * self.explained_variance_)


    def _transform_daal4py(self, X, whiten=False, scale_eigenvalues=True, check_X=True):
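        """Project X onto the fitted components with daal4py.pca_transform.

        ``whiten`` and ``scale_eigenvalues`` select which eigenvalue
        scaling is passed to the transform (see the construction of
        ``tr_data`` below).
        """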
        check_is_fitted(self)

        X = check_array(X, dtype=[np.float64, np.float32], force_all_finite=check_X)
        fpType = getFPType(X)

        tr_data = dict()
        if self.mean_ is not None:
            tr_data['mean'] = self.mean_.reshape((1, -1))
        if whiten:
            if scale_eigenvalues:
                tr_data['eigenvalue'] = (self.n_samples_ - 1) * self.explained_variance_.reshape((1, -1))
            else:
                tr_data['eigenvalue'] = self.explained_variance_.reshape((1, -1))
        elif scale_eigenvalues:
            tr_data['eigenvalue'] = np.full(
                (1, self.explained_variance_.shape[0]),
                self.n_samples_ - 1.0, dtype=X.dtype)

        if X.shape[1] != self.n_features_:
            raise ValueError("The number of features of the input data, {}, is not "
                              "equal to the number of features of the training data, {}".format(
                                  X.shape[1], self.n_features_))
        tr_res = daal4py.pca_transform(
            fptype=fpType
        ).compute(X, self.components_, tr_data)

        return tr_res.transformedData


    def _fit_full_daal4py(self, X, n_components):
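        """Full-SVD fit via daal4py that also returns U, S and V.

        The full decomposition is computed first so that the signs of the
        components can be flipped deterministically; the results are then
        truncated to n_components.
        """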
        n_samples, n_features = X.shape

        # The components' signs must be flipped for deterministic output,
        # so compute the full decomposition first and truncate afterwards.
        self._fit_daal4py(X, min(n_samples, n_features))
        U = self._transform_daal4py(X, whiten=True, check_X=False, scale_eigenvalues=True)
        V = self.components_
        U, V = svd_flip(U, V)
        U = U.copy()
        V = V.copy()
        S = self.singular_values_.copy()

        if n_components == 'mle':
            n_components = \
                _infer_dimension_(self.explained_variance_, n_samples, n_features)
        elif 0 < n_components < 1.0:
            n_components = _n_components_from_fraction(
                self.explained_variance_ratio_, n_components)

        # Compute noise covariance using Probabilistic PCA model
        # The sigma2 maximum likelihood (cf. eq. 12.46)
        if n_components < min(n_features, n_samples):
            self.noise_variance_ = self.explained_variance_[n_components:].mean()
        else:
            self.noise_variance_ = 0.

        self.n_samples_, self.n_features_ = n_samples, n_features
        self.components_ = self.components_[:n_components]
        self.n_components_ = n_components
        self.explained_variance_ = self.explained_variance_[:n_components]
        self.explained_variance_ratio_ = \
            self.explained_variance_ratio_[:n_components]
        self.singular_values_ = self.singular_values_[:n_components]

        return U, S, V


    def _fit_full_vanilla(self, X, n_components):
        """Fit the model by computing full SVD on X"""
        n_samples, n_features = X.shape

        # Center data
        self.mean_ = np.mean(X, axis=0)
        X -= self.mean_

        U, S, V = np.linalg.svd(X, full_matrices=False)
        # flip eigenvectors' sign to enforce deterministic output
        U, V = svd_flip(U, V)

        components_ = V

        # Get variance explained by singular values
        explained_variance_ = (S ** 2) / (n_samples - 1)
        total_var = explained_variance_.sum()
        explained_variance_ratio_ = explained_variance_ / total_var

        # Postprocess the number of components required
        if n_components == 'mle':
            n_components = \
                _infer_dimension_(explained_variance_, n_samples, n_features)
        elif 0 < n_components < 1.0:
            n_components = _n_components_from_fraction(
                explained_variance_ratio_, n_components)

        # Compute noise covariance using Probabilistic PCA model
        # The sigma2 maximum likelihood (cf. eq. 12.46)
        if n_components < min(n_features, n_samples):
            self.noise_variance_ = explained_variance_[n_components:].mean()
        else:
            self.noise_variance_ = 0.

        self.n_samples_, self.n_features_ = n_samples, n_features
        self.components_ = components_[:n_components]
        self.n_components_ = n_components
        self.explained_variance_ = explained_variance_[:n_components]
        self.explained_variance_ratio_ = \
            explained_variance_ratio_[:n_components]
        self.singular_values_ = S[:n_components]

        return U, S, V


    def _fit_full(self, X, n_components):
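        """Dispatch the full-SVD fit to daal4py for tall float32/float64
        inputs, and to the stock NumPy-based implementation otherwise."""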
        n_samples, n_features = X.shape

        _validate_n_components(n_components, n_samples, n_features)

        if n_samples > n_features and (X.dtype == np.float64 or X.dtype == np.float32):
            logging.info("sklearn.decomposition.PCA.fit: " + method_uses_daal)
            return self._fit_full_daal4py(X, n_components)
        else:
            logging.info("sklearn.decomposition.PCA.fit: " + method_uses_sklearn)
            return self._fit_full_vanilla(X, n_components)


    def _fit(self, X):
        """Dispatch to the right submethod depending on the chosen solver."""

        # Raise an error for sparse input.
        # This is more informative than the generic one raised by check_array.
        if issparse(X):
            raise TypeError('PCA does not support sparse input. See '
                            'TruncatedSVD for a possible alternative.')

        X = check_array(X, dtype=[np.float64, np.float32], ensure_2d=True,
                        copy=self.copy)

        # Handle n_components==None
        n_components = _process_n_components_None(
            self.n_components, self.svd_solver, X.shape)

        # Handle svd_solver
        self._fit_svd_solver = self.svd_solver
        if self._fit_svd_solver == 'auto':
            # Small problem or n_components == 'mle', just call full PCA
            if max(X.shape) <= 500 or n_components == 'mle':
                self._fit_svd_solver = 'full'
            elif n_components >= 1 and n_components < .8 * min(X.shape):
                self._fit_svd_solver = 'randomized'
            # This is also the case of n_components in (0,1)
            else:
                self._fit_svd_solver = 'full'

        # Call different fits for either full or truncated SVD
        if self._fit_svd_solver == 'full':
            return self._fit_full(X, n_components)
        elif self._fit_svd_solver in ['arpack', 'randomized']:
            logging.info("sklearn.decomposition.PCA.fit: " + method_uses_sklearn)
            return self._fit_truncated(X, n_components, self._fit_svd_solver)
        elif self._fit_svd_solver == 'daal':
            if X.shape[0] < X.shape[1]:
                raise ValueError("svd_solver='daal' is applicable for tall and skinny inputs only.")
            logging.info("sklearn.decomposition.PCA.fit: " + method_uses_daal)
            return self._fit_daal4py(X, n_components)
        else:
            raise ValueError("Unrecognized svd_solver='{0}'"
                             "".format(self._fit_svd_solver))


    def fit_transform(self, X, y=None):
        """Fit the model with X and apply the dimensionality reduction on X.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training data, where n_samples is the number of samples
            and n_features is the number of features.

        y : Ignored

        Returns
        -------
        X_new : array-like, shape (n_samples, n_components)
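
        Examples
        --------
        A minimal sketch; the exact output depends on the solver and is
        therefore not checked here:

        >>> import numpy as np
        >>> X = np.array([[-1., -1.], [-2., -1.], [-3., -2.],
        ...               [1., 1.], [2., 1.], [3., 2.]])
        >>> pca = PCA(n_components=2)
        >>> pca.fit_transform(X).shape  # doctest: +SKIP
        (6, 2)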

        """
        if (self.svd_solver == 'daal' and isinstance(X, np.ndarray) and
               X.shape[0] >= X.shape[1]):
            # Handle n_components==None
            n_components = _process_n_components_None(
                self.n_components, self.svd_solver, X.shape)
            logging.info("sklearn.decomposition.PCA.fit: " + method_uses_daal)
            self._fit_daal4py(X, n_components)
            logging.info("sklearn.decomposition.PCA.transform: " + method_uses_daal)
            if self.n_components_ > 0:
                return self._transform_daal4py(X, whiten=self.whiten, check_X=False)
            else:
                return np.empty((self.n_samples_, 0), dtype=X.dtype)
        else:
            U, S, V = self._fit(X)
            U = U[:, :self.n_components_]

            logging.info("sklearn.decomposition.PCA.transform: " + method_uses_sklearn)
            if self.whiten:
                # X_new = X * V / S * sqrt(n_samples - 1) = U * sqrt(n_samples - 1)
                U *= np.sqrt(X.shape[0] - 1)
            else:
                # X_new = X * V = U * S * V^T * V = U * S
                U *= S[:self.n_components_]

            return U

    def transform(self, X):
        """Apply dimensionality reduction to X.

        X is projected on the first principal components previously extracted
        from a training set.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            New data, where n_samples is the number of samples
            and n_features is the number of features.

        Returns
        -------
        X_new : array-like, shape (n_samples, n_components)

        Examples
        --------

        >>> import numpy as np
        >>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
        >>> pca = PCA(n_components=2)
        >>> pca.fit(X)  # doctest: +SKIP
        PCA(n_components=2)
        >>> pca.transform(X)  # doctest: +SKIP
        """
        check_is_fitted(self)

        X = check_array(X)
        if self.n_components_ > 0:
            logging.info("sklearn.decomposition.PCA.transform: " + method_uses_daal)
            return self._transform_daal4py(X, whiten=self.whiten,
                                           check_X=False, scale_eigenvalues=False)
        else:
            logging.info("sklearn.decomposition.PCA.transform: " + method_uses_sklearn)
            if self.mean_ is not None:
                X = X - self.mean_
            X_transformed = np.dot(X, self.components_.T)
            if self.whiten:
                X_transformed /= np.sqrt(self.explained_variance_)
            return X_transformed

if (lambda s: (int(s[:4]), int(s[6:])))(daal4py.__daal_link_version__[:8]) < (2019, 4):
    # With DAAL older than 2019.4 only PCA.fit is accelerated, using DAAL's SVD;
    # the remaining methods fall back to stock scikit-learn.
    class PCA(PCA_original):
        __doc__ = PCA_original.__doc__

        def __init__(self, n_components=None, copy=True, whiten=False,
                     svd_solver='auto', tol=0.0, iterated_power='auto',
                     random_state=None):
            self.n_components = n_components
            self.copy = copy
            self.whiten = whiten
            self.svd_solver = svd_solver
            self.tol = tol
            self.iterated_power = iterated_power
            self.random_state = random_state

        def _fit_full(self, X, n_components):
            return _fit_full_copy(self, X, n_components)
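

# A minimal usage sketch (only runs when this module is executed directly,
# never on import). It assumes daal4py and scikit-learn are installed and
# simply exercises fit_transform on a small random matrix.
if __name__ == '__main__':
    X_demo = np.random.RandomState(0).rand(100, 5)
    pca_demo = PCA(n_components=2)
    X_reduced = pca_demo.fit_transform(X_demo)
    print(X_reduced.shape)  # expected: (100, 2)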