# -*- coding: utf-8 -*-

# Copyright (c) 2013 Ole Krause-Sparmann

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.

import numpy
import scipy
import scipy.sparse

from nearpy.hashes.lshash import LSHash

from nearpy.utils import numpy_array_from_list_or_numpy_array, perform_pca


class PCADiscretizedProjections(LSHash):
    """
    Projects a vector on n first principal components and assigns
    a discrete value to each projection depending on the bin.
    """

    def __init__(self, hash_name, projection_count, training_set, bin_width):
        """
        Computes principal components for training vector set. Uses
        first projection_count principal components for projections.

        Training set must be either a numpy matrix or a list of
        numpy vectors.
        """
        super(PCADiscretizedProjections, self).__init__(hash_name)
        self.projection_count = projection_count
        self.bin_width = bin_width

        # Only do training if training set was specified
        if not training_set is None:
            # Get numpy array representation of input
            training_set = numpy_array_from_list_or_numpy_array(training_set)

            # Get subspace size from training matrix
            self.dim = training_set.shape[0]

            # Get transposed training set matrix for PCA
            training_set_t = numpy.transpose(training_set)

            # Compute principal components
            (eigenvalues, eigenvectors) = perform_pca(training_set_t)

            # Get largest N eigenvalue/eigenvector indices
            largest_eigenvalue_indices = numpy.flipud(
                scipy.argsort(eigenvalues))[:projection_count]

            # Create matrix for first N principal components
            self.components = numpy.zeros((self.dim,
                                           len(largest_eigenvalue_indices)))

            # Put first N principal components into matrix
            for index in range(len(largest_eigenvalue_indices)):
                self.components[:, index] = \
                    eigenvectors[:, largest_eigenvalue_indices[index]]

            # We need the component vectors to be in the rows
            self.components = numpy.transpose(self.components)

        # This is only used in case we need to process sparse vectors
        self.components_csr = None

    def reset(self, dim):
        """ Resets / Initializes the hash for the specified dimension. """
        if self.dim != dim:
            raise Exception('PCA hash is trained for specific dimension!')

    def hash_vector(self, v, querying=False):
        """
        Hashes the vector and returns the binary bucket key as string.
        """
        if scipy.sparse.issparse(v):
            # If vector is sparse, make sure we have the CSR representation
            # of the projection matrix
            if self.components_csr == None:
                self.components_csr = scipy.sparse.csr_matrix(self.components)
            # Make sure that we are using CSR format for multiplication
            if not scipy.sparse.isspmatrix_csr(v):
                v = scipy.sparse.csr_matrix(v)
            # Project vector onto all hyperplane normals
            projection = (self.components_csr.dot(
                v) / self.bin_width).floor().toarray()
        else:
            # Project vector onto components
            projection = numpy.dot(self.components, v)
            projection = numpy.floor(projection / self.bin_width)
        # Return key
        return ['_'.join([str(int(x)) for x in projection])]

    def get_config(self):
        """
        Returns pickle-serializable configuration struct for storage.
        """
        # Fill this dict with config data
        return {
            'hash_name': self.hash_name,
            'dim': self.dim,
            'bin_width': self.bin_width,
            'projection_count': self.projection_count,
            'components': self.components
        }

    def apply_config(self, config):
        """
        Applies config
        """
        self.hash_name = config['hash_name']
        self.dim = config['dim']
        self.bin_width = config['bin_width']
        self.projection_count = config['projection_count']
        self.components = config['components']