import ast
import logging

import numpy as np
from bson.binary import Binary
from pandas import DataFrame, Series, Panel

from arctic._util import NP_OBJECT_DTYPE
from arctic.serialization.numpy_records import SeriesSerializer, DataFrameSerializer
from ._ndarray_store import NdarrayStore
from .._compression import compress, decompress
from .._config import FORCE_BYTES_TO_UNICODE
from ..date._util import to_pandas_closed_closed
from ..exceptions import ArcticException

log = logging.getLogger(__name__)

DTN64_DTYPE = 'datetime64[ns]'

INDEX_DTYPE = [('datetime', DTN64_DTYPE), ('index', 'i8')]
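# Each segment-index record pairs a segment's last datetime with that row's 0-based offset
# in the item, e.g. (illustrative values only):
#   np.array([(np.datetime64('2016-01-05'), 9999)], dtype=INDEX_DTYPE)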


class PandasStore(NdarrayStore):

    def _segment_index(self, recarr, existing_index, start, new_segments):
        """
        Generate index of datetime64 -> item offset.

        Parameters:
        -----------
        new_data: new data being written (or appended)
        existing_index: index field from the versions document of the previous version
        start: first (0-based) offset of the new data
        segments: list of offsets. Each offset is the row index of the
                  the last row of a particular chunk relative to the start of the _original_ item.
                  array(new_data) - segments = array(offsets in item)

        Returns:
        --------
        Binary(compress(array([(index, datetime)]))
            Where index is the 0-based index of the datetime in the DataFrame
        """
        # find the index of the first datetime64 column
        idx_col = self._datetime64_index(recarr)
        # if one exists let's create the index on it
        if idx_col is not None:
            new_segments = np.array(new_segments, dtype='i8')
            last_rows = recarr[new_segments - start]
            # create numpy index
            index = np.core.records.fromarrays([last_rows[idx_col]] + [new_segments, ], dtype=INDEX_DTYPE)
            # append to existing index if exists
            if existing_index:
                # np.frombuffer returns a read-only array; that's fine as we never modify it
                existing_index_arr = np.frombuffer(decompress(existing_index), dtype=INDEX_DTYPE)
                if start > 0:
                    existing_index_arr = existing_index_arr[existing_index_arr['index'] < start]
                index = np.concatenate((existing_index_arr, index))
            return Binary(compress(index.tostring()))
        elif existing_index:
            raise ArcticException("Could not find datetime64 index in item but existing data contains one")
        return None
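
    # Illustrative note (not part of the store API): the blob produced above can be decoded
    # back into (datetime, offset) pairs for inspection, e.g.
    #
    #   segment_index = np.frombuffer(decompress(version['segment_index']), dtype=INDEX_DTYPE)
    #   segment_index['datetime']   # last datetime of each segment
    #   segment_index['index']      # 0-based offset of that row within the item
    #
    # which is exactly what _index_range below does when serving date_range reads.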

    def _datetime64_index(self, recarr):
        """ Given a np.recarray find the first datetime64 column """
        # TODO: Handle multi-indexes
        names = recarr.dtype.names
        for name in names:
            if recarr[name].dtype == DTN64_DTYPE:
                return name
        return None

    def read_options(self):
        return ['date_range']

    def _index_range(self, version, symbol, date_range=None, **kwargs):
        """ Given a version, read the segment_index and return the chunks associated
        with the date_range. As the segment index is (id -> last datetime)
        we need to take care in choosing the correct chunks. """
        if date_range and 'segment_index' in version:
            # np.frombuffer returns a read-only array; that's fine as we never modify it
            index = np.frombuffer(decompress(version['segment_index']), dtype=INDEX_DTYPE)
            dtcol = self._datetime64_index(index)
            if dtcol and len(index):
                dts = index[dtcol]
                start, end = _start_end(date_range, dts)
                if start > dts[-1]:
                    return -1, -1
                idxstart = min(np.searchsorted(dts, start), len(dts) - 1)
                idxend = min(np.searchsorted(dts, end, side='right'), len(dts) - 1)
                return int(index['index'][idxstart]), int(index['index'][idxend] + 1)
        return super(PandasStore, self)._index_range(version, symbol, **kwargs)
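
    # Worked example (illustrative, values made up): suppose the segment index holds
    # datetimes ['2016-01-03', '2016-01-06', '2016-01-09'] with offsets [99, 199, 299]
    # (each offset being the last row of a chunk).  For a date_range of
    # 2016-01-04..2016-01-07, _start_end returns those two dates, searchsorted gives
    # idxstart=1 and idxend=2, and the method returns (199, 300): bounds that select the
    # two chunks which may contain rows in the range.  read() then calls _daterange to
    # trim the exact rows.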

    def _daterange(self, recarr, date_range):
        """ Given a recarr, slice out the given artic.date.DateRange if a
        datetime64 index exists """
        idx = self._datetime64_index(recarr)
        if idx and len(recarr):
            dts = recarr[idx]
            mask = Series(np.zeros(len(dts)), index=dts)
            start, end = _start_end(date_range, dts)
            mask[start:end] = 1.0
            return recarr[mask.values.astype(bool)]
        return recarr
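
    # Note: the masking above relies on pandas label-based slicing being inclusive at both
    # ends, so mask[start:end] = 1.0 flags every row whose datetime falls inside the closed
    # [start, end] range.  Illustrative sketch (not executed), assuming recarr has a
    # datetime64 column named 'index':
    #
    #   dts = recarr['index']
    #   mask = Series(np.zeros(len(dts)), index=dts)
    #   mask[np.datetime64('2016-01-04'):np.datetime64('2016-01-07')] = 1.0
    #   recarr[mask.values.astype(bool)]    # rows between the two dates, inclusive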

    def read(self, arctic_lib, version, symbol, read_preference=None, date_range=None, **kwargs):
        item = super(PandasStore, self).read(arctic_lib, version, symbol, read_preference,
                                             date_range=date_range, **kwargs)
        if date_range:
            item = self._daterange(item, date_range)
        return item

    def get_info(self, version):
        """
        Parses out the relevant information from the version document
        and returns it to the user as a dictionary.
        """
        ret = super(PandasStore, self).get_info(version)
        ret['col_names'] = version['dtype_metadata']
        ret['handler'] = self.__class__.__name__
        ret['dtype'] = ast.literal_eval(version['dtype'])
        return ret


def _start_end(date_range, dts):
    """
    Return a (start, end) tuple of np.datetime64 dates that inclusively bounds the passed
    date_range, falling back to the first/last entries of dts for open-ended bounds.
    """
    # FIXME: timezones
    assert len(dts)
    _assert_no_timezone(date_range)
    date_range = to_pandas_closed_closed(date_range, add_tz=False)
    start = np.datetime64(date_range.start) if date_range.start else dts[0]
    end = np.datetime64(date_range.end) if date_range.end else dts[-1]
    return start, end


def _assert_no_timezone(date_range):
    for _dt in (date_range.start, date_range.end):
        if _dt and _dt.tzinfo is not None:
            raise ValueError("DateRange with timezone not supported")


class PandasSeriesStore(PandasStore):
    TYPE = 'pandasseries'
    SERIALIZER = SeriesSerializer()

    @staticmethod
    def can_write_type(data):
        return isinstance(data, Series)

    def can_write(self, version, symbol, data):
        if self.can_write_type(data):
            # A Series always has a single column
            if data.dtype is NP_OBJECT_DTYPE or data.index.dtype is NP_OBJECT_DTYPE:
                return self.SERIALIZER.can_convert_to_records_without_objects(data, symbol)
            return True
        return False

    def write(self, arctic_lib, version, symbol, item, previous_version):
        item, md = self.SERIALIZER.serialize(item)
        super(PandasSeriesStore, self).write(arctic_lib, version, symbol, item, previous_version, dtype=md)

    def append(self, arctic_lib, version, symbol, item, previous_version, **kwargs):
        item, md = self.SERIALIZER.serialize(item)
        super(PandasSeriesStore, self).append(arctic_lib, version, symbol, item, previous_version, dtype=md, **kwargs)

    def read_options(self):
        return super(PandasSeriesStore, self).read_options()

    def read(self, arctic_lib, version, symbol, **kwargs):
        item = super(PandasSeriesStore, self).read(arctic_lib, version, symbol, **kwargs)
        # Use force_bytes_to_unicode from kwargs if set, otherwise fall back to the config value (defaults to False)
        force_bytes_to_unicode = kwargs.get('force_bytes_to_unicode', FORCE_BYTES_TO_UNICODE)
        return self.SERIALIZER.deserialize(item, force_bytes_to_unicode=force_bytes_to_unicode)


class PandasDataFrameStore(PandasStore):
    TYPE = 'pandasdf'
    SERIALIZER = DataFrameSerializer()

    @staticmethod
    def can_write_type(data):
        return isinstance(data, DataFrame)

    def can_write(self, version, symbol, data):
        if self.can_write_type(data):
            if NP_OBJECT_DTYPE in data.dtypes.values or data.index.dtype is NP_OBJECT_DTYPE:
                return self.SERIALIZER.can_convert_to_records_without_objects(data, symbol)
            return True
        return False

    def write(self, arctic_lib, version, symbol, item, previous_version):
        item, md = self.SERIALIZER.serialize(item)
        super(PandasDataFrameStore, self).write(arctic_lib, version, symbol, item, previous_version, dtype=md)

    def append(self, arctic_lib, version, symbol, item, previous_version, **kwargs):
        item, md = self.SERIALIZER.serialize(item)
        super(PandasDataFrameStore, self).append(arctic_lib, version, symbol, item, previous_version, dtype=md, **kwargs)

    def read(self, arctic_lib, version, symbol, **kwargs):
        item = super(PandasDataFrameStore, self).read(arctic_lib, version, symbol, **kwargs)
        # Use force_bytes_to_unicode from kwargs if set, otherwise fall back to the config value (defaults to False)
        force_bytes_to_unicode = kwargs.get('force_bytes_to_unicode', FORCE_BYTES_TO_UNICODE)
        return self.SERIALIZER.deserialize(item, force_bytes_to_unicode=force_bytes_to_unicode)

    def read_options(self):
        return super(PandasDataFrameStore, self).read_options()
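
    # Typical end-user path (illustrative; assumes a running MongoDB and the top-level
    # Arctic / VersionStore API, which dispatches DataFrame writes and reads to this handler):
    #
    #   from arctic import Arctic
    #   from arctic.date import DateRange
    #   lib = Arctic('localhost')['NASDAQ']    # library created beforehand via initialize_library()
    #   lib.write('AAPL', df)
    #   item = lib.read('AAPL', date_range=DateRange('2016-01-04', '2016-01-07'))
    #   item.data                              # the date-sliced DataFrame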


class PandasPanelStore(PandasDataFrameStore):
    TYPE = 'pandaspan'

    @staticmethod
    def can_write_type(data):
        return isinstance(data, Panel)

    def can_write(self, version, symbol, data):
        if self.can_write_type(data):
            frame = data.to_frame(filter_observations=False)
            if NP_OBJECT_DTYPE in frame.dtypes.values or (hasattr(data, 'index') and data.index.dtype is NP_OBJECT_DTYPE):
                return self.SERIALIZER.can_convert_to_records_without_objects(frame, symbol)
            return True
        return False

    def write(self, arctic_lib, version, symbol, item, previous_version):
        if np.prod(item.shape) == 0:
            # Currently not supporting zero size panels as they drop indices when converting to dataframes
            # Plan is to find a better solution in due course.
            raise ValueError('Cannot insert a zero size panel into mongo.')
        if not all(len(i.names) == 1 for i in item.axes):
            raise ValueError('Cannot insert panels with multiindexes')
        item = item.to_frame(filter_observations=False)
        if len(set(item.dtypes)) == 1:
            # If all columns have the same dtype, we support non-string column names.
            # We know from above check that columns is not a multiindex.
            item = DataFrame(item.stack())
        elif item.columns.dtype != np.dtype('object'):
            raise ValueError('Cannot support non-object dtypes for columns')
        super(PandasPanelStore, self).write(arctic_lib, version, symbol, item, previous_version)

    def read(self, arctic_lib, version, symbol, **kwargs):
        item = super(PandasPanelStore, self).read(arctic_lib, version, symbol, **kwargs)
        if len(item.index.names) == 3:
            return item.iloc[:, 0].unstack().to_panel()
        return item.to_panel()

    def read_options(self):
        return super(PandasPanelStore, self).read_options()

    def append(self, arctic_lib, version, symbol, item, previous_version, **kwargs):
        raise ValueError('Appending not supported for pandas.Panel')