import logging
import os
import re
import threading

import pymongo
from pymongo.errors import OperationFailure, AutoReconnect
from six import string_types

from ._cache import Cache
from ._config import ENABLE_CACHE
from ._util import indent
from .auth import authenticate, get_auth
from .chunkstore import chunkstore
from .decorators import mongo_retry
from .exceptions import LibraryNotFoundException, ArcticException, QuotaExceededException
from .hooks import get_mongodb_uri
from .store import version_store, bson_store, metadata_store
from .tickstore import tickstore, toplevel

__all__ = ['Arctic', 'VERSION_STORE', 'METADATA_STORE', 'TICK_STORE', 'CHUNK_STORE', 'register_library_type']

# Set default logging handler to avoid "No handler found" warnings.
logger = logging.getLogger(__name__)
logger.addHandler(logging.NullHandler())

# Default Arctic application name: 'arctic'
APPLICATION_NAME = 'arctic'
VERSION_STORE = version_store.VERSION_STORE_TYPE
METADATA_STORE = metadata_store.METADATA_STORE_TYPE
TICK_STORE = tickstore.TICK_STORE_TYPE
CHUNK_STORE = chunkstore.CHUNK_STORE_TYPE
LIBRARY_TYPES = {version_store.VERSION_STORE_TYPE: version_store.VersionStore,
                 tickstore.TICK_STORE_TYPE: tickstore.TickStore,
                 toplevel.TICK_STORE_TYPE: toplevel.TopLevelTickStore,
                 chunkstore.CHUNK_STORE_TYPE: chunkstore.ChunkStore,
                 bson_store.BSON_STORE_TYPE: bson_store.BSONStore,
                 metadata_store.METADATA_STORE_TYPE: metadata_store.MetadataStore
                 }


def register_library_type(name, type_):
    """
    Register an Arctic Library Type handler
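
    Example (illustrative sketch): `CustomStore` below stands for a hypothetical
    class implementing a library type; it is not provided by this module, so the
    snippet is skipped as a doctest.

    >>> register_library_type('example.CustomStore', CustomStore)  # doctest: +SKIP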
    """
    if name in LIBRARY_TYPES:
        raise ArcticException("Library %s already registered as %s" % (name, LIBRARY_TYPES[name]))
    LIBRARY_TYPES[name] = type_


class Arctic(object):
    """
    The Arctic class is a top-level God object, owner of all arctic_<user> databases
    accessible in Mongo.
    Each database contains one or more ArcticLibraries, which may have
    implementation-specific functionality.

    Current Mongo Library types:
       - arctic.VERSION_STORE - Versioned store for chunked Pandas and numpy objects
                                (other Python types are pickled)
       - arctic.TICK_STORE - Tick-specific library. Supports 'snapshots', stores
                             updates efficiently, and is not versioned.
       - arctic.METADATA_STORE - Stores metadata with timestamps

    Arctic and ArcticLibrary are responsible for connection setup, authentication,
    dispatch to the appropriate library implementation, and quotas.
    """
    DB_PREFIX = 'arctic'
    METADATA_COLL = "ARCTIC"
    METADATA_DOC_ID = "ARCTIC_META"

    _MAX_CONNS = 4
    __conn = None

    def __init__(self, mongo_host, app_name=APPLICATION_NAME, allow_secondary=False,
                 socketTimeoutMS=10 * 60 * 1000, connectTimeoutMS=2 * 1000,
                 serverSelectionTimeoutMS=30 * 1000, **kwargs):
        """
        Constructs an Arctic Datastore.

        Note: If mongo_host is a pymongo connection and the process is later forked, the
                new pymongo connection may have different parameters.

        Parameters
        ----------
        mongo_host: A MongoDB hostname, alias or Mongo Connection

        app_name: `str` the name of the application used for resolving credentials when
            authenticating against the mongo_host.
            We will fetch credentials using the authentication hook.
            Teams should override this so that different applications don't accidentally
            run with privileges to other applications' databases.

        allow_secondary: `bool` indicates if we allow reads against
             secondary members in the cluster.  These reads may be
             a few seconds behind (but are usually split-second up-to-date).

        socketTimeoutMS: `int` the time in milliseconds a socket send or receive may
            take before the pymongo driver times the operation out.

        connectTimeoutMS: `int` the time in milliseconds the pymongo driver waits for
            a connection to a MongoDB server to be established before timing out.

        serverSelectionTimeoutMS: `int` the main tunable used for configuring how long
            the pymongo driver will spend on MongoDB cluster discovery.  This parameter
            takes precedence over connectTimeoutMS: https://jira.mongodb.org/browse/DRIVERS-222

        kwargs: `dict` extra keyword arguments to pass when calling pymongo.MongoClient,
            for example ssl parameters.
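
        Examples
        --------
        Illustrative usage sketch; it assumes a MongoDB instance reachable on
        'localhost' and is therefore skipped as a doctest:

        >>> from arctic import Arctic
        >>> store = Arctic('localhost', app_name='my_app')  # doctest: +SKIP
        >>> store.list_libraries()                          # doctest: +SKIP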
        """
        self._application_name = app_name
        self._library_cache = {}
        self._allow_secondary = allow_secondary
        self._socket_timeout = socketTimeoutMS
        self._connect_timeout = connectTimeoutMS
        self._server_selection_timeout = serverSelectionTimeoutMS
        self._lock = threading.RLock()
        self._pid = os.getpid()
        self._pymongo_kwargs = kwargs
        self._cache = None

        if isinstance(mongo_host, string_types):
            self._given_instance = False
            self.mongo_host = mongo_host
        else:
            self._given_instance = True
            self.__conn = mongo_host
            # Workaround for: https://jira.mongodb.org/browse/PYTHON-927
            mongo_host.server_info()
            self.mongo_host = ",".join(["{}:{}".format(x[0], x[1]) for x in mongo_host.nodes])
            self._adminDB = self._conn.admin
            self._cache = Cache(self._conn)

    @property
    @mongo_retry
    def _conn(self):
        with self._lock:
            # We must make sure that no MongoClient instances are used from parent after fork:
            #    http://api.mongodb.com/python/current/faq.html#using-pymongo-with-multiprocessing
            curr_pid = os.getpid()
            if curr_pid != self._pid:
                if self._given_instance:
                    logger.warning("Forking process. Arctic was passed a pymongo connection during init; "
                                   "the new pymongo connection may have different parameters.")
                self._pid = curr_pid  # this must precede reset(), otherwise we end up in infinite recursion
                self.reset()  # also triggers re-auth

            if self.__conn is None:
                host = get_mongodb_uri(self.mongo_host)
                logger.info("Connecting to mongo: {0} ({1})".format(self.mongo_host, host))
                self.__conn = pymongo.MongoClient(host=host,
                                                  maxPoolSize=self._MAX_CONNS,
                                                  socketTimeoutMS=self._socket_timeout,
                                                  connectTimeoutMS=self._connect_timeout,
                                                  serverSelectionTimeoutMS=self._server_selection_timeout,
                                                  **self._pymongo_kwargs)
                self._adminDB = self.__conn.admin
                self._cache = Cache(self.__conn)

                # Authenticate against admin for the user
                auth = get_auth(self.mongo_host, self._application_name, 'admin')
                if auth:
                    authenticate(self._adminDB, auth.user, auth.password)

                # Accessing _conn is synchronous. The new PyMongo driver may be lazier than
                # the previous one, so force a connection here.
                self.__conn.server_info()

            return self.__conn

    def reset(self):
        logger.debug("Arctic.reset()")
        with self._lock:
            if self.__conn is not None:
                self.__conn.close()
                self.__conn = None
            for l in self._library_cache.values():
                if hasattr(l, '_reset') and callable(l._reset):
                    logger.debug("Library reset() %s" % l)
                    l._reset()  # the existence of _reset() is not guaranteed/enforced, it also triggers re-auth

    def __str__(self):
        return "<Arctic at %s, connected to %s>" % (hex(id(self)), str(self._conn))

    def __repr__(self):
        return str(self)

    def __getstate__(self):
        return {'mongo_host': self.mongo_host,
                'app_name': self._application_name,
                'allow_secondary': self._allow_secondary,
                'socketTimeoutMS': self._socket_timeout,
                'connectTimeoutMS': self._connect_timeout,
                'serverSelectionTimeoutMS': self._server_selection_timeout}

    def __setstate__(self, state):
        return Arctic.__init__(self, **state)

    def is_caching_enabled(self):
        """
        Return whether caching is enabled for list_libraries.  The setting is global
        (per Mongo cluster), with ENABLE_CACHE used as the default.
        """
        _ = self._conn  # Ensures the connection exists and cache is initialized with it.
        return self._cache.is_caching_enabled(ENABLE_CACHE)

    def list_libraries(self, newer_than_secs=None):
        """
        Returns
        -------
        list of Arctic library names
        """
        return self._list_libraries_cached(newer_than_secs) if self.is_caching_enabled() else self._list_libraries()

    @mongo_retry
    def _list_libraries(self):
        libs = []
        for db in self._conn.list_database_names():
            if db.startswith(self.DB_PREFIX + '_'):
                for coll in self._conn[db].list_collection_names():
                    if coll.endswith(self.METADATA_COLL):
                        libs.append(db[len(self.DB_PREFIX) + 1:] + "." + coll[:-1 * len(self.METADATA_COLL) - 1])
            elif db == self.DB_PREFIX:
                for coll in self._conn[db].list_collection_names():
                    if coll.endswith(self.METADATA_COLL):
                        libs.append(coll[:-1 * len(self.METADATA_COLL) - 1])
        return libs

    # Better to be pessimistic here and not retry.
    def _list_libraries_cached(self, newer_than_secs=None):
        """
        Returns
        -------
        List of Arctic library names from a cached collection (global per Mongo cluster) in Mongo.
        Long term, list_libraries should accept a use_cached argument.
        """
        _ = self._conn  # Ensures the connection exists and cache is initialized with it.
        cache_data = self._cache.get('list_libraries', newer_than_secs)
        if not cache_data:
            # Try to refresh the cache.
            logger.debug("Cache is empty or stale, fetching from the slow path and reloading the cache.")
            libs = self._list_libraries()
            self._cache.set('list_libraries', libs)
            return libs

        return cache_data

    def reload_cache(self):
        _ = self._conn  # Ensures the connection exists and cache is initialized with it.
        self._cache.set('list_libraries', self._list_libraries())

    def library_exists(self, library):
        """
        Check whether a given library exists.

        Parameters
        ----------
        library : `str`
            The name of the library. e.g. 'library' or 'user.library'

        Returns
        -------
        `bool`
            True if the library with the given name already exists, False otherwise
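
        Examples
        --------
        Illustrative sketch; assumes `store` is a connected `Arctic` instance,
        so this is skipped as a doctest:

        >>> store.library_exists('user.library')  # doctest: +SKIP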
        """
        exists = False
        try:
            # This forces auth errors and lets us fall back to the slower list_libraries() check below
            ArcticLibraryBinding(self, library).get_library_type()
            # This will obtain the library; if no exception is thrown we have verified its existence
            self.get_library(library)
            exists = True
        except OperationFailure:
            exists = library in self.list_libraries()
        except LibraryNotFoundException:
            pass
        return exists

    def _sanitize_lib_name(self, library):
        # For list_libraries we don't return the fully qualified lib name, e.g. arctic_skhare.test -> skhare.test
        if library.startswith(self.DB_PREFIX + '_'):
            return library[len(self.DB_PREFIX) + 1:]

        return library

    @mongo_retry
    def initialize_library(self, library, lib_type=VERSION_STORE, **kwargs):
        """
        Create an Arctic Library of a particular type.

        Parameters
        ----------
        library : `str`
            The name of the library. e.g. 'library' or 'user.library'

        lib_type : `str`
            The type of the library.  e.g. arctic.VERSION_STORE or arctic.TICK_STORE
            Or any type registered with register_library_type
            Default: arctic.VERSION_STORE

        kwargs :
            Arguments passed to the Library type for initialization.
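
        Examples
        --------
        Illustrative sketch; assumes `store` is a connected `Arctic` instance and
        'user.library' is a library name of your choosing (skipped as a doctest):

        >>> store.initialize_library('user.library', lib_type=VERSION_STORE)  # doctest: +SKIP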
        """
        lib = ArcticLibraryBinding(self, library)
        # Check that we don't create too many namespaces.
        # The check can be disabled by passing check_library_count=False.
        check_library_count = kwargs.pop('check_library_count', True)
        existing_collections = len(self._conn[lib.database_name].list_collection_names())
        if existing_collections > 5000 and check_library_count:
            raise ArcticException("Too many namespaces %s, not creating: %s" %
                                  (existing_collections, library))
        lib.set_library_type(lib_type)
        LIBRARY_TYPES[lib_type].initialize_library(lib, **kwargs)
        # Add a 10G quota just in case the user is calling this via the API.
        if not lib.get_quota():
            lib.set_quota(10 * 1024 * 1024 * 1024)

        self._cache.append('list_libraries', self._sanitize_lib_name(library))

    @mongo_retry
    def delete_library(self, library):
        """
        Delete an Arctic Library, and all associated collections in the MongoDB.

        Parameters
        ----------
        library : `str`
            The name of the library. e.g. 'library' or 'user.library'
        """
        lib = ArcticLibraryBinding(self, library)
        colname = lib.get_top_level_collection().name
        if not [c for c in lib._db.list_collection_names() if re.match(r"^{}([\.].*)?$".format(colname), c)]:
            logger.info('Nothing to delete. Arctic library %s does not exist.' % colname)
        logger.info('Dropping collection: %s' % colname)
        lib._db.drop_collection(colname)
        for coll in lib._db.list_collection_names():
            if coll.startswith(colname + '.'):
                logger.info('Dropping collection: %s' % coll)
                lib._db.drop_collection(coll)
        # The library may be cached under both the name passed in and its fully qualified name
        self._library_cache.pop(library, None)
        self._library_cache.pop(lib.get_name(), None)

        self._cache.delete_item_from_key('list_libraries', self._sanitize_lib_name(library))

    def get_library(self, library):
        """
        Return the library instance.  Can generally use slicing to return the library:
            arctic_store[library]

        Parameters
        ----------
        library : `str`
            The name of the library. e.g. 'library' or 'user.library'
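
        Examples
        --------
        Illustrative sketch; assumes `store` is a connected `Arctic` instance and
        'user.library' has already been initialized (skipped as a doctest).
        Slicing access is equivalent:

        >>> lib = store.get_library('user.library')  # doctest: +SKIP
        >>> lib = store['user.library']               # doctest: +SKIP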
        """
        if library in self._library_cache:
            return self._library_cache[library]

        error = None
        try:
            lib = ArcticLibraryBinding(self, library)
            lib_type = lib.get_library_type()
        except (OperationFailure, AutoReconnect) as e:
            error = e

        if error:
            raise LibraryNotFoundException("Library %s was not correctly initialized in %s.\nReason: %r)" %
                                           (library, self, error))
        elif not lib_type:
            raise LibraryNotFoundException("Library %s was not correctly initialized in %s." %
                                           (library, self))
        elif lib_type not in LIBRARY_TYPES:
            raise LibraryNotFoundException("Couldn't load LibraryType '%s' for '%s' (has the class been registered?)" %
                                           (lib_type, library))
        instance = LIBRARY_TYPES[lib_type](lib)
        self._library_cache[library] = instance
        # The library's official name may differ from 'library': e.g. 'library' vs 'user.library'
        self._library_cache[lib.get_name()] = instance
        return self._library_cache[library]

    def __getitem__(self, key):
        if isinstance(key, string_types):
            return self.get_library(key)
        else:
            raise ArcticException("Unrecognised library specification - use [libraryName]")

    def set_quota(self, library, quota):
        """
        Set a quota (in bytes) on this user library.  The quota is 'best effort',
        and should be set conservatively.

        Parameters
        ----------
        library : `str`
            The name of the library. e.g. 'library' or 'user.library'

        quota : `int`
            Advisory quota for the library - in bytes
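
        Examples
        --------
        Illustrative sketch; assumes `store` is a connected `Arctic` instance
        (skipped as a doctest).  Sets a 10 GB advisory quota:

        >>> store.set_quota('user.library', 10 * 1024 * 1024 * 1024)  # doctest: +SKIP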
        """
        ArcticLibraryBinding(self, library).set_quota(quota)

    def get_quota(self, library):
        """
        Return the quota currently set on the library.

        Parameters
        ----------
        library : `str`
            The name of the library. e.g. 'library' or 'user.library'
        """
        return ArcticLibraryBinding(self, library).get_quota()

    def check_quota(self, library):
        """
        Check the quota on the library, as would be done during normal writes.

        Parameters
        ----------
        library : `str`
            The name of the library. e.g. 'library' or 'user.library'

        Raises
        ------
        arctic.exceptions.QuotaExceededException if the quota has been exceeded
        """
        ArcticLibraryBinding(self, library).check_quota()

    def rename_library(self, from_lib, to_lib):
        """
        Renames a library

        Parameters
        ----------
        from_lib: str
            The name of the library to be renamed
        to_lib: str
            The new name of the library
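
        Examples
        --------
        Illustrative sketch; assumes `store` is a connected `Arctic` instance and
        'user.old_name' already exists (skipped as a doctest):

        >>> store.rename_library('user.old_name', 'user.new_name')  # doctest: +SKIP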
        """
        to_colname = to_lib
        if '.' in from_lib and '.' in to_lib:
            if from_lib.split('.')[0] != to_lib.split('.')[0]:
                raise ValueError("Collection can only be renamed in the same database")
            to_colname = to_lib.split('.')[1]

        lib = ArcticLibraryBinding(self, from_lib)
        colname = lib.get_top_level_collection().name

        logger.info('Renaming collection: %s' % colname)
        lib._db[colname].rename(to_colname)
        for coll in lib._db.list_collection_names():
            if coll.startswith(colname + '.'):
                # Only replace the leading library-name prefix, not other occurrences in the name
                lib._db[coll].rename(to_colname + coll[len(colname):])

        # The library may be cached under both the original name and its fully qualified name
        self._library_cache.pop(from_lib, None)
        self._library_cache.pop(lib.get_name(), None)

        self._cache.update_item_for_key(
            'list_libraries', self._sanitize_lib_name(from_lib), self._sanitize_lib_name(to_lib))

    def get_library_type(self, lib):
        """
        Returns the type of the library

        Parameters
        ----------
        lib: str
            The name of the library. e.g. 'library' or 'user.library'
        """
        return ArcticLibraryBinding(self, lib).get_library_type()


class ArcticLibraryBinding(object):
    """
    The ArcticLibraryBinding type holds the binding between the library name and the
    concrete implementation of the library.

    Also provides access to additional metadata about the library
        - Access to the library's top-level collection
        - Enforces quota on the library
        - Access to custom metadata about the library
    """
    DB_PREFIX = Arctic.DB_PREFIX
    TYPE_FIELD = "TYPE"
    QUOTA = 'QUOTA'

    quota = None
    quota_countdown = 0

    @classmethod
    def _parse_db_lib(cls, library):
        """
        Returns the canonical (database_name, library) tuple for the passed-in
        string 'library'.
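
        Examples
        --------
        Pure string parsing (derived from the implementation below), so no MongoDB
        connection is required:

        >>> ArcticLibraryBinding._parse_db_lib('user.library')
        ('arctic_user', 'library')
        >>> ArcticLibraryBinding._parse_db_lib('library')
        ('arctic', 'library')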
        """
        database_name = library.split('.', 2)
        if len(database_name) == 2:
            library = database_name[1]
            if database_name[0].startswith(cls.DB_PREFIX):
                database_name = database_name[0]
            else:
                database_name = cls.DB_PREFIX + '_' + database_name[0]
        else:
            database_name = cls.DB_PREFIX
        return database_name, library

    def __init__(self, arctic, library):
        self.arctic = arctic
        self._curr_conn = self.arctic._conn
        self._lock = threading.RLock()
        database_name, library = self._parse_db_lib(library)
        self.library = library
        self.database_name = database_name
        self._auth(self.arctic._conn[self.database_name])

    @property
    def _db(self):
        with self._lock:
            arctic_conn = self.arctic._conn
            if arctic_conn is not self._curr_conn:
                self._auth(arctic_conn[self.database_name])  # trigger re-authentication if Arctic has been reset
                self._curr_conn = arctic_conn
        return self.arctic._conn[self.database_name]

    @property
    def _library_coll(self):
        return self._db[self.library]

    def __str__(self):
        return """<ArcticLibrary at %s, %s.%s>
%s""" % (hex(id(self)), self._db.name, self._library_coll.name, indent(str(self.arctic), 4))

    def __repr__(self):
        return str(self)

    def __getstate__(self):
        return {'arctic': self.arctic, 'library': '.'.join([self.database_name, self.library])}

    def __setstate__(self, state):
        return ArcticLibraryBinding.__init__(self, state['arctic'], state['library'])

    @mongo_retry
    def _auth(self, database):
        # Get .mongopass details here
        if not hasattr(self.arctic, 'mongo_host'):
            return

        auth = get_auth(self.arctic.mongo_host, self.arctic._application_name, database.name)
        if auth:
            authenticate(database, auth.user, auth.password)

    def reset_auth(self):
        logger.debug("reset_auth() %s" % self)
        self._auth(self._db)

    def get_name(self):
        return self._db.name + '.' + self._library_coll.name

    def get_top_level_collection(self):
        """
        Return the top-level collection for the Library.  This collection is to be used
        for storing data.

        Note we expect (and callers require) this collection to have default read-preference: primary.
        The read path may choose to reduce this if secondary reads are allowed.
        """
        return self._library_coll

    def set_quota(self, quota_bytes):
        """
        Set a quota (in bytes) on this user library.  The quota is 'best effort',
        and should be set conservatively.

        A quota of 0 is 'unlimited'
        """
        self.set_library_metadata(ArcticLibraryBinding.QUOTA, quota_bytes)
        self.quota = quota_bytes
        self.quota_countdown = 0

    def get_quota(self):
        """
        Get the current quota on this user library.
        """
        return self.get_library_metadata(ArcticLibraryBinding.QUOTA)

    def check_quota(self):
        """
        Check whether the user is within quota.  Should be called before
        every write.  Raises QuotaExceededException if the library has exceeded
        its allotted quota.
        """
        # Don't check on every write, that would be slow
        if self.quota_countdown > 0:
            self.quota_countdown -= 1
            return

        # Re-cache the quota after the countdown
        self.quota = self.get_library_metadata(ArcticLibraryBinding.QUOTA)
        if self.quota is None or self.quota == 0:
            self.quota = 0
            return

        # Figure out whether the user has exceeded their quota
        library = self.arctic[self.get_name()]
        stats = library.stats()

        def to_gigabytes(bytes_):
            return bytes_ / 1024. / 1024. / 1024.

        # Have we exceeded our quota?
        size = stats['totals']['size']
        count = stats['totals']['count']
        if size >= self.quota:
            raise QuotaExceededException("Mongo Quota Exceeded: %s %.3f / %.0f GB used" % (
                '.'.join([self.database_name, self.library]),
                to_gigabytes(size),
                to_gigabytes(self.quota)))

        # Quota not exceeded: log an informational message and return
        try:
            avg_size = size // count if count > 1 else 100 * 1024
            remaining = self.quota - size
            remaining_count = remaining / avg_size
            if remaining_count < 100 or float(remaining) / self.quota < 0.1:
                logger.warning("Mongo Quota: %s %.3f / %.0f GB used" % (
                    '.'.join([self.database_name, self.library]),
                    to_gigabytes(size),
                    to_gigabytes(self.quota)))
            else:
                logger.info("Mongo Quota: %s %.3f / %.0f GB used" % (
                    '.'.join([self.database_name, self.library]),
                    to_gigabytes(size),
                    to_gigabytes(self.quota)))

            # Set up a countdown so we skip the quota check for the next few writes.
            # We re-check roughly every half of the estimated remaining write capacity.
            self.quota_countdown = int(max(remaining_count // 2, 1))
        except Exception as e:
            logger.warning("Encountered an exception while calculating quota statistics: %s" % str(e))

    def get_library_type(self):
        return self.get_library_metadata(ArcticLibraryBinding.TYPE_FIELD)

    def set_library_type(self, lib_type):
        self.set_library_metadata(ArcticLibraryBinding.TYPE_FIELD, lib_type)

    @mongo_retry
    def get_library_metadata(self, field):
        lib_metadata = self._library_coll[self.arctic.METADATA_COLL].find_one({"_id": self.arctic.METADATA_DOC_ID})
        if lib_metadata is not None:
            return lib_metadata.get(field)
        else:
            return None

    @mongo_retry
    def set_library_metadata(self, field, value):
        self._library_coll[self.arctic.METADATA_COLL].update_one({'_id': self.arctic.METADATA_DOC_ID},
                                                                 {'$set': {field: value}}, upsert=True)