# Python source code of __init__

from collections import defaultdict
from importlib import import_module

from django.conf import settings
from elasticsearch.client import Elasticsearch
from elasticsearch_dsl.search import Search
from six import iteritems, itervalues, string_types

from .aliases import SearchAlias
from .indices import ModelIndex
from .logger import logger


class Bungiesearch(Search):
    '''
    Search object which reads the `BUNGIESEARCH` Django settings, initializes (and caches) the
    elasticsearch connection, and can map the returned hits back to Django model instances.

    All the lookup maps declared below are class attributes: they are shared by every instance and
    are filled only once, by `__load_settings__`.
    '''
    # Timeout used for the connection when neither the caller nor the settings (`TIMEOUT`) provide one.
    DEFAULT_TIMEOUT = 5
    # The `BUNGIESEARCH` settings dictionary, read when the class is defined.
    BUNGIE = settings.BUNGIESEARCH

    # The maps below store, for each model index module defined in the settings, the relations
    # index name -> model index and index name -> model (and the reverse ones). Settings shouldn't
    # change between subsequent calls to Search(), which is why this is stored at class level.

    # Connection cache: key built by `_build_key` -> Elasticsearch client.
    _cached_es_instances = {}
    # Let's go through the settings in order to map each defined Model/ModelIndex to the elasticsearch index name.
    # _model_to_index:          model class -> list of index names.
    # _model_name_to_index:     model name  -> list of index names.
    # _model_name_to_model_idx: model name  -> list of ModelIndex instances.
    # NOTE: these are defaultdicts, so subscripting an unknown key inserts (and returns) an empty list.
    _model_to_index, _model_name_to_index, _model_name_to_model_idx = defaultdict(list), defaultdict(list), defaultdict(list)
    # _index_to_model:            index name -> list of model classes.
    # _idx_name_to_mdl_to_mdlidx: index name -> {model name -> ModelIndex instance}.
    _index_to_model, _idx_name_to_mdl_to_mdlidx = defaultdict(list), defaultdict(dict)
    # _model_name_to_default_index: model name -> the ModelIndex instance marked as default.
    # _alias_hooks:                 prefixed alias name -> SearchAlias instance.
    _model_name_to_default_index, _alias_hooks = {}, {}
    # NOTE(review): not referenced anywhere in the code visible here -- confirm whether it is still used.
    _managed_models = []
    # Raised by `__load_settings__` so that the settings are only processed once.
    __loaded_indices__ = False

    @classmethod
    def __load_settings__(cls):
        '''
        Fills the class-level lookup maps from the `BUNGIESEARCH` Django settings.

        Every module listed in `INDICES` is imported and each `ModelIndex` subclass it contains is
        instantiated and registered. Every module listed in the optional `ALIASES` is imported and each
        `SearchAlias` subclass it contains is instantiated and registered under its prefixed alias name.

        The loaded flag is raised *before* loading starts, hence this runs at most once: later calls
        return immediately, even if the first load failed half way.

        :raise AttributeError: If a second model index is marked as default for the same model.
        '''
        if cls.__loaded_indices__:
            return
        cls.__loaded_indices__ = True

        def strict_subclasses(module, base):
            # Yields the classes defined or imported in `module` which derive from `base`, `base` excluded.
            for candidate in itervalues(module.__dict__):
                try:
                    is_match = issubclass(candidate, base) and candidate is not base
                except TypeError:
                    # `candidate` is not a class (a module also holds functions, constants, etc.).
                    continue
                if is_match:
                    yield candidate

        # Loading indices. Only the subclass test above is guarded against TypeError: an error raised
        # while instantiating a model index is a genuine problem and must not be silently ignored.
        for index_name, module_str in iteritems(cls.BUNGIE['INDICES']):
            for index_cls in strict_subclasses(import_module(module_str), ModelIndex):
                index_instance = index_cls()
                assoc_model = index_instance.get_model()
                model_name = assoc_model.__name__
                cls._index_to_model[index_name].append(assoc_model)
                cls._model_name_to_model_idx[model_name].append(index_instance)
                cls._idx_name_to_mdl_to_mdlidx[index_name][model_name] = index_instance
                if index_instance.is_default:
                    if model_name in cls._model_name_to_default_index:
                        raise AttributeError('ModelIndex {} on index {} is marked as default, but {} was already set as default.'.format(index_instance, index_name, cls._model_name_to_default_index[model_name]))
                    cls._model_name_to_default_index[model_name] = index_instance

        # Create reverse maps in order to have O(1) access.
        for index_name, models in iteritems(cls._index_to_model):
            for model in models:
                cls._model_to_index[model].append(index_name)
                cls._model_name_to_index[model.__name__].append(index_name)

        # Loading aliases.
        for alias_prefix, module_str in iteritems(cls.BUNGIE.get('ALIASES', {})):
            # A `None` prefix falls back to `bungie`; an empty prefix means the alias name is used as is.
            if alias_prefix is None:
                alias_prefix = 'bungie'
            if alias_prefix != '':
                alias_prefix += '_'
            for alias_cls in strict_subclasses(import_module(module_str), SearchAlias):
                alias_instance = alias_cls()
                cls._alias_hooks[alias_prefix + alias_instance.alias_name] = alias_instance

    @classmethod
    def _build_key(cls, urls, timeout, **settings):
        '''
        Builds the hashable key under which an elasticsearch client is cached.
        :param urls: a single URL as a string, or an iterable of URLs.
        :param timeout: connection timeout, stored as is in the key.
        :param **settings: remaining client settings; they end up in the key as a string.
        :return: tuple `(urls as a tuple, timeout, repr of the settings sorted by name)`.
        '''
        # The settings are sorted by name and flattened with repr. There are plenty of edge cases
        # (values whose repr is not stable), but the worst outcome is a different key, i.e. a new
        # Elasticsearch client instead of a cached one.
        ordered_settings = sorted(settings.items(), key=lambda pair: pair[0])
        settings_repr = repr(ordered_settings)
        # elasticsearch accepts a plain string for the URLs: wrap it instead of exploding it
        # into a tuple of characters.
        url_tuple = (urls,) if isinstance(urls, string_types) else tuple(urls)
        # A tuple of all the bits is hashable, which is all that is needed from the key.
        return (url_tuple, timeout, settings_repr)

    @classmethod
    def get_index(cls, model, via_class=False):
        '''
        Returns the list of index names associated to the given model, given as a class or as a string.
        :param model: model name or model class if via_class set to True.
        :param via_class: set to True if parameter model is a class.
        :raise KeyError: If the provided model does not have any index associated.
        '''
        # Make sure the maps are populated even if no Bungiesearch instance was created yet.
        cls.__load_settings__()
        registry = cls._model_to_index if via_class else cls._model_name_to_index
        # The maps are defaultdicts: subscripting an unknown model would never raise, it would
        # insert and return an empty list instead. Hence the explicit membership test.
        if model not in registry:
            raise KeyError('Could not find any index defined for model {}. Is the model in one of the model index modules of BUNGIESEARCH["INDICES"]?'.format(model))
        return registry[model]

    @classmethod
    def get_model_index(cls, model, default=True):
        '''
        Returns the default model index for the given model, or the list of model indices if default is False.
        :param model: model name as a string.
        :param default: set to False to get every model index of the model instead of only the default one.
        :raise KeyError: If the provided model does not have any (default) model index associated.
        '''
        # Make sure the maps are populated even if no Bungiesearch instance was created yet.
        cls.__load_settings__()
        registry = cls._model_name_to_default_index if default else cls._model_name_to_model_idx
        # The non-default map is a defaultdict: subscripting an unknown model would insert and
        # return an empty list instead of raising. Hence the explicit membership test.
        if model not in registry:
            raise KeyError('Could not find any model index defined for model {}.'.format(model))
        return registry[model]

    @classmethod
    def get_indices(cls):
        '''
        Returns the names of the indices known to Bungiesearch, i.e. the keys of the
        index name -> model indices map.
        '''
        index_registry = cls._idx_name_to_mdl_to_mdlidx
        return index_registry.keys()

    @classmethod
    def get_models(cls, index, as_class=False):
        '''
        Returns the models defined for this index.
        :param index: index name.
        :param as_class: set to True to return the model as a model object instead of as a string.
        :raise KeyError: If no index of that name is defined.
        '''
        # Make sure the maps are populated even if no Bungiesearch instance was created yet.
        cls.__load_settings__()
        registry = cls._index_to_model if as_class else cls._idx_name_to_mdl_to_mdlidx
        # Both maps are defaultdicts: subscripting an unknown index would never raise, and would
        # register that name as if it were a real index. Hence the explicit membership test.
        if index not in registry:
            raise KeyError('Could not find any index named {}. Is this index defined in BUNGIESEARCH["INDICES"]?'.format(index))
        return registry[index] if as_class else registry[index].keys()

    @classmethod
    def get_model_indices(cls, index):
        '''
        Returns the model indices (i.e. ModelIndex objects) defined for this index.
        :param index: index name.
        :raise KeyError: If no index of that name is defined.
        '''
        # Make sure the maps are populated even if no Bungiesearch instance was created yet.
        cls.__load_settings__()
        registry = cls._idx_name_to_mdl_to_mdlidx
        # The map is a defaultdict: subscripting an unknown index would never raise, and would
        # register that name as if it were a real index. Hence the explicit membership test.
        if index not in registry:
            raise KeyError('Could not find any index named {}. Is this index defined in BUNGIESEARCH["INDICES"]?'.format(index))
        return registry[index].values()

    @classmethod
    def map_raw_results(cls, raw_results, instance=None):
        '''
        Maps raw results to database model objects.

        Hits whose doc type is unknown, or which come from an index the doc type is not associated to,
        are logged and left untouched at their position. The other hits are grouped per (index, model)
        and each group is fetched from the database with one `pk__in` queryset. A hit whose primary key
        is not returned by the database keeps `None` at its position.

        :param raw_results: list raw results as returned from elasticsearch-dsl-py.
        :param instance: Bungiesearch instance if you want to make use of `.only()` or `optmize_queries` as defined in the ModelIndex.
        :return: list of mapped results in the *same* order as returned by elasticsearch.
        '''
        # Ids to fetch, grouped by (index name, model name). Tuples are used as keys rather than a
        # dotted string, since an index name containing a dot could not be split back reliably.
        ids_per_model = defaultdict(list)
        # Initializing the list to the number of returned results. This allows us to restore each item in its position.
        hits = raw_results.hits if hasattr(raw_results, 'hits') else raw_results
        results = [None] * len(hits)
        # (index name, model name, id as text) -> (position in the results, hit meta information).
        found_results = {}
        for pos, result in enumerate(raw_results):
            model_name = result.meta.doc_type
            index_name = result.meta.index
            if model_name not in cls._model_name_to_index or index_name not in cls._model_name_to_index[model_name]:
                logger.warning('Returned object of type {} ({}) is not defined in the settings, or is not associated to the same index as in the settings.'.format(model_name, result))
                results[pos] = result
            else:
                ids_per_model[(index_name, model_name)].append(result.meta.id)
                # The id is stored as text because it is later matched against the database primary
                # key, which is not necessarily of the same type as the id returned by elasticsearch.
                found_results[(index_name, model_name, '{}'.format(result.meta.id))] = (pos, result.meta)

        # Now that we have model ids per model name, let's fetch everything at once.
        for (index_name, model_name), ids in iteritems(ids_per_model):
            model_idx = cls._idx_name_to_mdl_to_mdlidx[index_name][model_name]
            model_obj = model_idx.get_model()
            items = model_obj.objects.filter(pk__in=ids)
            # Compared to None on purpose: truth-testing a Bungiesearch object may fall back to
            # `__len__`, which issues a count request to elasticsearch.
            if instance is not None:
                if instance._only == '__model' or model_idx.optimize_queries:
                    desired_fields = model_idx.fields_to_fetch
                elif instance._only == '__fields':
                    desired_fields = instance._fields
                else:
                    desired_fields = instance._only

                if desired_fields: # Prevents setting the database fetch to __fields but not having specified any field to elasticsearch.
                    items = items.only(
                        *[field.name
                          for field in model_obj._meta.get_fields()
                          # For complete backwards compatibility, you may want to exclude
                          # GenericForeignKey from the results.
                          if field.name in desired_fields and \
                             not (field.many_to_one and field.related_model is None)
                         ]
                    )
            # Let's reposition each item in the results and set the _searchmeta meta information.
            for item in items:
                pos, meta = found_results[(index_name, model_name, '{}'.format(item.pk))]
                item._searchmeta = meta
                results[pos] = item

        return results

    def __init__(self, urls=None, timeout=None, force_new=False, raw_results=False, **kwargs):
        '''
        Creates a new ElasticSearch DSL object. The low level elasticsearch client is taken from the
        class-level cache if one was already created with the exact same settings; otherwise a new
        one is created and cached.

        If no parameters are passed, everything is determined from the Django settings.

        :param urls: A list of URLs, or a single string of URL (without leading `http://`), or None to read `URLS` from the settings.
        :param timeout: Timeout used in the connection. When not set, `TIMEOUT` from the settings is used, or `DEFAULT_TIMEOUT`.
        :param force_new: Set to `True` to force a new elasticsearch connection. Otherwise will aggressively use any connection with the exact same settings.
        :param raw_results: Set to `True` to have `execute` store the elasticsearch-dsl-py response instead of mapping it to model objects.
        :param **kwargs: `using`, `index`, `doc_type` and `extra` are passed to elasticsearch-dsl-py's Search; anything else is passed
                         to the low level elasticsearch client. If nothing is left for the client, `ES_SETTINGS` from the settings is used.
        '''

        Bungiesearch.__load_settings__()

        bungie = Bungiesearch.BUNGIE
        urls = urls or bungie['URLS']
        if not timeout:
            timeout = bungie.get('TIMEOUT', Bungiesearch.DEFAULT_TIMEOUT)

        # Splitting the keyword arguments between the Search object and the low level client.
        search_keys = ['using', 'index', 'doc_type', 'extra']
        search_settings = dict((name, value) for name, value in iteritems(kwargs) if name in search_keys)
        es_settings = dict((name, value) for name, value in iteritems(kwargs) if name not in search_keys)
        if not es_settings:
            # Nothing was provided for the client: let's see if it's defined in the settings.
            es_settings = bungie.get('ES_SETTINGS', {})

        # The key identifies a client by its URLs, timeout and settings. It is used both to look for
        # a previously created client and to store a newly created one.
        cache = Bungiesearch._cached_es_instances
        cache_key = Bungiesearch._build_key(urls, timeout, **es_settings)
        es_instance = None if force_new else cache.get(cache_key)
        if not es_instance:
            es_instance = Elasticsearch(urls, timeout=timeout, **es_settings)
            cache[cache_key] = es_instance

        # An explicitly provided `using` wins over the client obtained above.
        search_settings.setdefault('using', es_instance)

        super(Bungiesearch, self).__init__(**search_settings)

        # Creating instance attributes.
        self._raw_results_only = raw_results
        self.results = [] # Store the mapped and unmapped results.
        self._only = [] # Stores the exact fields to fetch from the database when mapping.

    def _clone(self):
        '''
        Must clone additional fields to those cloned by elasticsearch-dsl-py.
        :return: the clone, carrying over this object's raw results flag and `.only()` restriction.
        '''
        instance = super(Bungiesearch, self)._clone()
        instance._raw_results_only = self._raw_results_only
        # The restriction set by `.only()` has to follow the clone, otherwise it is lost by any
        # call chained after it, and by slicing, which executes a clone of this object.
        instance._only = self._only
        return instance

    def get_es_instance(self):
        '''
        Gives access to the low level elasticsearch instance used by this search (its `_using`
        attribute), in order to perform low level operations.
        '''
        es_instance = self._using
        return es_instance

    def execute_raw(self):
        '''
        Executes the search through elasticsearch-dsl-py and stores the response, as is, in `self.raw_results`.
        '''
        raw_response = super(Bungiesearch, self).execute()
        self.raw_results = raw_response

    def execute(self, return_results=True):
        '''
        Executes the query and, unless raw results were requested, attempts to create model objects from the results.
        Results which were already obtained (and are not empty) are reused instead of querying again.
        :param return_results: set to False to only store the results in `self.results`.
        :return: the results if return_results is set, None otherwise.
        '''
        if not self.results:
            # Nothing stored yet (or nothing was found the previous time): run the search.
            self.execute_raw()
            if self._raw_results_only:
                self.results = self.raw_results
            else:
                self.map_results()

        return self.results if return_results else None

    def map_results(self):
        '''
        Maps `self.raw_results` with `map_raw_results` (passing this search as the instance)
        and stores the outcome in `self.results`.
        '''
        mapped = Bungiesearch.map_raw_results(self.raw_results, self)
        self.results = mapped

    def only(self, *fields):
        '''
        Restricts the fields to be fetched from the database when mapping.
        Set to `__model` to fetch all fields define in the ModelIndex, or to `__fields` to fetch
        the fields which were requested from elasticsearch.
        :param *fields: the names of the fields to fetch, or one of the two keywords above on its own.
        :return: a clone of this search with the restriction set.
        '''
        s = self._clone()
        if len(fields) == 1 and fields[0] in ('__model', '__fields'):
            # Keywords are stored as a plain string, which is what `map_raw_results` compares
            # `_only` against. Stored as a one item tuple, the keyword would be taken for a field name.
            s._only = fields[0]
        else:
            s._only = fields
        return s

    def __iter__(self):
        '''
        Executes the search (if needed) and iterates over the stored results.
        '''
        self.execute()
        stored_results = self.results
        return iter(stored_results)

    def __len__(self):
        '''
        The length of a search is whatever elasticsearch-dsl-py's `count()` returns.
        '''
        total = self.count()
        return total

    def __getitem__(self, key):
        '''
        Executes the search restricted to the given index or slice.

        The step of a slice is not used as a step: when provided, it sets whether the results are
        returned as elasticsearch-dsl-py response objects (truthy step) or mapped to Django model
        instances (falsy step). A plain index, or a stepped slice covering exactly one position,
        returns that single item, or an empty list if there is no such item.
        :warning: Getting an item will execute this search. Any search operation or field setting *must* be done prior to getting an item.
        '''
        single_item = True
        if isinstance(key, slice):
            single_item = False
            if key.step is not None:
                # The setting is stored on this object, before the sliced clone is created below.
                self._raw_results_only = key.step
                start, stop = key.start, key.stop
                if start is not None and stop is not None:
                    single_item = start - stop == -1
                else:
                    single_item = start is None and stop == 1
                # The step was consumed above: it must not reach elasticsearch-dsl-py.
                key = slice(start, stop)

        results = super(Bungiesearch, self).__getitem__(key).execute()
        if not single_item:
            return results
        try:
            return results[0]
        except IndexError:
            return []

    def hook_alias(self, alias, model_obj=None):
        '''
        Looks the search alias up by name and returns its function, prepared for this search.
        :param alias: name of the alias, prefix included.
        :param model_obj: model the alias is to be applied to, if any.
        :raise AttributeError: If no alias of that name is registered.
        :raise ValueError: If the alias is limited to some models and is not applicable here.
        '''
        try:
            search_alias = self._alias_hooks[alias]
        except KeyError:
            raise AttributeError('Could not find search alias named {}. Is this alias defined in BUNGIESEARCH["ALIASES"]?'.format(alias))

        applicable_models = search_alias._applicable_models
        if applicable_models:
            # The alias is rejected if the given model is not one of its applicable models, or if
            # none of the applicable models is among the doc types of this search.
            wrong_model = model_obj and model_obj not in applicable_models
            if wrong_model or not any(app_model_obj.__name__ in self._doc_type for app_model_obj in applicable_models):
                raise ValueError('Search alias {} is not applicable to model/doc_types {}.'.format(alias, model_obj if model_obj else self._doc_type))

        return search_alias.prepare(self, model_obj).alias_for

    def __getattr__(self, alias):
        '''
        Resolves unknown attributes as search aliases. As explained in the docs
        (https://docs.python.org/2/reference/datamodel.html#object.__getattr__), this is only called
        as a last resort, i.e. when the attribute is not found through the normal lookup.
        '''
        hooked_alias = self.hook_alias(alias)
        return hooked_alias