# -*- coding: utf-8 -*-
"""Model manager that backs Django ORM queries with calls to a REST API.

A model opts in by inheriting from ``WhamModel`` and declaring an inner
``WhamMeta`` class.  ``WhamManager`` then fetches JSON from the configured
endpoint, stores the results as ordinary model rows and finally delegates to
the regular Django manager so callers get normal ORM results back.
"""
from copy import deepcopy
from datetime import datetime
import logging
import string
import time

try:
    from urllib import urlencode  # Python 2
except ImportError:
    from urllib.parse import urlencode  # Python 3

from django.conf import settings
from django.core.exceptions import ObjectDoesNotExist
from django.db import models
from django.db.models import ForeignKey
from django.template import Template, Context
from django.template.loader import render_to_string
from django.utils import timezone
import requests

from wham.apis.twitter.twitter_bearer_auth import BearerAuth as TwitterBearerAuth
from wham.fields import WhamDateTimeField, WhamForeignKey

logger = logging.getLogger(__name__)

FROM_LAST_ID = 'FROM_LAST_ID'

# Format of the timestamps parsed for WhamDateTimeField values. The offset is
# the literal "+0000", i.e. the value is always expressed in UTC.
_WHAM_DATETIME_FORMAT = '%a %b %d %H:%M:%S +0000 %Y'

# Keyword arguments of WhamManager.get() that are never turned into
# case-insensitive lookups.
_GET_PASSTHROUGH_KWARGS = ('pk', 'id', 'wham_fetch_live', 'wham_use_cache',
                           'wham_depth')


def dpath(d, path):
    """Return the value found by following ``path`` (a sequence of keys) into ``d``.

    An empty path returns ``d`` itself.  A missing key raises ``KeyError``
    (or whatever the intermediate container raises on a failed lookup).
    """
    node = d
    for property_name in path:
        node = node[property_name]
    return node


class WhamImproperlyConfigured(Exception):
    """Raised when a model's WhamMeta configuration is missing or invalid."""
    pass


class AlreadyCachedException(Exception):
    """Raised by ``get_from_web`` when the request URL was already fetched."""
    pass


wham_meta_required = [
    'endpoint'
]

wham_meta_attributes = {
    'required': ('endpoint',),
    'defaults': {
        'url_postfix': '',
        'auth_for_public_get': None,
        'requires_oauth_token': False,
        # the primary key is assumed to be at the end of the path (not in the
        # querystring) by default
        'url_pk_type': 'path',
        'url_pk_param': None,
        'api_key_param': 'api_key',
        'api_params': {},
        'params': {},
        'detail_base_result_path': (),
        'pager_type': None,
        'pager_param': None,
    }
}

wham_meta_search_attributes = {
    'required': (),
    'defaults': {
        'endpoint': '',
        'search_field': 'name',
        'results_path': (),
        'params': {},
        'search_param': 'q',
        'fields': None
    }
}


def check_n_set_class_attributes(klass, required, defaults, exclude=()):
    """Validate the attributes declared on ``klass`` and fill in defaults.

    Only attributes declared directly on ``klass`` (``vars(klass)``) are
    validated; inherited attributes are not checked.

    :param klass: the configuration class (e.g. a ``WhamMeta``) to check.
    :param required: names that must be declared directly on ``klass``.
    :param defaults: mapping of optional attribute name -> default value.
    :param exclude: extra names that are allowed without being validated.
    :raises WhamImproperlyConfigured: if a required attribute is missing or
        an unknown (probably mistyped) attribute is declared.
    """
    declared = vars(klass)

    # check required attributes are set
    for key in required:
        if key not in declared:
            raise WhamImproperlyConfigured(
                'WhamMeta must include the %s attribute' % key)

    # check for mistyped WhamMeta attributes
    for key in declared:
        if key.startswith('__'):
            continue
        if key not in defaults and key not in exclude and key not in required:
            raise WhamImproperlyConfigured(
                '%s is not a valid WhamMeta attribute' % key)

    # deep-copy so mutable defaults are never shared between classes
    for key, value in deepcopy(defaults).items():
        if not hasattr(klass, key):
            setattr(klass, key, value)


class WhamManager(models.Manager):
    """Manager that populates the local database from a REST API on demand."""

    use_for_related_fields = True
    http_cache = None

    # The WhamMeta configuration is initialised lazily (see init_wham_meta)
    # rather than in __init__: Django builds its ManyRelatedManager as a
    # subclass of *this* class and calls the parent constructor without any
    # arguments (see django/db/models/fields/related.py), so the model -- and
    # therefore its WhamMeta class -- is not available at construction time.

    def init_wham_meta(self):
        """Load and validate the model's WhamMeta the first time it is needed.

        Also makes sure the process-wide HTTP cache dict exists on
        ``settings``.

        :raises WhamImproperlyConfigured: if the model has no ``WhamMeta``
            inner class or the class is invalid.
        """
        if getattr(self, '_wham_meta', None):
            return

        if getattr(settings, '_wham_http_cache', None) is None:
            settings._wham_http_cache = {}

        wham_meta = getattr(self.model, 'WhamMeta', None)
        if not wham_meta:
            raise WhamImproperlyConfigured(
                "class inheriting from WhamModel requires a WhamMeta inner class")

        # check for required WhamMeta attributes
        check_n_set_class_attributes(wham_meta,
                                     wham_meta_attributes['required'],
                                     wham_meta_attributes['defaults'],
                                     exclude=['Search'])
        search_meta = getattr(wham_meta, 'Search', None)
        if search_meta:
            check_n_set_class_attributes(
                search_meta,
                wham_meta_search_attributes['required'],
                wham_meta_search_attributes['defaults'])

        self._wham_search_meta = search_meta
        self._wham_meta = wham_meta

    @property
    def is_many_related_manager(self):
        """True if this manager duck-types as Django's ManyRelatedManager."""
        ducktype_attributes = ('_add_items', '_clear_items', '_remove_items')
        return all(hasattr(self, attr) for attr in ducktype_attributes)

    def get_api_key(self):
        """Return the API key stored in the setting named by WhamMeta."""
        return getattr(settings, self._wham_meta.api_key_settings_name)

    def add_auth_params(self, params):
        """Add API-key and/or OAuth token parameters to ``params`` in place."""
        if self._wham_meta.auth_for_public_get == 'API_KEY':
            params[self._wham_meta.api_key_param] = self.get_api_key()
        if self._wham_meta.requires_oauth_token:
            params['token'] = self.get_oauth_token()

    def make_get_request(self, url_tail, params=None, fetch_live=False, depth=1):
        """GET ``url_tail`` from the API, consulting the in-process cache first.

        :param url_tail: path appended to ``WhamMeta.base_url``.
        :param params: extra query-string parameters for this request only.
        :param fetch_live: if true, bypass the cache and always hit the API.
        :param depth: part of the cache key, together with the full URL.
        :returns: ``(response_data, cached, full_url, depth)``.  On a cache
            hit ``response_data`` is whatever the caller stored in the cache
            and ``cached`` is True.  This method never writes to the cache;
            callers do that once they have processed the response.
        :raises requests.HTTPError: if the API answers with an error status.
        """
        url = self._wham_meta.base_url + url_tail + self._wham_meta.url_postfix

        # Copy so per-request parameters (pk, auth token, pager cursor) never
        # leak into the shared class-level WhamMeta.params dict.
        final_params = dict(self._wham_meta.params or {})
        final_params.update(params or {})
        self.add_auth_params(final_params)

        if final_params:
            full_url = "%s?%s" % (url, urlencode(final_params))
        else:
            full_url = url
        logger.debug('wham request: %s', full_url)

        cache_key = (full_url, depth)
        if not fetch_live and cache_key in settings._wham_http_cache:
            return settings._wham_http_cache[cache_key], True, full_url, depth

        session = requests.Session()
        try:
            if self._wham_meta.auth_for_public_get == 'TWITTER':
                # the bearer auth object is built once and kept on settings
                twitter_auth = getattr(settings, 'twitter_auth', None)
                if not twitter_auth:
                    settings.twitter_auth = TwitterBearerAuth(
                        settings.TWITTER_API_KEY, settings.TWITTER_API_SECRET)
                    twitter_auth = settings.twitter_auth
                session.auth = twitter_auth

            response = session.get(url, params=final_params)
            response.raise_for_status()
            response_data = response.json()
        finally:
            session.close()

        # The cache should record the request time or use the standard Django
        # cache; it is a very basic cache that only lives as long as the
        # process is running, just for demo purposes.
        return response_data, False, full_url, depth

    def get_fields(self):
        """Return the model's concrete fields."""
        return [field for (field, _) in self.model._meta.get_fields_with_model()]

    def get_field_names(self):
        """Return the names of the model's concrete fields."""
        return [field.name for field in self.get_fields()]

    def get_field(self, field_name):
        """Return the model field called ``field_name``."""
        return self.model._meta.get_field_by_name(field_name)[0]

    def dict_to_model_instance(self, dict):
        """Placeholder; not implemented."""
        pass

    def get_from_dict(self, data, pk_dict_key='id'):
        """Create or update a model instance from a decoded API result.

        Each field supplies the path of its value inside ``data`` via
        ``field.get_result_path()``; fields whose path is missing are
        skipped.  Foreign keys are resolved recursively through the related
        model's manager.

        :param data: decoded JSON object describing one item.
        :param pk_dict_key: name of the model field holding the primary key.
        :returns: the saved model instance.
        """
        kwargs = {}
        for field in self.get_fields():
            try:
                value = dpath(data, field.get_result_path())
            except KeyError:
                continue

            if isinstance(field, WhamForeignKey):
                if value is not None:
                    value = field.rel.to.objects.get_from_dict(value)
            elif isinstance(field, WhamDateTimeField) and value is not None:
                # The format pins the offset to +0000, so the parsed value is
                # UTC; mark it as such instead of reinterpreting it in the
                # server's local/default timezone.
                value = timezone.make_aware(
                    datetime.strptime(value, _WHAM_DATETIME_FORMAT),
                    timezone.utc)
            kwargs[field.name] = value

        try:
            # wham_use_cache=True: look in the database only, no web request
            instance = self.model.objects.get(pk=kwargs[pk_dict_key],
                                              wham_use_cache=True)
        except ObjectDoesNotExist:
            instance = self.model.objects.create(**kwargs)
        else:
            for attr, value in kwargs.items():
                setattr(instance, attr, value)
            instance.save()

        # now we do m2m fields: nested related items are stored as rows of
        # the related model
        # NOTE(review): this reads ``wham_result_path`` while all() reads
        # ``wham_results_path`` from the m2m field -- confirm which attribute
        # wham.fields actually defines.
        for field, _ in self.model._meta.get_m2m_with_model():
            related_class = field.rel.to
            wham_result_path = field.wham_result_path or (field.name,)
            try:
                for item in dpath(data, wham_result_path):
                    related_class.objects.get_from_dict(item)
            except KeyError:
                pass

        return instance

    def get_wham_lookup_fields(self):
        """Return the fields flagged with ``wham_can_lookup``."""
        return [field for field in self.get_fields()
                if getattr(field, 'wham_can_lookup', False)]

    def get_from_web(self, *args, **kwargs):
        """Fetch a single object from the API and store it locally.

        Looks the object up by primary key when ``pk``, ``id`` or the pk
        field name is given, otherwise by exactly one field flagged as
        ``wham_can_lookup``.

        :raises AlreadyCachedException: if this request was made before and
            ``fetch_live`` is false; the caller should read the database.
        :raises WhamImproperlyConfigured: on an unknown ``url_pk_type``.
        :raises Exception: if no, or more than one, lookup-able field is
            given.
        """
        fetch_live = kwargs.pop('fetch_live', False)
        pk_field_name = self.model._meta.pk.name

        if pk_field_name in kwargs or 'pk' in kwargs or 'id' in kwargs:
            if 'id' in kwargs:
                pk = kwargs['id']
            elif pk_field_name in kwargs:
                pk = kwargs[pk_field_name]
            else:
                pk = kwargs['pk']

            params = {}
            url_pk_type = self._wham_meta.url_pk_type
            if url_pk_type == 'path':
                url_tail = self._wham_meta.endpoint + '/' + str(pk)
            elif url_pk_type == 'querystring':
                params[self._wham_meta.url_pk_param] = str(pk)
                url_tail = self._wham_meta.endpoint
            else:
                raise WhamImproperlyConfigured(
                    '%s is not a valid url_pk_type' % url_pk_type)

            response_data, cached, full_url, depth = self.make_get_request(
                url_tail, params=params, fetch_live=fetch_live)
            if cached:
                raise AlreadyCachedException()
            settings._wham_http_cache[(full_url, depth)] = None
            item_data = dpath(response_data,
                              self._wham_meta.detail_base_result_path)
            return self.get_from_dict(item_data, pk_dict_key=pk_field_name)
            # TODO: make it obvious in the error message that the API
            # returned a 404

        # map each lookup-able field name to its URL parameter
        lookupable_fields_dict = {}
        for field in self.get_wham_lookup_fields():
            lookupable_fields_dict[field.name] = field.get_url_param()

        # map the bare field name to the kwarg it was passed as
        kwarg_field_names = {}
        for key in kwargs:
            kwarg_field_names[key.replace('__iexact', '')] = key

        field_names_to_lookup = set(lookupable_fields_dict) & set(kwarg_field_names)
        if not field_names_to_lookup:
            raise Exception('query not supported by Rest API. TODO:fallback to regular django query')
        if len(field_names_to_lookup) > 1:
            raise Exception('can only lookup one field at a time at this point')

        field_name = list(field_names_to_lookup)[0]
        url_param = lookupable_fields_dict[field_name]
        params = {url_param: kwargs[kwarg_field_names[field_name]]}
        response_data, cached, full_url, depth = self.make_get_request(
            self._wham_meta.endpoint, params=params, fetch_live=fetch_live)
        if cached:
            raise AlreadyCachedException()
        settings._wham_http_cache[(full_url, depth)] = None
        return self.get_from_dict(response_data)

    def get(self, *args, **kwargs):
        """Return one object, fetching it from the API unless already cached.

        Extra keyword arguments:

        * ``wham_fetch_live`` -- always hit the API, ignoring the cache.
        * ``wham_use_cache`` -- only query the local database.
        * ``wham_depth`` -- accepted for symmetry with ``all()``; ignored.
        """
        self.init_wham_meta()

        # This is a really dodgy hack. It's here because getting a twitter
        # user by screen_name is case insensitive in twitter, but a regular
        # django get() is case sensitive. The hack converts every
        # kwarg=value to kwarg__iexact=value. A new dict is built because
        # adding/removing keys while iterating kwargs is undefined.
        lookup = {}
        for key, value in kwargs.items():
            if key in _GET_PASSTHROUGH_KWARGS or key.endswith('__iexact'):
                lookup[key] = value
            else:
                lookup[key + '__iexact'] = value

        fetch_live = lookup.pop('wham_fetch_live', False)
        use_cache = lookup.pop('wham_use_cache', False)
        lookup.pop('wham_depth', None)  # must never reach the ORM

        if use_cache:
            return super(WhamManager, self).get(*args, **lookup)

        # TODO: we need to check that we can actually lookup the field in the
        # api, if not do a regular django get()
        try:
            # always get from web for now
            return self.get_from_web(*args, fetch_live=fetch_live, **lookup)
        except AlreadyCachedException:
            # the request was made before: the row is already in the database
            return super(WhamManager, self).get(*args, **lookup)

    def filter(self, *args, **kwargs):
        """Filter as usual, first syncing API search results when applicable.

        If the model's ``WhamMeta.Search`` is configured and the query
        contains ``<search_field>__icontains``, the API search endpoint is
        queried (once per distinct URL) and its results are stored locally
        before the regular database filter runs.
        """
        self.init_wham_meta()
        search_meta = self._wham_search_meta
        if search_meta:
            search_query = '%s__icontains' % search_meta.search_field
            if search_query in kwargs:
                # copy: the search term must not stick to the shared
                # Search.params dict
                params = dict(search_meta.params or {})
                params[search_meta.search_param] = kwargs[search_query]
                response_data, cached, full_url, depth = self.make_get_request(
                    search_meta.endpoint, params=params)
                if not cached:
                    settings._wham_http_cache[(full_url, depth)] = None
                    for item in dpath(response_data, search_meta.results_path):
                        self.get_from_dict(item)
        return super(WhamManager, self).filter(*args, **kwargs)

    def _process_related_page(self, m2m_field, endpoint, params, last_id,
                              pages_to_get, fetch_live, depth):
        """Fetch one page of related items and link them to ``self.instance``.

        :param params: request parameters; the pager cursor is written into
            this dict so it carries over to the next page.
        :param last_id: id of the last item of the previous page, or None.
        :returns: the id of the last item seen on this page (``last_id``
            unchanged if the page was empty).
        """
        if pages_to_get > 1:
            pager_type = self._wham_meta.pager_type
            if pager_type is None:
                raise Exception('paging is not supported by this endpoint')
            if pager_type != FROM_LAST_ID:
                raise Exception('paging is not implemented yet')
            if last_id is not None:
                params[self._wham_meta.pager_param] = last_id

        response_data, cached, full_url, _ = self.make_get_request(
            endpoint, params, fetch_live=fetch_live, depth=depth)
        if cached:
            # for a cached page all we need to know is where it ended
            return response_data['last_id']

        pk_field_name = self.model._meta.pk.name
        for item in dpath(response_data, m2m_field.wham_results_path):
            # FIXME: we can't just assume the key is 'id', but we do anyway
            item_id = item['id']
            last_id = item_id

            if depth == 1:
                item_instance = self.get_from_dict(item, pk_dict_key=pk_field_name)
            elif depth == 2:
                # get the full object detail (requires a web request)
                item_instance = self.get(pk=item_id)
            else:
                raise ValueError('unsupported wham_depth: %r' % (depth,))

            if self.filter(pk=item_instance.pk).exists():
                # TODO we should really update the 'through' table fields
                continue

            if hasattr(self, 'add'):
                self.add(item_instance)
                continue

            # a custom "through" model must have been specified
            through_kwargs = {}
            for field in self.through._meta.fields:
                if field.primary_key is True:
                    # FIXME: we *assume* the pk is an autoincrementing id
                    continue
                if isinstance(field, ForeignKey):
                    if issubclass(field.rel.to, m2m_field.related.model):
                        through_kwargs[field.name] = self.instance
                        continue
                    if issubclass(field.rel.to, m2m_field.related.parent_model):
                        through_kwargs[field.name] = item_instance
                        continue
                # not the primary key, parent or child field: plain data
                through_kwargs[field.name] = item[field.name]
            self.through.objects.create(**through_kwargs)

        # now that we know the last_id, we can finally store the cache data
        settings._wham_http_cache[(full_url, depth)] = {'last_id': last_id}
        return last_id

    def all(self, *args, **kwargs):
        """Return all objects, syncing related items from the API first.

        The sync only happens when this manager is a many-related manager
        (``instance.things.all()``).  Extra keyword arguments:

        * ``wham_fetch_live`` -- bypass the HTTP cache.
        * ``wham_use_cache`` -- skip the API entirely.
        * ``wham_depth`` -- 1 builds items from the list response, 2 fetches
          each item's detail with a separate request.
        * ``wham_pages`` -- number of pages to fetch, or ``'all'``.
        """
        self.init_wham_meta()
        fetch_live = kwargs.pop('wham_fetch_live', False)
        use_cache = kwargs.pop('wham_use_cache', False)
        depth = kwargs.pop('wham_depth', 1)
        pages_to_get = kwargs.pop('wham_pages', 1)
        if pages_to_get == 'all':
            pages_to_get = 10000000

        if not use_cache and self.is_many_related_manager:
            # get the source field
            source_class = self.source_field.rel.to
            # this is a total hack: prefetch_cache_name *happens* to be the
            # same as the m2m field name
            m2m_field = getattr(source_class, self.prefetch_cache_name).field
            endpoint = Template(m2m_field.wham_endpoint).render(
                Context({'id': self.instance.pk}))
            # copy: the pk and pager cursor must not stick to the field's
            # shared wham_params dict
            params = dict(m2m_field.wham_params or {})
            if m2m_field.wham_pk_param:
                params[m2m_field.wham_pk_param] = self.instance.pk

            pages_left = pages_to_get
            last_id = None
            while pages_left >= 1:
                previous_last_id = last_id
                last_id = self._process_related_page(
                    m2m_field, endpoint, params, last_id, pages_to_get,
                    fetch_live, depth)
                if previous_last_id == last_id:
                    break  # the page added nothing new: we reached the end
                pages_left -= 1

        return super(WhamManager, self).all(*args, **kwargs)

    @property
    def docs(self):
        """Plain-text table listing the model's field names and types."""
        lines = [
            '',
            '  ' + 'Field'.ljust(30) + 'Type'.ljust(10),
            '----------------------------------------------------------------',
        ]
        for field in self.get_fields():
            prefix = '⚷ ' if field.primary_key else '  '
            lines.append(
                prefix + field.name.ljust(30) + field.type_repr.ljust(10))
        return '\n'.join(lines)

    def _repr_html_(self):
        """Render the endpoint documentation template."""
        return render_to_string('wham/docs/endpoint.html', {'endpoint': self})

    def set_oauth_token(self, token):
        """Store the OAuth token on the model class."""
        self.model.wham_oauth_token = token

    def get_oauth_token(self):
        """Return the OAuth token previously stored with set_oauth_token."""
        return self.model.wham_oauth_token


class WhamModel(models.Model):
    """Abstract base model whose default manager is a WhamManager."""

    objects = WhamManager()

    class Meta():
        abstract = True