#-*- encoding: utf-8 -*- """Basic wrapper (client) over OpenLibrary REST API""" from __future__ import absolute_import, division, print_function from collections import namedtuple import json import jsonschema import logging import os import re from six.moves.urllib.parse import urlencode import backoff import requests from . import common from .config import Config from .utils import merge_unique_lists logger = logging.getLogger('openlibrary') class OpenLibrary(object): """Open Library API Client. Usage: >>> from olclient.openlibrary import OpenLibrary >>> import olclient.common as common >>> ol = OpenLibrary(base_url="http://0.0.0.0:8080") ... # Create a new book >>> book = common.Book(title=u"Warlight: A novel", \ ... authors=[common.Author(name=u"Michael Ondaatje")], \ ... publisher=u"Deckle Edge", publish_date=u"2018") >>> book.add_id(u'isbn_10', u'0525521194') >>> book.add_id(u'isbn_13', u'9780525521198') >>> new_book = ol.create_book(book) >>> new_book.add_bookcover('https://images-na.ssl-images-amazon.com/images/I/51kmM%2BvVRJL._SX337_BO1,204,203,200_.jpg') ... # Fetch and update an existing book >>> book = ol.get_book_by_isbn(u"3570028364") >>> book.title = u"Wie die Weißen Engel die Blauen Tiger zur " \ ... "Schnecke machten" >>> book.save(comment="correcting title") """ VALID_IDS = ['isbn_10', 'isbn_13', 'lccn', 'ocaid'] BACKOFF_KWARGS = { 'wait_gen': backoff.expo, 'exception': requests.exceptions.RequestException, 'giveup': lambda e: hasattr(e.response, 'status_code') and 400 <= e.response.status_code < 500, 'max_tries': 5 } # constants to aid works.json API request's pagination WORKS_LIMIT = 50 WORKS_PAGINATION_OFFSET = 0 def __init__(self, credentials=None, base_url=u'https://openlibrary.org'): self.session = requests.Session() self.base_url = base_url credentials = credentials or \ Config().get_config().get('s3', None) if credentials: self.login(credentials) def login(self, credentials): """Login to Open Library with given credentials, ensures the requests session has valid cookies for future requests. """ if 'username' in credentials._asdict(): headers = {'Content-Type': 'application/x-www-form-urlencoded'} data = urlencode(credentials._asdict()) else: # s3 login headers = {'Content-Type': 'application/json'} data = json.dumps(credentials._asdict()) url = self.base_url + '/account/login' err = lambda e: logger.exception("Error at login: %s", e) @backoff.on_exception(on_giveup=err, **self.BACKOFF_KWARGS) def _login(url, headers, data): """Makes best effort to perform request w/ exponential backoff""" return self.session.post(url, data=data, headers=headers) response = _login(url, headers, data) if not self.session.cookies: raise ValueError("No cookie set") def validate(self, doc, schema_name): """Validates a doc's json representation against its JSON Schema using jsonschema.validate(). Returns: None Raises: jsonschema.exceptions.ValidationError if validation fails. """ path = os.path.dirname(os.path.realpath(__file__)) schemata_path = "%s/schemata/%s" % (path, schema_name) with open(schemata_path) as schema_data: schema = json.load(schema_data) resolver = jsonschema.RefResolver('file://' + schemata_path, schema) return jsonschema.Draft4Validator(schema, resolver=resolver).validate(doc.json()) def delete(self, olid, comment): """Delete a single Open Library entity by olid (str) CAUTION: This does not make any checks for backreference consistency, Editions could be orphaned, or books left without Authors. Use with care! """ data = json.dumps({ 'type': { 'key': '/type/delete' }, '_comment': comment }) url = self._generate_url_from_olid(olid) return self.session.put(url, data=data) def save_many(self, docs, comment): """ Uses the Open Library save_many API endpoint to write any number or combination of documents (Edition, Work, or Author) back to Open Library. Uses HTTP Extension Framework custom headers (RFC 2774). """ headers = { 'Opt': '"http://openlibrary.org/dev/docs/api"; ns=42', '42-comment': comment } doc_json = [doc.json() for doc in docs] return self.session.post('%s/api/save_many' % self.base_url, json.dumps(doc_json), headers=headers) err = lambda e: logger.exception("Error retrieving OpenLibrary response: %s", e) @backoff.on_exception(on_giveup=err, **BACKOFF_KWARGS) def _get_ol_response(self, path): """Makes best effort to perform request w/ exponential backoff""" response = self.session.get(self.base_url + path) response.raise_for_status() return response @property def Work(ol_self): """ >>> from olclient import OpenLibrary >>> ol = OpenLibrary() >>> ol.Work.get(olid) """ class Work(common.Entity): OL = ol_self def __init__(self, olid, **kwargs): self.olid = olid self._editions = [] self.description = OpenLibrary.get_text_value(kwargs.pop('description', None)) self.notes = OpenLibrary.get_text_value(kwargs.pop('notes', None)) for kwarg in kwargs: setattr(self, kwarg, kwargs[kwarg]) def json(self): """Returns a dict JSON representation of an OL Work suitable for saving back to Open Library via its APIs. """ exclude = ['_editions', 'olid'] data = { k: v for k,v in self.__dict__.items() if v and k not in exclude } data['key'] = u'/works/' + self.olid data['type'] = {u'key': u'/type/work'} if data.get('description'): data['description'] = {u'type': u'/type/text', u'value': data['description']} if data.get('notes'): data['notes'] = {u'type': u'/type/text', u'value': data['notes']} return data def validate(self): """Validates a Work's json representation against the canonical JSON Schema for Works using jsonschema.validate(). Returns: None Raises: jsonschema.exceptions.ValidationError if the Work is invalid. """ return self.OL.validate(self, 'work.schema.json') @property def editions(self): """Returns a list of editions of related to a particular work Args: None Returns (List) of common.Edition books Usage: >>> from olclient import OpenLibrary >>> ol = OpenLibrary() >>> ol.Work(olid).editions """ url = '%s/works/%s/editions.json' % (self.OL.base_url, self.olid) try: r = self.OL.session.get(url) editions = r.json().get('entries', []) except Exception as e: return [] self._editions = [ self.OL.Edition( **self.OL.Edition._ol_edition_json_to_book_args(ed)) for ed in editions] return self._editions @classmethod def create(cls, book, debug=False): """Creates a new work along with a new edition Args: book (common.Book object) Returns: (common.Work) Usage: >>> from olclient.openlibrary import OpenLibrary >>> import olclient.common as common >>> book = common.Book(title=u"Warlight: A novel", authors=[common.Author(name=u"Michael Ondaatje")], publisher=u"Deckle Edge", publish_date=u"2018") >>> book.add_id(u'isbn_10', u'0525521194') >>> book.add_id(u'isbn_13', u'978-0525521198')) >>> ol.Work.create(book) """ try: book.publish_date = re.findall( r'[\d]{4}', book.publish_date)[0] except: book.publish_date = u'' ed = cls.OL.create_book(book, debug=debug) ed.add_bookcover(book.cover) work = ed.work work.add_bookcover(book.cover) return ed def add_author(self, author): author_role = {u'type': {u'key': u'/type/author_role'}} author_role[u'author'] = {u'key': u'/authors/' + author.olid} self.authors.append(author_role) return author_role def add_bookcover(self, url): _url = '%s/works/%s/-/add-cover' % (self.OL.base_url, self.olid) r = self.OL.session.post(_url, files={ 'file': '', 'url': url, 'upload': 'submit' }) return r def add_subject(self, subject, comment=''): return self.add_subjects([subject], comment) def add_subjects(self, subjects, comment=''): url = self.OL.base_url + "/works/" + self.olid + ".json" data = self.OL.session.get(url).json() original_subjects = data.get('subjects', []) changed_subjects = merge_unique_lists([original_subjects, subjects]) data['_comment'] = comment or ('adding %s to subjects' % ', '.join(subjects)) data['subjects'] = changed_subjects return self.OL.session.put(url, json.dumps(data)) def rm_subjects(self, subjects, comment=''): url = self.OL.base_url + "/works/" + self.olid + ".json" r = self.OL.session.get(url) data = r.json() data['_comment'] = comment or ('rm subjects: %s' % ', '.join(subjects)) data['subjects'] = list(set(data['subjects']) - set(subjects)) return self.OL.session.put(url, json.dumps(data)) def save(self, comment): """Saves this work back to Open Library using the JSON API.""" body = self.json() body['_comment'] = comment url = self.OL.base_url + '/works/%s.json' % self.olid return self.OL.session.put(url, json.dumps(body)) @classmethod def get(cls, olid): """Fetches an OpenLibrary Work Object via the book's olid Args: OLID - Open Library ID Returns: (common.Work) Usage: >>> from olclient.openlibrary import OpenLibrary >>> ol = OpenLibrary() >>> ol.Work.get('OL26278461W') """ path = '/works/%s.json' % olid r = cls.OL._get_ol_response(path) return cls(olid, **r.json()) @classmethod def search(cls, title=None, author=None): """Get the *closest* matching result in OpenLibrary based on a title and author. FIXME: This is essentially a Work and should be moved there Args: title (unicode) author (unicode) Returns: (common.Book) Usage: >>> from olclient.openlibrary import OpenLibrary >>> ol = OpenLibrary() >>> ol.get_book_by_metadata( ... title=u'The Autobiography of Benjamin Franklin') or >>> from olclient.openlibrary import OpenLibrary >>> ol = OpenLibrary() >>> ol.get_book_by_metadata( ... author=u'Dan Brown') """ if not (title or author): raise ValueError("Author or title required for metadata search") err = lambda e: logger.exception("Error retrieving metadata " \ "for book: %s", e) url = '%s/search.json?title=%s' % (cls.OL.base_url, title) if author: url += '&author=%s' % author @backoff.on_exception(on_giveup=err, **cls.OL.BACKOFF_KWARGS) def _get_book_by_metadata(url): """Makes best effort to perform request w/ exponential backoff""" return cls.OL.session.get(url) response = _get_book_by_metadata(url) try: results = Results(**response.json()) except Exception as e: logger.exception(e) raise Exception("Work Search API failed to return json") if results.num_found: return results.first.to_book() return None return Work @property def Edition(ol_self): class Edition(common.Book): OL = ol_self def __init__(self, work_olid, edition_olid, title, subtitle=None, identifiers=None, number_of_pages=None, authors=None, publisher=None, publish_date=None, cover=None, **kwargs): """ Error: TypeError: __init__() missing 2 required positional arguments: 'edition_olid' and 'title' Usage: >>> from olclient.openlibrary import OpenLibrary >>> ol = OpenLibrary() >>> e = ol.Edition(u'OL2514725W') >>> e.book """ self._work = None self.work_olid = work_olid self.olid = edition_olid self.description = OpenLibrary.get_text_value(kwargs.pop('description', None)) self.notes = OpenLibrary.get_text_value(kwargs.pop('notes', None)) super(Edition, self).__init__( title, subtitle=subtitle, identifiers=identifiers, number_of_pages=number_of_pages, authors=authors, publisher=publisher, publish_date=publish_date, cover=cover, **kwargs) @staticmethod def _validate_identifiers(identifiers): """Don't reject existing identifiers from Open Library.""" return @property def work(self): self._work = self.OL.Work.get(self.work_olid) return self._work def json(self): """Returns a dict JSON representation of an OL Edition suitable for saving back to Open Library via its APIs. """ exclude = ['_work', 'olid', 'work_olid', 'pages'] data = { k: v for k,v in self.__dict__.items() if v and k not in exclude } data['key'] = '/books/' + self.olid data['type'] = {u'key': u'/type/edition'} if self.pages: data['number_of_pages'] = self.pages if self.work_olid: data['works'] = [ { 'key': '/works/' + self.work_olid} ] if self.authors: data['authors'] = [ {'key': '/authors/' + a.olid} for a in self.authors ] if data.get('description'): data['description'] = {u'type': u'/type/text', u'value': data['description']} if data.get('notes'): data['notes'] = {u'type': u'/type/text', u'value': data['notes']} return data def validate(self): """Validates an Edition's json representation against the canonical JSON Schema for Editions using jsonschema.validate(). Returns: None Raises: jsonschema.exceptions.ValidationError if the Edition is invalid. """ return self.OL.validate(self, 'edition.schema.json') def add_bookcover(self, cover_url): """Adds a cover image to this edition""" url = '%s/books/%s/-/add-cover' % (self.OL.base_url, self.olid) r = self.OL.session.post(url, files={ 'file': '', 'url': cover_url, 'upload': 'submit' }) return r def save(self, comment): """Saves this edition back to Open Library using the JSON API.""" body = self.json() body['_comment'] = comment url = self.OL.base_url + '/books/%s.json' % self.olid return self.OL.session.put(url, json.dumps(body)) @classmethod def create(cls, book, work_olid, debug=False): """Creates this book as an Edition associated with the work having olid work_olid Args: book (common.Book) work_olid (unicode) - The olid of the work to add this book to Returns: Edition Object Usage: >>> from olclient import OpenLibrary >>> ol = OpenLibrary() >>> = ol.Edition.create(Book(...), u'OL2514725W') """ return cls.OL.create_book(book, work_olid=work_olid, debug=debug) @classmethod def _ol_edition_json_to_book_args(cls, data): """Creates Book Arguments from OL Edition JSON Args: json - {"edition_olid":"XXX", "authors":["XXX","XXX"], "work_olid":"XXX"} Returns: book arguments Dictionary Usage: >>> from olclient import OpenLibrary >>> ol = OpenLibrary() >>> = ol.Edition._ol_edition_json_to_book_args(data) """ book_args = { 'edition_olid': data.pop('key', u'').split('/')[-1], 'work_olid': data.pop('works')[0]['key'].split('/')[-1] if 'works' in data else None, 'authors': [cls.OL.Author.get(author['key'].split('/')[-1]) for author in data.pop('authors', [])] } book_args.update(data) return book_args @classmethod def get(cls, olid=None, isbn=None, oclc=None, lccn=None, ocaid=None): """Retrieves a single book from OpenLibrary as json by isbn or olid or ocaid or lccn or oclc or olid. Args: identifier (unicode) - identifier value, e.g. u'OL20933604M' Warnings: Currently, the marshaling is not complete. While it generates/returns a valid book, ideally we want the OpenLibrary fields to be converted into a format which is consistent with how we are using olclient Book to create OpenLibrary books -- i.e. authors = Author objects, publishers list instead of publisher, identifiers (instead of key and isbn). The goal is to enable service to interoperate with the Book object and for OpenLibrary to be able to marshal the book object into a form it can use (or marshal its internal book json into a form others can use). Usage: >>> from olclient import OpenLibrary >>> ol = OpenLibrary() >>> ol.Edition.get(olid=u'OL25944230M') or >>> ol.Edition.get(isbn=u'9706664998') or >>> ol.Edition.get(oclc=u'893562252') or >>> ol.Edition.get(lccn=u'XXX') or >>> ol.Edition.get(ocaid=u'XXX') """ if not any([olid, isbn, oclc, lccn, ocaid]): raise ValueError("Must supply valid olid, isbn, oclc, ocaid, or lccn") elif not olid: bibkeys = {'ISBN': isbn, 'OCLC': oclc, 'OCAID': ocaid, 'LCCN': lccn} bibkey, value = [(k, v) for k,v in bibkeys.items() if v][0] olid = cls.get_olid(bibkey, value) if not olid: # No edition found by bibkey return path = '/books/%s.json' % olid response = cls.OL._get_ol_response(path) try: data = response.json() data['title'] = data.get('title', None) edition = cls(**cls._ol_edition_json_to_book_args(data)) return edition except Exception as e: raise Exception("Unable to get Edition with olid: %s\nDetails: %s" % (olid, e)) @classmethod def get_olid_by_ocaid(cls, ocaid): return cls.get_olid('OCAID', ocaid) @classmethod def get_olid_by_isbn(cls, isbn): return cls.get_olid('ISBN', isbn) @classmethod def get_olid_by_lccn(cls, lccn): return cls.get_olid('LCCN', lccn) @classmethod def get_olid_by_oclc(cls, oclc): return cls.get_olid('OCLC', oclc) @classmethod def get_olid(cls, key, value): """Looks up a key (LCCN, OCLC, ISBN10/13, OCAID) in OpenLibrary and returns a matching olid if a match exists. Args: key (unicode) - u'OCLC', u'ISBN', u'LCCN', u'OCAID' value (unicode) - identifier value Returns: olid (unicode) or None """ metadata = cls.get_metadata(key, value) if metadata: book_url = metadata.get('info_url', '') return cls.OL._extract_olid_from_url(book_url, url_type="books") @classmethod def get_metadata(cls, key, value): """Looks up a key (LCCN, OCLC, ISBN10/13, OCAID) using the Open Library Books API https://openlibrary.org/dev/docs/api/books Returns first matched JSON object for the bibliographic key, or None if there is no match. Response keys: 'bib_key': Identifier used to query this book. 'info_url': A URL to the book page. 'preview': Preview state, 'noview' or 'full'. 'preview_url': A URL to the preview of the book. 'thumbnail_url': A URL to a bookcover thumbnail. Args: key (unicode) - u'OCLC', u'ISBN', u'LCCN', u'OCAID' value (unicode) - identifier value Returns: Dict or None Usage: >>> from olclient import OpenLibrary >>> ol = OpenLibrary() >>> ol.Edition.get_metadata(u'ISBN', u'9780747550303') or >>> ol.Edition.get_metadata(u'OCLC', u'XXX') or >>> ol.Edition.get_metadata(u'LCCN', u'XXX') or >>> ol.Edition.get_metadata(u'OCAID', u'XXX') """ if key not in ['OCLC', 'ISBN', 'LCCN', 'OLID', 'OCAID']: raise ValueError("key must be one of OCLC, OLID, ISBN, OCAID, or LCCN") path = '/api/books.json?bibkeys=%s:%s' % (key, value) response = cls.OL._get_ol_response(path) try: results = response.json() except ValueError as e: logger.exception(e) return None _key = u'%s:%s' % (key, value) if _key in results: return results[_key] return None return Edition @property def Author(ol_self): class Author(common.Author): OL = ol_self def __init__(self, olid, name, **author_kwargs): self.olid = olid super(Author, self).__init__(name, **author_kwargs) @staticmethod def _validate_name(name): """Don't reject existing author names from Open Library.""" return def json(self): """Returns a dict JSON representation of an OL Author suitable for saving back to Open Library via its APIs. """ exclude = ['olid', 'identifiers'] data = { k: v for k,v in self.__dict__.items() if v and k not in exclude } data['key'] = u'/authors/' + self.olid data['type'] = {u'key': u'/type/author'} if 'bio' in data: data['bio'] = {u'type': u'/type/text', u'value': data['bio']} return data def validate(self): """Validates an Author's json representation against the canonical JSON Schema for Authors using jsonschema.validate(). Returns: None Raises: jsonschema.exceptions.ValidationError if the Author is invalid. """ return self.OL.validate(self, 'author.schema.json') def save(self, comment): """Saves this author back to Open Library using the JSON API.""" body = self.json() body['_comment'] = comment url = self.OL.base_url + '/authors/%s.json' % self.olid return self.OL.session.put(url, json.dumps(body)) def works(self, limit=OL.WORKS_LIMIT, offset=OL.WORKS_PAGINATION_OFFSET): """Returns a list of OpenLibrary Works associated with an OpenLibrary Author. Args: olid (unicode) - OpenLibrary ID for author to search within Open Library's database of authors to retrieve his Works. name (unicode) - name of an Author to search for within OpenLibrary. limit (integer) - number of Author's Works to return. offset (integer) - offset number to aid pagination. Returns: A (list) of Works from the OpenLibrary associated with the Author. Usage: >>> from olclient.openlibrary import OpenLibrary >>> ol = OpenLibrary() >>> ol.Author.get('OL39307A').works() or >>> ol.Author.get('OL39307A').works(limit=20)# to obtain the first 20 works of the author >>> ol.Author.get('OL39307A').works(limit=20, offset=20)# to obtain the next 20 works of the author or >>> author_obj = ol.Author.get(ol.Author.get_olid_by_name('Dan Brown')) >>> author_obj.works() or >>> ol.Author.get(ol.Author.get_olid_by_name('Dan Brown')).works() """ path = '/authors/%s/works.json' % self.olid # check to prevent 'None' value limit = limit or self.OL.WORKS_LIMIT offset = offset or self.OL.WORKS_PAGINATION_OFFSET # including limit and offset querystrings to the url path += '/?limit=%s&offset=%s' % (limit, offset) try: response = self.OL._get_ol_response(path) return response.json() except Exception as e: logger.exception(e) raise Exception("Author API failed to return json") @classmethod def get(cls, olid): """Retrieves an OpenLibrary Author by author_olid Args: olid (unicode) - OpenLibrary ID for author to search within Open Library's database of authors Returns: A (list) of author object from the OpenLibrary authors autocomplete API Usage: >>> from olclient.openlibrary import OpenLibrary >>> ol = OpenLibrary() >>> ol.Author.get('OL39307A') """ path = '/authors/%s.json' % olid r = cls.OL._get_ol_response(path) try: data = r.json() olid = cls.OL._extract_olid_from_url(data.pop('key', u''), url_type='authors') except: raise Exception("Unable to get Author with olid: %s" % olid) return cls( olid, name=data.pop('name', u''), bio=OpenLibrary.get_text_value(data.pop('bio', None)), **data) @classmethod def search(cls, name, limit=1): """Finds a list of OpenLibrary authors with similar names to the search query using the Author auto-complete API. Args: name (unicode) - name of author to search for within OpenLibrary's database of authors limit (integer) - number of objects with similar names Returns: A (list) of matching authors from the OpenLibrary authors autocomplete API Usage: >>> from olclient.openlibrary import OpenLibrary >>> ol = OpenLibrary() >>> ol.Author.search('Dan Brown') or >>> ol.Author.search('Dan Brown', 5) """ if name: err = lambda e: logger.exception( "Error fetching author matches: %s", e) url = cls.OL.base_url + '/authors/_autocomplete?q=%s&limit=%s' \ % (name, limit) @backoff.on_exception(on_giveup=err, **cls.OL.BACKOFF_KWARGS) def _get_matching_authors_by_name(url): """Makes best effort to perform request w/ exponential backoff""" return cls.OL.session.get(url) response = _get_matching_authors_by_name(url) author_matches = response.json() return author_matches return [] @classmethod def get_olid_by_name(cls, name): """Uses the Authors auto-complete API to find OpenLibrary Authors with similar names. If any name is an exact match then return the matching author's 'key' (i.e. olid). Otherwise, return None. FIXME Warning: if there are multiple exact matches, (e.g. a common name like "Mike Smith" which may have multiple valid results), this presents a problem. Args: name (unicode) - name of an Author to search for within OpenLibrary Returns: olid (unicode) Usage: >>> from olclient.openlibrary import OpenLibrary >>> ol = OpenLibrary() >>> ol.Author.get_olid_by_name('Dan Brown') """ authors = cls.search(name) _name = name.lower().strip() for author in authors: if _name == author['name'].lower().strip(): return author['key'].split('/')[-1] return None # This returns the Author class from the ol.Author factory method return Author @property def Delete(ol_self): class Delete(common.Entity): OL = ol_self def __init__(self, doc): """Creates a delete object from the either the <Author | Edition | Work> OR an olid. """ try: self.olid = doc.olid except AttributeError: self.olid = doc def json(self): data = { u'key': OpenLibrary.full_key(self.olid), u'type': { u'key': u'/type/delete' } } return data def save(self, comment='delete'): """Saves the Delete back to Open Library using the JSON API.""" body = self.json() body['_comment'] = comment url = self.OL._generate_url_from_olid(self.olid) return self.OL.session.put(url, json.dumps(body)) return Delete @property def Redirect(ol_self): class Redirect(common.Entity): OL = ol_self def __init__(self, **kwargs): """ Usage: >>> r = ol.Redirect(f=u'OL2514725W', t=u'OL1234W') OR >>> r = ol.Redirect(f=<ol.Edition>, t=<ol.Edition>) """ try: self.olid = kwargs['f'].olid except AttributeError: self.olid = kwargs['f'] try: self.location = kwargs['t'].olid except AttributeError: self.location = kwargs['t'] self.olid = self.olid.upper() self.location = self.location.upper() if OpenLibrary.get_type(self.olid) != OpenLibrary.get_type(self.location): raise Exception("Types don't match!") def json(self): data = { u'key': OpenLibrary.full_key(self.olid), u'location': OpenLibrary.full_key(self.location), u'type': { u'key': u'/type/redirect' } } return data def save(self, comment='redirect'): """Saves the Redirect back to Open Library using the JSON API.""" body = self.json() body['_comment'] = comment url = self.OL._generate_url_from_olid(self.olid) return self.OL.session.put(url, json.dumps(body)) return Redirect def get(self, olid): _olid = olid.lower() if _olid.endswith('m'): return self.Edition.get(olid) elif _olid.endswith('w'): return self.Work.get(olid) elif _olid.endswith('a'): return self.Author.get(olid) @classmethod def get_primary_identifier(cls, book): """XXX needs docs """ id_name, id_value = None, None for valid_key in cls.VALID_IDS: if valid_key in book.identifiers: id_name = valid_key id_value = book.identifiers[valid_key][0] break if not (id_name and id_value): raise ValueError("ISBN10/13 or LCCN required") return id_name, id_value def create_book(self, book, work_olid=None, debug=False): """Create a new OpenLibrary Book using the /books/add endpoint Args: book (Book) work_olid (unicode) - if present, associates this edition with an existing work. debug (bool) - whether to create the book or return it as data Usage: >>> ol = OpenLibrary() ... book = ol.create_book(Book( ... title=u"Wie die Weißen Engel die Blauen Tiger zur " \ ... "Schnecke machten", ... author=Author(name=u"Walter Kort"), ... publisher=u"Bertelsmann", ... isbn=u"3570028364", publish_date=u"1982")) """ id_name, id_value = self.get_primary_identifier(book) author_name = None for _author in book.authors: if len(_author.name.split(" ")) > 1: author_name = _author.name continue if not author_name: raise ValueError("Unable to create_book without valid Author name") author_olid = self.Author.get_olid_by_name(author_name) author_key = ('/authors/' + author_olid) if author_olid else u'__new__' return self._create_book( title=book.title, author_name=author_name, author_key=author_key, publish_date=book.publish_date, publisher=book.publisher, id_name=id_name, id_value=id_value, work_olid=work_olid, debug=debug) def _create_book(self, title, author_name, author_key, publish_date, publisher, id_name, id_value, work_olid=None, debug=False): """ Returns: An (OpenLibrary.Edition) """ if id_name not in self.VALID_IDS: raise ValueError("Invalid `id_name`. Must be one of %s, got %s" \ % (self.VALID_IDS, id_name)) err = lambda e: logger.exception("Error creating OpenLibrary " \ "book: %s", e) url = self.base_url + '/books/add' if work_olid: url += '?work=/works/%s' % work_olid data = { "title": title, "author_name": author_name, "author_key": author_key, "publish_date": publish_date, "publisher": publisher, "id_name": id_name, "id_value": id_value, "_save": "" } if debug: return data @backoff.on_exception(on_giveup=err, **self.BACKOFF_KWARGS) def _create_book_post(url, data=data): """Makes best effort to perform request w/ exponential backoff""" return self.session.post(url, data=data) response = _create_book_post(url, data=data) _olid = self._extract_olid_from_url(response.url, url_type="books") if _olid == u'add': raise ValueError('Creation failed, book may already exist!') return self.Edition.get(_olid) def _generate_url_from_olid(self, olid): """Returns the .json url for an olid (str)""" ol_paths = {'OL..A': 'authors', 'OL..M': 'books', 'OL..W': 'works'} kind = re.sub(r'\d+', '..', olid) return "%s/%s/%s.json" % (self.base_url, ol_paths[kind], olid) @staticmethod def get_text_value(text): """Returns the text value from a property that can either be a properly formed /type/text object, or a (incorrect) string. Used for Work/Edition 'notes' and 'description' and Author 'bio'. """ try: return text.get('value') except: return text @staticmethod def get_type(olid): ol_types = {'OL..A': 'author', 'OL..M': 'book', 'OL..W': 'work'} kind = re.sub(r'\d+', '..', olid) try: return ol_types[kind] except KeyError: raise ValueError("Unknown type for olid: %s" % olid) @staticmethod def full_key(olid): """Returns the Open Library JSON key of format /<type(plural)>/<olid> as used by the Open Library API.""" return "/%ss/%s" % (OpenLibrary.get_type(olid), olid) @staticmethod def _extract_olid_from_url(url, url_type): """No single field has the match's OpenLibrary ID in isolation so we extract it from the info_url field. Args: url_type (unicode) - "books", "authors", "works", etc which are found in the ol url, e.g.: openlibrary.org/books/... Returns: olid (unicode) Usage: >>> url = u'https://openlibrary.org/books/OL25943366M' >>> _extract_olid_from_url(url, u"books") u"OL25943366M" """ ol_url_pattern = r'[/]%s[/]([0-9a-zA-Z]+)' % url_type try: return re.search(ol_url_pattern, url).group(1) except AttributeError: return None # No match class Results(object): """Container for the results of the Search API""" def __init__(self, start=0, num_found=0, docs=None, **kwargs): self.start = start self.num_found = num_found self.docs = [self.Document(**doc) for doc in docs] or [] @property def first(self): if self.docs: return self.docs[0] class Document(object): """An aggregate OpenLibrary Work summarizing all Editions of a Book""" def __init__(self, key, title=u"", subtitle=None, subject=None, author_name=u"", author_key=None, edition_key=None, language="", publisher=None, publish_date=None, publish_place=None, first_publish_year=None, isbns=None, lccn=None, oclc=None, id_goodreads=None, id_librarything=None, **kwargs): """ Args: key (unicode) - a '/<type>/<OLID>' uri, e.g. '/works/OLXXXXXX' title (unicode) subtitle (unicode) [optional] subject (list of unicode) [optional] author_name (list of unicode) author_key (list of unicode) - list of author OLIDs edition_key (list of unicode) - list of edition OLIDs language (unicode) publisher (list of unicode) publish_date (list unicode) publish_place (list unicode) first_publish_year (int) isbns (list unicode) lccn (list unicode) oclc (list unicode) id_goodreads (list unicode) id_librarything (list unicode) """ work_olid = OpenLibrary._extract_olid_from_url(key, "works") edition_olids = edition_key self.title = title self.subtitle = subtitle self.subjects = subject # XXX test that during the zip, author_name and author_key # correspond to each other one-to-one, in order self.authors = [ {'name': name, 'olid': author_olid} for (name, author_olid) in zip(author_name or [], author_key or [])] self.publishers = publisher self.publish_dates = publish_date self.publish_places = publish_place self.first_publish_year = first_publish_year self.edition_olids = edition_olids self.language = language # These keys all map to [lists] of (usually one) unicode ids self.identifiers = { 'olid': [work_olid], 'isbns': isbns or [], 'oclc': oclc or [], 'lccn': lccn or [], 'goodreads': id_goodreads or [], 'librarything': id_librarything or [] } def to_book(self): """Converts an OpenLibrary Search API Results Document to a standardized Book """ publisher = self.publishers[0] if self.publishers else "" return common.Book( title=self.title, subtitle=self.subtitle, identifiers=self.identifiers, authors=self.authors, publisher=publisher, publish_date=self.first_publish_year)