python source code of document

# -*- coding: utf-8 -*-
#
#       document.py
#
#       Copyright 2009, 2010 Thomas Jost <thomas.jost@gmail.com>
#       Copyright 2015 Cimbali <me@cimba.li>
#
#       This program is free software; you can redistribute it and/or modify
#       it under the terms of the GNU General Public License as published by
#       the Free Software Foundation; either version 2 of the License, or
#       (at your option) any later version.
#
#       This program is distributed in the hope that it will be useful,
#       but WITHOUT ANY WARRANTY; without even the implied warranty of
#       MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#       GNU General Public License for more details.
#
#       You should have received a copy of the GNU General Public License
#       along with this program; if not, write to the Free Software
#       Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
#       MA 02110-1301, USA.
"""
:mod:`pympress.document` -- document handling
---------------------------------------------

This module contains several classes that are used for managing documents (only
PDF documents are supported at the moment, but other formats may be added in the
future).

An important point is that this module is *completely* independent from the GUI:
there should not be any GUI-related code here, except for page rendering (and
only rendering itself: the preparation of the target surface must be done
elsewhere).
"""

from __future__ import print_function, unicode_literals

import logging
logger = logging.getLogger(__name__)

import os
import math
import enum
import tempfile
import mimetypes
import webbrowser

import gi
gi.require_version('Poppler', '0.18')
from gi.repository import Poppler

try:
    from urllib.parse import urljoin, scheme_chars
    from urllib.request import pathname2url
except ImportError:
    from urlparse import urljoin, scheme_chars
    from urllib import pathname2url


from pympress.util import fileopen


def get_extension(mime_type):
    """ Returns a valid filename extension (recognized by python) for a given mime type.

    Args:
        mime_type (`str`): The mime type for which to find an extension

    Returns:
        `str`: A file extension used for the given mimetype
    """
    if not mimetypes.inited:
        mimetypes.init()
    for ext in mimetypes.types_map:
        if mimetypes.types_map[ext] == mime_type:
            return ext


class PdfPage(enum.IntEnum):
    """ Represents the part of a PDF page that we want to draw.
    """
    #: No notes on PDF page, only falsy value
    NONE    = 0
    #: Full PDF page (without notes)
    FULL    = 1
    #: Bottom half of PDF page
    BOTTOM  = 2
    #: Top half of PDF page
    TOP     = 3
    #: Right half of PDF page
    RIGHT   = 4
    #: Left half of PDF page
    LEFT    = 5

    def complement(val):
        """ Return the enum value for the other part of the page.
        """
        return PdfPage(val ^ 1)


    def scale(val):
        """ Return the enum value that does only scaling not shifting.
        """
        return PdfPage(val | 1)


    def direction(val):
        """ Returns whether the pdf page/notes mode is horizontal or vertical.

        Returns:
            `str`: a string representing the direction that can be used as the key in the config section
        """
        return 'horizontal' if val >= 4 else 'vertical'


    def from_screen(val, x, y, x2 = None, y2 = None):
        """ Transform visible part of the page coordinates to full page coordinates.

        Pass 2 floats to transform coordinates, 4 to transform margins,
        i.e. the second pair of coordinates is taken from the opposite corner.

        Args:
            x (`float`): x coordinate on the screen, on a scale 0..1
            y (`float`): y coordinate on the screen, on a scale 0..1
            x2 (`float`): second x coordinate on the screen, from the other side, on a scale 0..1
            y2 (`float`): second y coordinate on the screen, from the other side, on a scale 0..1
        """
        if val == PdfPage.RIGHT:
            page = ((1 + x) / 2., y)
        elif val == PdfPage.LEFT:
            page = (x / 2., y)
        elif val == PdfPage.BOTTOM:
            page = (x, (1 + y) / 2.)
        elif val == PdfPage.TOP:
            page = (x, y / 2.)
        else:
            page = (x, y)

        if x2 is None or y2 is None:
            return page
        else:
            return page + val.complement().from_screen(x2, y2)


    def to_screen(val, x, y, x2 = None, y2 = None):
        """ Transform full page coordinates to visible part coordinates.

        Pass 2 floats to transform coordinates, 4 to transform margins,
        i.e. the second pair of coordinates is taken from the opposite corner.

        Args:
            x (`float`): x coordinate on the page, on a scale 0..1
            y (`float`): y coordinate on the page, on a scale 0..1
            x2 (`float`): second x coordinate on the page, from the other side, on a scale 0..1
            y2 (`float`): second y coordinate on the page, from the other side, on a scale 0..1
        """
        if val == PdfPage.RIGHT:
            screen = (x * 2 - 1, y)
        elif val == PdfPage.LEFT:
            screen = (x * 2, y)
        elif val == PdfPage.BOTTOM:
            screen = (x, y * 2 - 1)
        elif val == PdfPage.TOP:
            screen = (x, y * 2)
        else:
            screen = (x, y)

        if x2 is None or y2 is None:
            return screen
        else:
            return screen + val.complement().to_screen(x2, y2)


class Link(object):
    """ This class encapsulates one hyperlink of the document.

    Args:
        x1 (`float`):  first x coordinate of the link rectangle
        y1 (`float`):  first y coordinate of the link rectangle
        x2 (`float`):  second x coordinate of the link rectangle
        y2 (`float`):  second y coordinate of the link rectangle
        action (`function`):  action to perform when the link is clicked
    """

    #: `float`, first x coordinate of the link rectangle
    x1 = None
    #: `float`, first y coordinate of the link rectangle
    y1 = None
    #: `float`, second x coordinate of the link rectangle
    x2 = None
    #: `float`, second y coordinate of the link rectangle
    y2 = None
    #: `function`, action to be perform to follow this link
    follow = lambda *args, **kwargs: logger.error(_("no action defined for this link!"))

    def __init__(self, x1, y1, x2, y2, action):
        self.x1, self.y1, self.x2, self.y2 = x1, y1, x2, y2
        self.follow = action


    def is_over(self, x, y):
        """ Tell if the input coordinates are on the link rectangle.

        Args:
            x (`float`):  input x coordinate
            y (`float`):  input y coordinate

        Returns:
            `bool`: `True` if the input coordinates are within the link rectangle, `False` otherwise
        """
        return ((self.x1 <= x) and (x <= self.x2) and (self.y1 <= y) and (y <= self.y2))


    @staticmethod
    def build_closure(fun, *args, **kwargs):
        r""" Return a lambda that calls fun(\*args, \**kwargs), with the current value of args and kwargs.

        By creating the lambda in a new scope, we bind the arguments.

        Args:
            fun (`function`): The function to be called
            args (`tuple`): non-keyworded variable-length argument list to pass to fun()
            kwargs (`dict`): keyworded variable-length argument dict to pass to fun()
        """
        return lambda *a, **k: fun(*(tuple(args) + tuple(a)), **dict(kwargs, **k))



class Page(object):
    """ Class representing a single page.

    It provides several methods used by the GUI for preparing windows for
    displaying pages, managing hyperlinks, etc.

    Args:
        doc (:class:`~Poppler.Page`):  the poppler object around the page
        number (`int`):  number of the page to fetch in the document
        parent (:class:`~pympress.document.Document`):  the parent Document class
    """

    #: Page handled by this class (instance of :class:`~Poppler.Page`)
    page = None
    #: `int`, number of the current page (starting from 0)
    page_nb = -1
    #: `str` representing the page label
    page_label = None
    #: All the links in the page, as a `list` of :class:`~pympress.document.Link` instances
    links = []
    #: All the media in the page, as a `list` of tuples of (area, filename)
    medias = []
    #: `float`, page width
    pw = 0.
    #: `float`, page height
    ph = 0.
    #: All text annotations
    annotations = []
    #: Instance of :class:`~pympress.document.Document` that contains this page.
    parent = None

    def __init__(self, page, number, parent):
        self.page = page
        self.page_nb = number
        self.parent = parent
        self.page_label = self.page.get_label()
        self.links = []
        self.medias = []
        self.annotations = []

        # Read page size
        self.pw, self.ph = self.page.get_size()

        # Read links on the page
        for link in self.page.get_link_mapping():
            action = self.get_link_action(link.action.type, link.action)
            my_link = Link(link.area.x1, link.area.y1, link.area.x2, link.area.y2, action)
            self.links.append(my_link)

        # Read annotations, in particular those that indicate media
        for annotation in self.page.get_annot_mapping():
            content = annotation.annot.get_contents()
            if content:
                self.annotations.append(content)

            annot_type = annotation.annot.get_annot_type()
            if annot_type == Poppler.AnnotType.LINK:
                # just an Annot, not subclassed -- probably redundant with links
                continue
            elif annot_type == Poppler.AnnotType.MOVIE:
                movie = annotation.annot.get_movie()
                filepath = self.parent.get_full_path(movie.get_filename())
                if filepath:
                    # TODO there is no autoplay, or repeatCount
                    relative_margins = Poppler.Rectangle()
                    relative_margins.x1 = annotation.area.x1 / self.pw        # left
                    relative_margins.x2 = 1.0 - annotation.area.x2 / self.pw  # right
                    relative_margins.y1 = annotation.area.y1 / self.ph        # bottom
                    relative_margins.y2 = 1.0 - annotation.area.y2 / self.ph  # top
                    media = (relative_margins, filepath, movie.show_controls())
                    self.medias.append(media)
                    action = Link.build_closure(self.parent.play_media, hash(media))
                else:
                    logger.error(_("Pympress can not find file ") + movie.get_filename())
                    continue
            elif annot_type == Poppler.AnnotType.SCREEN:
                action_obj = annotation.annot.get_action()
                if not action_obj:
                    continue
                action = self.get_annot_action(action_obj.any.type, action_obj, annotation.area)
                if not action:
                    continue
            elif annot_type == Poppler.AnnotType.FILE_ATTACHMENT:
                attachment = annotation.annot.get_attachment()
                prefix, ext = os.path.splitext(attachment.name)
                with tempfile.NamedTemporaryFile('wb', suffix=ext, prefix=prefix, delete=False) as f:
                    # now the file name is shotgunned
                    filename = f.name
                    self.parent.remove_on_exit(filename)
                if not attachment.save(filename):
                    logger.error(_("Pympress can not extract attached file"))
                    continue
                action = Link.build_closure(fileopen, filename)
            elif annot_type in {Poppler.AnnotType.TEXT, Poppler.AnnotType.POPUP,
                                Poppler.AnnotType.FREE_TEXT}:
                # text-only annotations, hide them from screen
                self.page.remove_annot(annotation.annot)
                continue
            elif annot_type in {Poppler.AnnotType.STRIKE_OUT, Poppler.AnnotType.HIGHLIGHT,
                                Poppler.AnnotType.UNDERLINE, Poppler.AnnotType.SQUIGGLY,
                                Poppler.AnnotType.POLYGON, Poppler.AnnotType.POLY_LINE,
                                Poppler.AnnotType.SQUARE, Poppler.AnnotType.CIRCLE,
                                Poppler.AnnotType.CARET, Poppler.AnnotType.LINE,
                                Poppler.AnnotType.STAMP, Poppler.AnnotType.INK}:
                # Poppler already renders annotation of these types, nothing more can be done
                # even though the rendering isn't always perfect.
                continue
            else:
                logger.warning(_("Pympress can not interpret annotation of type:") + " {} ".format(annot_type))
                continue

            my_annotation = Link(annotation.area.x1, annotation.area.y1, annotation.area.x2, annotation.area.y2, action)
            self.links.append(my_annotation)


    def get_link_action(self, link_type, action):
        """ Get the function to be called when the link is followed.

        Args:
            link_type (:class:`~Poppler.ActionType`): The type of action to be performed
            action (:class:`~Poppler.Action`): The atcion to be performed

        Returns:
            `function`: The function to be called to follow the link
        """
        # Poppler.ActionType.RENDITION should only appear in annotations, right? Otherwise how do we know
        # where to render it? Any documentation on which action types are admissible in links vs in annots
        # is very welcome. For now, link is fallback to annot so contains all action types.
        if link_type == Poppler.ActionType.NONE:
            return lambda: None

        elif link_type == Poppler.ActionType.GOTO_DEST:
            dest_type = action.goto_dest.dest.type
            if dest_type == Poppler.DestType.NAMED:
                dest = self.parent.doc.find_dest(action.goto_dest.dest.named_dest)
                if dest:
                    return Link.build_closure(self.parent.goto, dest.page_num - 1)
                else:
                    warning = _('Unrecognized named destination: ') + str(action.goto_dest.dest.named_dest)
            elif dest_type != Poppler.DestType.UNKNOWN:
                return Link.build_closure(self.parent.goto, action.goto_dest.dest.page_num - 1)

        elif link_type == Poppler.ActionType.NAMED:
            dest_name = action.named.named_dest
            dest = self.parent.doc.find_dest(dest_name)

            if dest:
                return Link.build_closure(self.parent.goto, dest.page_num)
            elif dest_name == "GoBack":
                return self.parent.hist_prev
            elif dest_name == "GoForward":
                return self.parent.hist_next
            elif dest_name == "FirstPage":
                return Link.build_closure(self.parent.goto, 0)
            elif dest_name == "PrevPage":
                return Link.build_closure(self.parent.goto, self.page_nb - 1)
            elif dest_name == "NextPage":
                return Link.build_closure(self.parent.goto, self.page_nb + 1)
            elif dest_name == "LastPage":
                return Link.build_closure(self.parent.goto, self.parent.pages_number() - 1)
            elif dest_name == "GoToPage":
                # Same as the 'G' action which allows one to pick a page to jump to
                return Link.build_closure(self.parent.start_editing_page_number, )
            elif dest_name == "Find":
                # TODO popup a text box and search results with Page.find_text
                # http://lazka.github.io/pgi-docs/Poppler-0.18/classes/Page.html#Poppler.Page.find_text
                warning = _("Pympress does not yet support link type \"{}\" to \"{}\"").format(link_type, dest_name)
            else:
                # TODO find out other possible named actions?
                warning = _("Pympress does not recognize link type \"{}\" to \"{}\"").format(link_type, dest_name)

        elif link_type == Poppler.ActionType.LAUNCH:
            launch = action.launch
            if launch.params:
                logger.warning("ignoring params: " + str(launch.params))

            filepath = self.parent.get_full_path(launch.file_name)
            if not filepath:
                logger.error("can not find file " + launch.file_name)
                return lambda: None

            else:
                return Link.build_closure(fileopen, filepath)

        elif link_type == Poppler.ActionType.URI:
            return Link.build_closure(webbrowser.open_new_tab, action.uri.uri)

        elif link_type == Poppler.ActionType.RENDITION:  # Poppler 0.22
            warning = _("Pympress does not yet support link type \"{}\"").format(link_type)
        elif link_type == Poppler.ActionType.MOVIE:  # Poppler 0.20
            warning = _("Pympress does not yet support link type \"{}\"").format(link_type)
        elif link_type == Poppler.ActionType.GOTO_REMOTE:
            warning = _("Pympress does not yet support link type \"{}\"").format(link_type)
        elif link_type == Poppler.ActionType.OCG_STATE:
            warning = _("Pympress does not yet support link type \"{}\"").format(link_type)
        elif link_type == Poppler.ActionType.JAVASCRIPT:
            warning = _("Pympress does not yet support link type \"{}\"").format(link_type)
        elif link_type == Poppler.ActionType.UNKNOWN:
            warning = _("Pympress does not yet support link type \"{}\"").format(link_type)
        else:
            warning = _("Pympress does not recognize link type \"{}\"").format(link_type)

        logger.info(warning)
        return Link.build_closure(logger.warning, _('Unsupported link clicked. ') + warning)


    def get_annot_action(self, link_type, action, rect):
        """ Get the function to be called when the link is followed.

        Args:
            link_type (:class:`~Poppler.ActionType`): The link type
            action (:class:`~Poppler.Action`): The action to be performed when the link is clicked
            rect (:class:`~Poppler.Rectangle`): The region of the page where the link is

        Returns:
            `function`: The function to be called to follow the link
        """
        if link_type == Poppler.ActionType.RENDITION:
            media = action.rendition.media
            if media.is_embedded():
                ext = get_extension(media.get_mime_type())
                with tempfile.NamedTemporaryFile('wb', suffix=ext, prefix='pdf_embed_', delete=False) as f:
                    # now the file name is shotgunned
                    filename = f.name
                    self.parent.remove_on_exit(filename)
                if not media.save(filename):
                    logger.error(_("Pympress can not extract embedded media"))
                    return None
            else:
                filename = self.parent.get_full_path(media.get_filename())
                if not filename:
                    logger.error(_("Pympress can not find file ") + media.get_filename())
                    return None

            # TODO grab the show_controls, autoplay, repeat
            relative_margins = Poppler.Rectangle()
            relative_margins.x1 = rect.x1 / self.pw        # left
            relative_margins.x2 = 1.0 - rect.x2 / self.pw  # right
            relative_margins.y1 = rect.y1 / self.ph        # bottom
            relative_margins.y2 = 1.0 - rect.y2 / self.ph  # top

            media = (relative_margins, filename, False)
            self.medias.append(media)
            return Link.build_closure(self.parent.play_media, hash(media))

        else:
            return self.get_link_action(link_type, action)


    def number(self):
        """ Get the page number.
        """
        return self.page_nb


    def label(self):
        """ Get the page label.
        """
        return self.page_label


    def get_link_at(self, x, y, dtype=PdfPage.FULL):
        """ Get the :class:`~pympress.document.Link` corresponding to the given position.

        Returns `None` if there is no link at this position.

        Args:
            x (`float`):  horizontal coordinate
            y (`float`):  vertical coordinate
            dtype (:class:`~pympress.document.PdfPage`):  the type of document to consider

        Returns:
            :class:`~pympress.document.Link`: the link at the given coordinates
            if one exists, `None` otherwise
        """
        x, y = dtype.from_screen(x, y)

        xx = self.pw * x
        yy = self.ph * (1. - y)

        for link in self.links:
            if link.is_over(xx, yy):
                return link

        return None


    def get_size(self, dtype=PdfPage.FULL):
        """ Get the page size.

        Args:
            dtype (:class:`~pympress.document.PdfPage`):  the type of document to consider

        Returns:
            `(float, float)`: page size
        """
        return dtype.scale().from_screen(self.pw, self.ph)


    def get_aspect_ratio(self, dtype=PdfPage.FULL):
        """ Get the page aspect ratio.

        Args:
            dtype (:class:`~pympress.document.PdfPage`):  the type of document to consider

        Returns:
            `float`: page aspect ratio
        """
        w, h = self.get_size(dtype)
        return w / h


    def get_annotations(self):
        """ Get the list of text annotations on this page.

        Returns:
            `list` of `str`: annotations on this page
        """
        return self.annotations


    def get_media(self):
        """ Get the list of medias this page might want to play.

        Returns:
            `list`: medias in this page
        """
        return self.medias


    def render_cairo(self, cr, ww, wh, dtype=PdfPage.FULL):
        """ Render the page on a Cairo surface.

        Args:
            cr (:class:`~Gdk.CairoContext`):  target surface
            ww (`int`):  target width in pixels
            wh (`int`):  target height in pixels
            dtype (:class:`~pympress.document.PdfPage`):  the type of document that should be rendered
        """
        pw, ph = self.get_size(dtype)

        cr.set_source_rgb(1, 1, 1)

        # Scale
        scale = min(ww / pw, wh / ph)
        cr.scale(scale, scale)

        cr.rectangle(0, 0, pw, ph)
        cr.fill()

        # For "regular" pages, there is no problem: just render them.
        # For other pages (i.e. half of a page), the widget already has correct
        # dimensions so we don't need to deal with that. But for right and bottom
        # halfs we must translate the output in order to only show the correct half.
        if dtype == PdfPage.RIGHT:
            cr.translate(-pw, 0)
        elif dtype == PdfPage.BOTTOM:
            cr.translate(0, -ph)

        self.page.render(cr)


    def can_render(self):
        """ Informs that rendering *is* necessary (avoids checking the type).

        Returns:
            `bool`: `True`, do rendering
        """
        return True



class Document(object):
    """ This is the main document handling class.

    .. note:: The internal page numbering scheme is the same as in Poppler: it
       starts at 0.

    Args:
        builder (:class:`pympress.builder.Builder`):  A builder to load callbacks
        pop_doc (:class:`~pympress.Poppler.Document`):  Instance of the Poppler document that this class will wrap
        path (`str`):  Absolute path to the PDF file to open
        page (`int`):  page number to which the file should be opened
    """

    #: Current PDF document (:class:`~Poppler.Document` instance)
    doc = None
    #: Path to pdf
    path = None
    #: Number of pages in the document
    nb_pages = -1
    #: Number of the current page
    cur_page = -1
    #: Pages cache (`dict` of :class:`~pympress.document.Page`). This makes
    #: navigation in the document faster by avoiding calls to Poppler when loading
    #: a page that has already been loaded.
    pages_cache = {}
    #: Files that are temporary and need to be removed
    temp_files = set()
    #: History of pages we have visited
    history = []
    #: Our position in the history
    hist_pos = -1
    #: `dict` of all the page labels
    page_labels = []

    #: callback, to be connected to :func:`~pympress.ui.UI.on_page_change`
    page_change = lambda p: None
    #: callback, to be connected to :func:`~pympress.extras.Media.play`
    play_media = lambda h: None
    #: callback, to be connected to :func:`~pympress.editable_label.PageNumber.start_editing`
    start_editing_page_number = lambda: None

    def __init__(self, builder, pop_doc, path, page=0):
        # Connect callbacks
        self.play_media                = builder.get_callback_handler('medias.play')
        self.page_change               = builder.get_callback_handler('on_page_change')
        self.start_editing_page_number = builder.get_callback_handler('page_number.start_editing')

        # Setup PDF file
        self.path = path
        self.doc = pop_doc

        # Pages number
        self.nb_pages = self.doc.get_n_pages()
        self.page_labels = [self.doc.get_page(n).get_label() for n in range(self.nb_pages)]

        # Number of the current page
        self.cur_page = page
        self.history.append(page)
        self.hist_pos = 0

        # Pages cache
        self.pages_cache = {}


    def get_structure(self, index_iter = None):
        """ Gets the structure of the document from its index.

        Recursive, pass the iterator.

        Args:
            index_iter (:class:`~Poppler.IndexIter` or `None`): the iterator for the child index to explore.

        Returns:
            `list`: A list of tuples (depth, page number, title)
        """
        try:
            if index_iter is None:
                index_iter = Poppler.IndexIter(self.doc)
        except TypeError:
            return {}
        if index_iter is None:
            return {}

        index = {}
        while True:
            action = index_iter.get_action()
            title = ''
            try:
                if action.type == Poppler.ActionType.GOTO_DEST:
                    title = action.goto_dest.title
                    if action.goto_dest.dest.type == Poppler.DestType.NAMED:
                        dest = self.doc.find_dest(action.goto_dest.dest.named_dest)
                        page = dest.page_num - 1
                    elif action.goto_dest.dest.type == Poppler.DestType.UNKNOWN:
                        raise AssertionError('Unknown type of destination')
                    else:
                        page = action.goto_dest.dest.page_num - 1
                else:
                    raise AssertionError('Unexpected type of action')
            except Exception:
                logger.error(_('Unexpected action in index "{}"').format(action.type))
                page = None

            new_entry = {'title': title}
            child = index_iter.get_child()
            if child:
                new_entry['children'] = self.get_structure(child)

            # there should not be synonymous sections, correct the page here to a better guess
            if page is None or page in index:
                if 'children' in new_entry:
                    page = min(new_entry['children'])
                else:
                    lower_bound = max(index)
                    find = index[lower_bound]
                    while 'children' in find:
                        lower_bound = max(find)
                        find = find[lower_bound]

                    try:
                        page = min(l for l, n in enumerate(self.page_labels)
                                   if n == self.page_labels[page] and l > lower_bound)
                    except ValueError:  # empty iterator
                        page = lower_bound + 1


            index[page] = new_entry

            if not index_iter.next():
                break

        return index


    @staticmethod
    def path_to_uri(path):
        """ Transform a path to a file URI, and maintains others URIs.
        """
        # Do not trust urlsplit, manually check we have an URI
        pos = path.index(':') if ':' in path else -1
        if path[pos:pos + 3] == '://' or (pos > 1 and set(path[:pos]) <= scheme_chars):
            return path
        else:
            return urljoin('file:', pathname2url(path))


    @staticmethod
    def create(builder, path, page=0):
        """ Initializes a Document by passing it a :class:`~Poppler.Document`.

        Args:
            builder (:class:`pympress.builder.Builder`):  A builder to load callbacks
            path (`str`):  Absolute path to the PDF file to open
            page (`int`):  page number to which the file should be opened

        Returns:
            :class:`~pympress.document.Document`: The initialized document
        """
        if path is None:
            doc = EmptyDocument()
        else:
            uri = Document.path_to_uri(path)
            poppler_doc = Poppler.Document.new_from_file(uri, None)
            doc = Document(builder, poppler_doc, path, page)

        return doc


    def guess_notes(self, horizontal, vertical):
        """ Get our best guess for the document mode.

        Args:
            horizontal (`str`): A string representing the preference for horizontal slides
            vertical (`str`): A string representing the preference for vertical slides

        Returns:
            :class:`~pympress.document.PdfPage`: the notes mode
        """
        page = self.page(self.cur_page) or self.page(0)
        if page is None:
            return PdfPage.NONE

        ar = page.get_aspect_ratio()

        # "Regular" slides will have an aspect ratio of 4/3, 16/9, 16/10... i.e. in the range [1..2]
        # So if the aspect ratio is >= 2, we can assume it is a document with notes on the side.
        if ar >= 2:
            try:
                return PdfPage[horizontal.upper()]
            except KeyError:
                return PdfPage.RIGHT

        # Make exception for classic american letter format and ISO (A4, B5, etc.)
        if abs(ar - 8.5 / 11) < 1e-3 or abs(ar - 1 / math.sqrt(2)) < 1e-3:
            return PdfPage.NONE

        # If the aspect ratio is < 1, we can assume it is a document with notes above or below.
        if ar < 1:
            try:
                return PdfPage[vertical.upper()]
            except KeyError:
                return PdfPage.BOTTOM

        return PdfPage.NONE


    def page(self, number):
        """ Get the specified page.

        Args:
            number (`int`):  number of the page to return

        Returns:
            :class:`~pympress.document.Page`: the wanted page, or `None` if it does not exist
        """
        if number >= self.nb_pages or number < 0:
            return None

        if number not in self.pages_cache:
            self.pages_cache[number] = Page(self.doc.get_page(number), number, self)
        return self.pages_cache[number]


    def current_page(self):
        """ Get the current page.

        Returns:
            :class:`~pympress.document.Page`: the current page
        """
        return self.page(self.cur_page)


    def next_page(self):
        """ Get the next page.

        Returns:
            :class:`~pympress.document.Page`: the next page, or `None` if this is the last page
        """
        return self.page(self.cur_page + 1)


    def pages_number(self):
        """ Get the number of pages in the document.

        Returns:
            `int`: the number of pages in the document
        """
        return self.nb_pages


    def _do_page_change(self, number):
        """ Perform the actual change of page and UI notification.

        The page number is **not** checked here, so it must be within bounds already.

        Args:
            number (`int`):  number of the destination page
        """
        self.cur_page = number
        self.page_change()


    def has_labels(self):
        """ Return whether this document has useful labels.

        Returns:
            `bool`: False iff there are no labels or they are just the page numbers
        """
        return self.page_labels != [str(n + 1) for n in range(self.nb_pages)]


    def lookup_label(self, label, prefix_unique = True):
        """ Find a page from its label.

        Args:
            label (`str`): the label we are searching for
            prefix_unique (`bool`): whether a prefix match should be unique, e.g. when the user is still typing

        Returns:
            `int`: the page
        """
        # somehow this always returns None:
        # page = self.doc.get_page_by_label(label).get_index()

        # make a shortlist: squash synonymous labels, keeping the last one
        compatible_labels = {l: n for n, l in enumerate(self.page_labels) if l.lower().startswith(label.lower())}

        if len(compatible_labels) == 1:
            return set(compatible_labels.values()).pop()

        # try exact match
        try:
            return compatible_labels[label]
        except KeyError:
            pass

        # try case-insensitive match, prefix case-sensitive match, prefix case-insensitive match (unless prefix_unique)
        full = len(label)
        for filtering in [lambda l: len(l) == full, lambda l: l.startswith(label), lambda l: not prefix_unique]:
            try:
                found = next(l for l in compatible_labels if filtering(l))
            except StopIteration:
                continue
            return compatible_labels[found]
        else:
            return None


    def goto(self, number):
        """ Switch to another page.

        Args:
            number (`int`):  number of the destination page
        """
        if number < 0:
            number = 0
        if number >= self.nb_pages:
            number = self.nb_pages - 1

        if number != self.cur_page:
            # chop off history where we were and go to end
            self.hist_pos += 1
            if self.hist_pos < len(self.history):
                self.history = self.history[:self.hist_pos]
            self.history.append(number)

            self._do_page_change(number)


    def goto_next(self, *args):
        """ Switch to the next page.
        """
        self.goto(self.cur_page + 1)


    def goto_prev(self, *args):
        """ Switch to the previous page.
        """
        self.goto(self.cur_page - 1)


    def goto_home(self, *args):
        """ Switch to the first page.
        """
        self.goto(0)


    def goto_end(self, *args):
        """ Switch to the last page.
        """
        self.goto(self.nb_pages - 1)


    def label_after(self, page):
        """ Switch to the next page with different label.

        If we're within a set of pages with the same label we want to go to the last one.
        """
        labels_after = enumerate(self.page_labels[page + 1:], page + 1)

        try:
            next_page, next_label = next(labels_after)
        except StopIteration:
            # we're already at the last page!
            return page

        # will stop as soon as next_page + 1 (aka following_page) is a different label or due to end of iterator
        for following_page, following_label in labels_after:
            if following_label == next_label:
                next_page = following_page
            else:
                break

        return next_page


    def label_before(self, page):
        """ Switch to the previous page with different label.

        If we're within a set of pages with the same label we want to go *before* the first one.
        """
        # will stop as soon as we find a different label or due to end of iterator
        for prev_page, prev_label in enumerate(reversed(self.page_labels[:page])):
            if prev_label != self.page_labels[page]:
                return page - 1 - prev_page
        else:
            return 0


    def label_next(self, *args):
        """ Switch to the next page with different label.
        """
        self.goto(self.label_after(self.cur_page))


    def label_prev(self, *args):
        """ Switch to the previous page with different label.
        """
        self.goto(self.label_before(self.cur_page))


    def hist_next(self, *args):
        """ Switch to the page we viewed next.
        """
        if self.hist_pos + 1 == len(self.history):
            return

        self.hist_pos += 1
        self._do_page_change(self.history[self.hist_pos])


    def hist_prev(self, *args):
        """ Switch to the page we viewed before.
        """
        if self.hist_pos == 0:
            return

        self.hist_pos -= 1
        self._do_page_change(self.history[self.hist_pos])


    def get_uri(self):
        """ Gives access to the URI, rather than the path, of this document.

        Returns:
            `str`: the URI to the file currently opened.
        """
        return self.path_to_uri(self.path)


    def get_full_path(self, filename):
        """ Returns full path, extrapolated from a path relative to this document or to the current directory.

        Args:
            filename (`str`):  Name of the file or relative path to it

        Returns:
            `str`: the full path to the file or None if it doesn't exist
        """
        filepath = None
        if os.path.isabs(filename):
            return os.path.normpath(filename) if os.path.exists(filename) else None

        for d in [os.path.dirname(self.path), os.getcwd()]:
            filepath = os.path.normpath(os.path.join(d, filename))
            if os.path.exists(filepath):
                return filepath


    def remove_on_exit(self, filename):
        """ Remember a temporary file to delete later.

        Args:
            filename (`str`): The path to the file to delete
        """
        self.temp_files.add(filename)


    def cleanup_media_files(self):
        """ Removes all files that were extracted from the pdf into the filesystem.
        """
        for f in self.temp_files:
            os.remove(f)
        self.temp_files.clear()


class EmptyPage(Page):
    """ A dummy page, placeholder for when there are no valid pages around.

    This page is a non-notes page with an aspect ratio of 1.3 and nothing else inside.
    Also, it has no "rendering" capability, and is made harmless by overriding its render function.
    """

    def __init__(self):
        self.page = None
        self.page_nb = -1
        self.parent = None
        self.page_label = None
        self.links = []
        self.medias = []
        self.annotations = []

        # by default, anything that will have a 1.3 asapect ratio
        self.pw, self.ph = 1.3, 1.0


    def render_cairo(self, cr, ww, wh, dtype=PdfPage.FULL):
        """ Overriding this purely for safety: make sure we do not accidentally try to render.

        Args:
            cr (:class:`~Gdk.CairoContext`):  target surface
            ww (`int`):  target width in pixels
            wh (`int`):  target height in pixels
            dtype (:class:`~pympress.document.PdfPage`):  the type of document that should be rendered
        """
        pass


    def can_render(self):
        """ Informs that rendering is *not* necessary (avoids checking the type).

        Returns:
            `bool`: `False`, no rendering
        """
        return False


class EmptyDocument(Document):
    """ A dummy document, placeholder for when no document is open.
    """

    def __init__(self):
        self.path = None
        self.doc = None
        self.nb_pages = 0
        self.cur_page = -1
        self.pages_cache = {-1: EmptyPage()}
        self.notes = False


    def page(self, number):
        """ Retrieve a page from the document.

        Args:
            number (`int`): page number to be retrieved

        Returns:
            :class:`~pympress.document.EmptyPage` or `None`: -1 returns the empty page so we can display something.
        """
        return self.pages_cache[number] if number in self.pages_cache else None


##
# Local Variables:
# mode: python
# indent-tabs-mode: nil
# py-indent-offset: 4
# fill-column: 80
# end: