# Python source code of fb2html

# -*- coding: utf-8 -*-

import os, os.path, sys
import re
import io
import codecs
import uuid
import base64
import hashlib
import html
from copy import deepcopy
from typing import Tuple
from slugify import slugify

from modules.image_utils import ImageText
from modules.utils import make_dir, copy_file, format_pattern
from modules.myhyphen import MyHyphen

import cssutils

from lxml import etree, objectify
from PIL import Image, ImageFile

# Pillow setting: accept image files whose data is truncated instead of
# failing while loading them.
ImageFile.LOAD_TRUNCATED_IMAGES = True

# Document prologue for generated XHTML pages: <html>/<head> with a title,
# the content-type meta tag and the link to stylesheet.css, followed by the
# opening <body> tag.
HTMLHEAD = ('<html xmlns="http://www.w3.org/1999/xhtml">' '<head>' '<title>fb2mobi.py</title>' '<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>' '<link rel="stylesheet" type="text/css" href="stylesheet.css"/>' '</head>' '<body>')

# Document epilogue closing the tags opened by HTMLHEAD.
HTMLFOOT = ('</body>' '</html>')


def ns_tag(tag):
    """Return *tag* without its ``{namespace}`` prefix.

    Only string tags written in Clark notation (``{uri}local``) are changed.
    Anything else is returned untouched: plain names, empty strings, and the
    non-string tags lxml uses for special nodes such as comments or
    processing instructions.

    Args:
        tag: Element/attribute name, or a non-string node marker.

    Returns:
        The local name for a namespaced string tag, otherwise *tag* itself.
    """
    if isinstance(tag, str) and tag.startswith('{'):
        _, closing, local_name = tag.partition('}')
        # A '{' without a matching '}' is not Clark notation - leave it alone.
        if closing:
            return local_name
    return tag


def save_html(string):
    """Escape ``&``, ``<`` and ``>`` in *string* for use as HTML text.

    Quotes are left as they are. A false value (``None``, empty string)
    yields an empty string.
    """
    return html.escape(string, quote=False) if string else ''


def sanitize_id(string):
    """Strip carriage returns, line feeds and spaces out of *string*.

    A false value (``None``, empty string) yields an empty string.
    """
    if not string:
        return ''

    cleaned = string
    for unwanted in ('\r', '\n', ' '):
        cleaned = cleaned.replace(unwanted, '')
    return cleaned


def write_file(buff, filename):
    """Write the text *buff* to *filename*, encoded as UTF-8.

    ``make_dir(filename)`` is called first (presumably to create the target
    directory - see modules.utils).
    """
    make_dir(filename)
    with codecs.open(filename, mode='w', encoding='utf-8') as out:
        out.write(buff)


def write_file_bin(buff, filename):
    """Write the bytes *buff* to *filename*.

    ``make_dir(filename)`` is called first (presumably to create the target
    directory - see modules.utils).
    """
    make_dir(filename)
    with open(filename, mode='wb') as out:
        out.write(buff)


class Fb2XHTML:

    def __init__(self, fb2file, tempdir, config):
        """Read conversion settings from *config* and parse *fb2file*.

        Args:
            fb2file: Path of the FB2 document to convert.
            tempdir: Working directory. The ``OEBPS`` and ``META-INF``
                output paths are derived from it, and the XSLT-transformed
                copy of the book is temporarily written into it.
            config: Settings object; ``log``, ``output_format``,
                ``current_profile`` (a mapping) and the other attributes
                read below are taken from it.
        """

        self.log = config.log

        self.kindle = config.output_format.lower() in ('mobi', 'azw3')

        self.buff = []
        self.current_header_level = 0  # Level of the current header
        self.header = False  # True while a header is being generated
        self.subheader = False  # True while a subheader is being generated
        self.first_chapter_line = False  # First line of a chapter (section) - used for placing dropcaps
        self.inline_image_mode = False  # Inline image insertion mode indicator
        self.body_name = ''  # Name of the current body, for example notes
        self.no_paragraph = False  # The following paragraph is inside an epigraph, annotation, etc.
        self.first_header_in_body = True  # First header of the current body

        # Make sure book title is never empty
        temp_book_name = os.path.basename(fb2file)
        if not temp_book_name:
            temp_book_name = fb2file
        if os.path.splitext(temp_book_name)[1].lower() == '.fb2':
            temp_book_name = os.path.splitext(temp_book_name)[0]

        self.orig_file_name = fb2file

        self.book_title = temp_book_name  # Book title
        self.book_authors = []  # Author(s)
        self.book_lang = 'ru'  # Book language, 'ru' by default
        self.book_series = ''  # Book series
        self.book_series_num = ''  # Number within the series
        self.book_cover = ''  # Reference to the cover image
        self.book_date = ''

        self.dropcaps = config.current_profile['dropcaps'].lower()  # Dropcaps style setting
        self.nodropcaps = config.no_dropcaps_symbols  # Characters that must not become dropcaps

        # Maximum header (section) level placed into the table of contents page (toc.xhtml).
        # toc.ncx receives all levels.
        self.toc_max_level = config.current_profile['tocMaxLevel'] if config.current_profile['tocMaxLevel'] else 1000
        # How to split toc.ncx for Kindle (eInk devices only show 2 levels)
        self.toc_kindle_level = config.current_profile['tocKindleLevel'] if config.current_profile['tocKindleLevel'] else 2

        self.authorformat = config.current_profile['authorFormat']
        self.booktitleformat = config.current_profile['bookTitleFormat']

        self.css_file = config.current_profile['css']
        self.parse_css = config.current_profile['parse_css']

        self.open_book_from_cover = config.current_profile['openBookFromCover']
        if self.open_book_from_cover and self.kindle:
            self.open_book_from_cover = False
            self.log.warning('For "kindle" books (mobi, azw3) option "open book from cover" is ignored')

        self.annotation = None

        self.generate_toc_page = config.current_profile['generateTOCPage']
        self.generate_annotation_page = config.current_profile['generateAnnotationPage']
        self.generate_opf_guide = config.current_profile['generateOPFGuide']

        self.vignettes = config.current_profile['vignettes']
        self.vignette_files = []

        self.removepngtransparency = config.current_profile['removePngTransparency']  # Remove transparency in PNG images

        self.annotation_title = config.current_profile['annotationTitle']  # Title of the annotation page
        self.toc_title = config.current_profile['tocTitle']  # Title of the table of contents page

        self.chaptersplit = config.current_profile['chapterOnNewPage']  # Split chapters into separate files
        self.chapterlevel = config.current_profile['chapterLevel']

        self.seriespositions = config.current_profile['seriesPositions']

        self.tocbeforebody = config.current_profile['tocBeforeBody']  # TOC position - at the beginning or at the end of the book
        self.transliterate_author_and_title = config.transliterate_author_and_title

        self.screen_width = config.screen_width
        self.screen_height = config.screen_height

        self.toc_type = config.current_profile['tocType']

        self.toc = {}  # Table of contents, filled in while parsing
        self.toc_index = 1  # Number of the current TOC entry
        # Name of the xhtml file the book text is currently written to.
        self.current_file = 'index.xhtml'
        self.current_file_index = 0

        # Support for putting footnotes and comments into the book text
        self.notes_dict = {}  # Footnotes and comments by id
        self.notes_order = []  # Notes in order of discovery
        self.notes_titles = {}  # Dictionary of note body titles
        self.notes_mode = config.current_profile['notesMode']  # Notes display mode: inline, block
        self.notes_bodies = config.current_profile['notesBodies']
        self.current_notes = []  # Holds the current note

        self.temp_dir = tempdir  # Temporary directory for intermediate files
        self.temp_content_dir = os.path.join(self.temp_dir, 'OEBPS')
        self.temp_inf_dir = os.path.join(self.temp_dir, 'META-INF')

        self.html_file_list = []  # Generated xhtml files
        self.image_file_list = []  # Images
        self.image_count = 0

        self.pages_list = {}  # Additional pages per file
        self.page_length = 0

        self.characters_per_page = config.characters_per_page

        # Additional cover processing

        # when set to Top, Center or Bottom cover will be stamped
        self.cover_stamp = config.current_profile['coverStamp']
        # when book has no cover image this file will be used instead
        self.cover_default = config.current_profile['coverDefault']
        # if it points to font file and cover_stamp is not 'None' cover will be stamped
        self.cover_font = config.current_profile['coverTextFont']

        # if not 0 - scale all images but cover
        self.images_scale = config.current_profile['scaleImages']

        self.genres = []

        self.tree = etree.parse(fb2file, parser=etree.XMLParser(recover=True))
        if 'xslt' in config.current_profile:

            # rupor - this allows for smaller xsl, quicker replacement and allows handling of tags in the paragraphs
            class MyExtElement(etree.XSLTExtension):

                def execute(self, _, self_node, input_node, output_parent):
                    child = deepcopy(input_node)
                    found = False
                    # iter() replaces the deprecated getiterator(); same traversal
                    for elem in child.iter():
                        if not found and elem.text is not None:
                            # Replace the first character (plus the whitespace
                            # following it) of the first text found with the
                            # text of the extension element.
                            found = True
                            old_text = elem.text
                            elem.text = self_node.text
                            if len(old_text) > 1:
                                i = 1
                                for c in old_text[1:]:
                                    if c.isspace():
                                        i += 1
                                    else:
                                        break
                                elem.text = elem.text + old_text[i:]
                        if not hasattr(elem.tag, 'find'):
                            # Non-string tag (comment and the like)
                            continue
                        i = elem.tag.find('}')
                        if i >= 0:
                            elem.tag = elem.tag[i + 1:]
                    objectify.deannotate(child, cleanup_namespaces=True)
                    output_parent.append(child)

            config.log.info('Applying XSLT transformations "{0}"'.format(config.current_profile['xslt']))
            self.transform = etree.XSLT(etree.parse(config.current_profile['xslt']), extensions={('fb2mobi_ns', 'katz_tr'): MyExtElement()})
            transformed = self.transform(self.tree)
            for entry in self.transform.error_log:
                self.log.warning(entry)

            # Make parser re-read XML - in case transformation changed document structure
            transformed_file_name = os.path.join(tempdir, os.path.split(fb2file)[1])
            transformed.write(transformed_file_name, encoding='utf-8', method='xml', xml_declaration=True, pretty_print=False)
            self.tree = etree.parse(transformed_file_name, parser=etree.XMLParser(recover=True))
            try:
                os.remove(transformed_file_name)
            except OSError:
                # Best effort only: a leftover temporary file is harmless
                config.log.info('Unable to remove transformed file "{0}".'.format(transformed_file_name))

        self.root = self.tree.getroot()

        self.hyphenate = config.current_profile['hyphens']
        # Always define the attribute so it can be tested even with hyphenation off
        self.hyphenator = None
        if self.hyphenate:
            self.replaceNBSP = config.current_profile['hyphensReplaceNBSP']
            self.hyphenator = MyHyphen(self.book_lang)

        self.first_body = True  # First body flag
        self.font_list = []
        self.book_uuid = uuid.uuid4()
        self.links_location = {}

    def get_book_authors(self, short=False):
        if self.book_authors:
            if short and len(self.book_authors) > 1:
                return self.book_authors[0] + ' и др' if self.book_lang.lower() == 'ru' else ', et al'
            return ', '.join(self.book_authors)
        else:
            return ''

    def generate(self):
        """Run the whole conversion and write all output files.

        Notes are collected first, then binaries are extracted (so images
        are known before the text refers to them), then the description and
        bodies are parsed. Afterwards links and images are fixed up and the
        table of contents, cover, NCX, stylesheet, vignettes, page map, OPF,
        container and mimetype files are produced.
        """

        self.get_notes_dict()

        for child in self.root:
            if ns_tag(child.tag) == 'binary':
                self.parse_binary(child)

        for child in self.root:
            if ns_tag(child.tag) == 'description':
                self.parse_description(child)
            elif ns_tag(child.tag) == 'body':
                self.parse_body(child)

        self.correct_links()
        self.correct_images()

        if self.generate_toc_page:
            self.generate_toc()
        self.generate_cover()
        self.generate_ncx()

        if self.css_file:
            self.copy_css()

        vignettes_dir = os.path.join(self.temp_content_dir, 'vignettes')
        for v in self.vignette_files:
            try:
                copy_file(v, os.path.join(vignettes_dir, os.path.split(v)[1]))
            except Exception:
                # A missing vignette must not abort the conversion, but
                # KeyboardInterrupt/SystemExit are no longer swallowed.
                self.log.warning('File {} not found.'.format(v))

        self.generate_pagemap()
        self.generate_opf()
        self.generate_container()
        self.generate_mimetype()

    def copy_css(self):
        """Put the profile stylesheet into the book as ``stylesheet.css``.

        With ``parse_css`` enabled the stylesheet is parsed by cssutils, every
        ``url()`` it references is copied into the book (fonts into
        ``fonts/``, everything else into ``images/`` with a ``css_`` prefix)
        and rewritten to the new relative location. Copied font URLs are
        collected in ``self.font_list``. Otherwise the file is copied as is.
        """
        base_dir = os.path.abspath(os.path.dirname(self.css_file))
        self.font_list = []

        def replace_url(url):
            """Copy the file behind *url* into the book and return its new URL."""
            source_file = os.path.abspath(os.path.join(base_dir, url))

            if os.path.splitext(url)[1].lower() in ('.ttf', '.otf'):
                dest_file = os.path.abspath(os.path.join(self.temp_content_dir, 'fonts', os.path.basename(source_file)))
                new_url = 'fonts/' + os.path.basename(url)
                # The same font may be referenced several times - list it once
                if new_url not in self.font_list:
                    self.font_list.append(new_url)
            else:
                dest_file = os.path.abspath(os.path.join(self.temp_content_dir, 'images', 'css_' + os.path.basename(source_file)))
                new_url = 'images/css_' + os.path.basename(url)

            try:
                copy_file(source_file, dest_file)
            except Exception:
                # Keep going with the remaining resources; only real errors
                # are caught, not KeyboardInterrupt/SystemExit.
                self.log.error('File {0}, referred by css, not found.'.format(url))

            return new_url

        if self.parse_css:

            # Note, macros are temporary, until CSS3 module is fixed and starts to recognize "rem" units
            cssutils.profile.addProfile('CSS extentions', {
                '-webkit-hyphens': 'none',
                'adobe-hyphenate': 'none',
                '-moz-hyphens': 'none',
                '-ms-hyphens': 'none',
                'hyphens': 'none|manual|auto'
            }, {
                'length': r'0|{num}(em|ex|px|in|cm|mm|pt|pc|q|ch|rem|vw|vh|vmin|vmax)',
                'positivelength': r'0|{positivenum}(em|ex|px|in|cm|mm|pt|pc|q|ch|rem|vw|vh|vmin|vmax)',
                'angle': r'0|{num}(deg|grad|rad|turn)'
            })

            stylesheet = cssutils.parseFile(self.css_file)
            cssutils.replaceUrls(stylesheet, replace_url)
            write_file(str(stylesheet.cssText, 'utf-8'), os.path.join(self.temp_content_dir, 'stylesheet.css'))
        else:
            copy_file(self.css_file, os.path.join(self.temp_content_dir, 'stylesheet.css'))

    def correct_links(self):
        """Turn in-document ``#id`` links into ``file#id`` links.

        Every generated XHTML file is re-read; each ``<a href="#id">`` whose
        id is known in ``self.links_location`` gets the stored file name put
        in front of the fragment. The file is then written again through
        ``write_buff``. Links to unknown ids are left unchanged.
        """
        for fl in self.html_file_list:
            parser = etree.XMLParser(encoding='utf-8')
            root = etree.parse(os.path.join(self.temp_content_dir, fl), parser).getroot()

            for elem in root.xpath('//xhtml:a', namespaces={'xhtml': 'http://www.w3.org/1999/xhtml'}):
                link = elem.get('href', '')
                if not link.startswith('#'):
                    continue
                try:
                    elem.set('href', self.links_location[link[1:]] + link)
                except (KeyError, TypeError):
                    # Unknown anchor (or unusable location entry): keep the
                    # link as it is. Other errors are no longer hidden.
                    continue

            # Serialise without the encoding pseudo-attribute in the XML declaration
            self.buff = str.replace(str(etree.tostring(root, encoding='utf-8', method='xml', xml_declaration=True), 'utf-8'), ' encoding=\'utf-8\'', '', 1)

            self.current_file = fl
            self.write_buff()

    def correct_images(self):
        """Clean up the image list: covers first, then optional rescaling.

        * If the book has a cover, only the first image registered under the
          cover id is kept.
        * If it has none and a default cover is configured for a Kindle
          target, the default cover is copied in and registered as
          ``dummycover.jpg``; stamping is then forced on.
        * If ``images_scale`` is positive, every JPEG/PNG image except the
          cover is resampled by that factor. Images that cannot be processed
          are left untouched and a warning is logged.
        """
        if self.book_cover:
            # Leave only first cover - drop the rest
            have_cover = False
            extra_covers = []
            for entry in self.image_file_list:
                if entry[0] == self.book_cover:
                    if have_cover:
                        extra_covers.append(entry)
                    else:
                        have_cover = True
            for entry in extra_covers:
                self.image_file_list.remove(entry)
        elif self.kindle and self.cover_default:
            # no cover - if "kindle" provide basic dummy one
            try:
                dst_name = "cover{0:08}.jpg".format(self.image_count)
                copy_file(self.cover_default, os.path.join(self.temp_content_dir, 'images', dst_name))
                self.image_file_list.append(("dummycover.jpg", "image/jpeg", dst_name))
                self.image_count += 1
                self.book_cover = "dummycover.jpg"
                # default cover will always be stamped
                if self.cover_stamp == 'None':
                    self.cover_stamp = 'Center'
            except Exception:
                self.log.warning('Default cover {} not found.'.format(self.cover_default))

        if self.images_scale > 0.0:
            # if requested - resample all images but cover
            temp_name = os.path.join(self.temp_content_dir, 'images', "image.tmp")
            for imgid, _, file in self.image_file_list:
                if imgid == self.book_cover:
                    continue

                full_name = os.path.join(self.temp_content_dir, 'images', file)
                try:
                    # The context manager closes the source file before it is replaced below
                    with Image.open(full_name) as im:
                        imgfmt = im.format
                        if imgfmt.lower() not in ('jpeg', 'png'):
                            continue

                        dpi = im.info.get("dpi") or (300, 300)
                        new_size = (int(im.width * self.images_scale), int(im.height * self.images_scale))
                        resized = im.resize(new_size, Image.LANCZOS)

                    resized.save(temp_name, format=imgfmt, dpi=dpi)
                    os.replace(temp_name, full_name)
                except Exception:
                    # The temporary file only exists if saving had started;
                    # removing it unconditionally raised from inside this
                    # handler (e.g. for SVG files Pillow cannot open).
                    if os.path.exists(temp_name):
                        os.remove(temp_name)
                    self.log.warning('Unable to resample image {}. Skipping...'.format(file))

    def write_buff(self, dname='', fname=''):
        """Parse the current buffer as XML and write it pretty-printed.

        Args:
            dname: Target directory; only used together with *fname*.
            fname: Target file name. When empty, the buffer goes to
                ``self.current_file`` inside ``self.temp_content_dir``.
        """
        if fname:
            target_dir = dname
            target_file = os.path.join(dname, fname)
        else:
            target_dir = self.temp_content_dir
            target_file = os.path.join(self.temp_content_dir, self.current_file)

        if not os.path.exists(target_dir):
            os.makedirs(target_dir)

        xml_parser = etree.XMLParser(encoding='utf-8', remove_blank_text=True)
        source = io.StringIO(self.get_buff())
        document = etree.parse(source, xml_parser)
        document.write(target_file, encoding='utf-8', method='xml', xml_declaration=True, pretty_print=True)

    # def write_buff_debug(self, dname='', fname=''):
    #     if len(fname) == 0:
    #         dirname = self.temp_content_dir
    #         filename = os.path.join(self.temp_content_dir, self.current_file)
    #     else:
    #         dirname = dname
    #         filename = os.path.join(dname, fname)
    #     write_file(self.get_buff(), filename)

    def write_debug(self, dname):
        """Dump the parsed source tree into *dname* under the original file name."""
        base_name = os.path.split(self.orig_file_name)[1]
        dump_path = os.path.join(dname, base_name)
        self.tree.write(dump_path, encoding='utf-8', method='xml', xml_declaration=True, pretty_print=False)

    def parse_note_elem(self, elem, body_name):
        """Collect note data from *elem*, an element of a notes body.

        * ``title``: its rendered markup and plain text are stored in
          ``self.notes_titles[body_name]`` (only when the text is not empty).
        * ``section`` with an ``id``: the title and the joined plain text of
          its other children are stored in ``self.notes_dict`` and the id is
          appended to ``self.notes_order``.
        * anything else: its children are processed recursively.
        """

        def plain_text(node):
            return etree.tostring(node, method='text', encoding='utf-8').decode('utf-8').strip()

        kind = ns_tag(elem.tag)

        if kind == 'title':
            # this is essentially a hack to preserve notes title (if any) for floating notes
            heading = re.compile(r'[\[{].*[\]}]').sub('', plain_text(elem))  # drop leftovers of links
            if heading:
                # Do real title parsing (notes file is not in pages_list anyways)
                outer_buff = self.buff
                self.buff = []
                self.header = True
                self.parse_format(elem, 'div', 'h0')
                self.header = False
                self.notes_titles[body_name] = (heading, self.buff[:])
                self.buff = outer_buff
            return

        if kind == 'section' and 'id' in elem.attrib:
            note_id = elem.attrib['id']
            self.buff = []

            note_title = ''
            pieces = []
            for part in elem:
                if ns_tag(part.tag) == 'title':
                    note_title = plain_text(part)
                else:
                    pieces.append(plain_text(part))

            self.notes_dict[note_id] = (note_title, ' '.join(pieces))
            self.notes_order.append((note_id, body_name))
            return

        for sub in elem:
            self.parse_note_elem(sub, body_name)

    def get_notes_dict(self):
        """Rebuild ``self.notes_dict`` from the bodies configured as note bodies.

        ``self.notes_bodies`` is a comma separated list of body names; every
        child of a ``<body>`` whose ``name`` attribute is in that list is
        handed to ``parse_note_elem``.
        """
        self.notes_dict = {}

        wanted_names = self.notes_bodies.replace(' ', '').split(',')

        for body in self.root:
            if ns_tag(body.tag) != 'body':
                continue
            if 'name' not in body.attrib:
                continue
            name = body.attrib['name']
            if name not in wanted_names:
                continue
            for child in body:
                self.parse_note_elem(child, name)

    def get_vignette(self, level, vignette_type):
        vignette = None
        try:
            vignette = self.vignettes[level][vignette_type]
        except:
            try:
                vignette = self.vignettes['default'][vignette_type]
            except:
                pass

        found = False

        if vignette:
            for v in self.vignette_files:
                if v == vignette:
                    found = True
                    break

            if not found:
                self.vignette_files.append(vignette)

        if vignette:
            vignette = os.path.split(vignette)[1]

        return vignette

    def parse_description(self, elem):
        """Read book metadata from the ``<description>`` element.

        From ``document-info`` the first non-empty ``id`` is used as book
        uuid when it is a valid UUID. From ``title-info`` the title,
        language, cover reference, genres, authors, series, annotation and
        date are taken. When an annotation page is requested, it is written
        to ``annotation.xhtml`` right away.

        Args:
            elem: The ``<description>`` element of the FB2 document.
        """
        self.log.debug('Parsing description')

        for e in elem:
            if ns_tag(e.tag) == 'document-info':
                for t in e:
                    if ns_tag(t.tag) == 'id' and t.text:
                        try:
                            self.book_uuid = uuid.UUID(t.text.strip())
                        except ValueError:
                            # Not a UUID - keep the generated one
                            pass
                        break
            elif ns_tag(e.tag) == 'title-info':
                for t in e:
                    tag = ns_tag(t.tag)

                    if tag == 'book-title':
                        if t.text:
                            self.book_title = t.text

                    elif tag == 'lang':
                        lang = t.text.strip() if t.text else ''
                        if lang:
                            self.book_lang = lang if len(lang) > 2 else lang.lower()
                        else:
                            self.book_lang = 'ru'

                        # Exact match - the former substring test ("in 'rus'")
                        # also turned codes such as 'us' into Russian.
                        if self.book_lang.lower() in ('ru', 'rus'):
                            self.book_lang = 'ru'

                        if self.hyphenate and self.hyphenator:
                            try:
                                self.hyphenator.set_language(self.book_lang.replace("-", "_"))
                            except Exception:
                                self.log.warning('Unable to set hyphenation dictionary for language code "{}" - turning hyphenation off'.format(self.book_lang))
                                self.hyphenate = False

                    elif tag == 'coverpage':
                        for c in t:
                            if ns_tag(c.tag) == 'image':
                                for a in c.attrib:
                                    if ns_tag(a) == 'href':
                                        # Drop the leading '#' of the reference
                                        self.book_cover = c.attrib[a][1:]
                                        break
                                    elif ':href' in a:
                                        self.book_cover = c.attrib[a][1:]
                                        self.log.warning('Wrong namespace is used for href attribute for cover page: {0}. Will attempt to recover...'.format(c.attrib))
                                        break

                    elif tag == 'genre':
                        # Empty <genre/> elements carry no text
                        if t.text:
                            self.genres.append(t.text)

                    elif tag == 'author':
                        # Start from scratch for every author, otherwise name
                        # parts of the previous author leak into this one.
                        firstname = middlename = lastname = ''

                        for a in t:
                            part = ns_tag(a.tag)
                            text = (a.text or '').strip()
                            if part == 'first-name':
                                firstname = text
                            elif part == 'middle-name':
                                middlename = text
                            elif part == 'last-name':
                                lastname = text

                        # pylint: disable=C0330
                        # yapf: disable
                        ba = format_pattern(self.authorformat,
                            [
                                ('#fi', (firstname[0] + '.') if firstname else ''),
                                ('#mi', (middlename[0] + '.') if middlename else ''),
                                ('#f', firstname),
                                ('#m', middlename),
                                ('#l', lastname)
                            ])
                        # yapf: enable

                        self.book_authors.append(ba.strip())

                    elif tag == 'sequence':
                        if 'name' in t.attrib:
                            self.book_series = t.attrib['name']
                        if 'number' in t.attrib:
                            self.book_series_num = t.attrib['number']

                    elif tag == 'annotation':
                        self.annotation = etree.tostring(t, method='text', encoding='utf-8').decode('utf-8').strip()

                        if self.generate_annotation_page:
                            self.buff = []
                            self.current_file = 'annotation.xhtml'
                            self.html_file_list.append(self.current_file)

                            self.buff.append(HTMLHEAD)
                            self.buff.append('<div class="annotation"><div class="h1">{0}</div>'.format(self.annotation_title))
                            self.parse_format(t, 'div')
                            self.buff.append('</div>')
                            self.buff.append(HTMLFOOT)

                            self.write_buff()

                    elif tag == 'date':
                        self.book_date = etree.tostring(t, method='text', encoding='utf-8').decode('utf-8').strip()

    def parse_binary(self, elem):
        """Extract one ``<binary>`` element into the book's ``images`` folder.

        The payload is base64-decoded and identified with Pillow; the
        detected type wins over the declared ``content-type``. PNG
        transparency is flattened onto white when requested. Data Pillow
        cannot read is kept only when it is declared as SVG. Every stored
        image is registered in ``self.image_file_list`` as
        ``(id, mime type, file name)``. Elements without an ``id`` are
        ignored.

        Args:
            elem: The ``<binary>`` element.
        """
        if 'id' not in elem.attrib:
            return

        self.log.debug('Parsing binary {0}'.format(elem.attrib))
        have_file = False
        elid = elem.attrib['id']
        decl_type = elem.attrib['content-type'].lower() if 'content-type' in elem.attrib else '---empty---'

        try:
            # elem.text is None for an empty element; invalid base64 and
            # non-ASCII text both raise ValueError subclasses.
            buff = base64.b64decode((elem.text or '').encode('ascii'))
        except ValueError:
            self.log.error('Unable to process binary for ref-id "{0}". Skipping...'.format(elid))
            return

        try:
            img = Image.open(io.BytesIO(buff))
            real_type = Image.MIME[img.format]
            imgfmt = img.format.lower()
            filename = "bin{0:08}.{1}".format(self.image_count, imgfmt.lower().replace('jpeg', 'jpg'))
            full_name = os.path.join(self.temp_content_dir, 'images', filename)
            make_dir(full_name)

            if self.kindle and imgfmt not in ('gif', 'jpeg', 'png', 'bmp'):
                self.log.warning('Image type "{0}" for ref-id "{1} is not supported by your device. Ignoring...'.format(real_type, elid))
                return

            if real_type != decl_type:
                self.log.warning('Declared and detected image types for ref-id "{0}" do not match: "{1}" is not "{2}". Using detected type...'.format(elid, decl_type, real_type))

            if self.removepngtransparency and imgfmt == 'png' and (img.mode in ('RGBA', 'LA') or (img.mode in ('RGB', 'L', 'P') and 'transparency' in img.info)):
                try:
                    self.log.debug('Removing image transparency for ref-id "{0}" in file "{1}"'.format(elid, filename))
                    if img.mode == "P" and isinstance(img.info.get("transparency"), bytes):
                        img = img.convert("RGBA")
                    # Paste the image onto a white background using its alpha channel as mask
                    if img.mode in ("L", "LA"):
                        bg = Image.new("L", img.size, 255)
                    else:
                        bg = Image.new("RGB", img.size, (255, 255, 255))
                    alpha = img.convert("RGBA").split()[-1]
                    bg.paste(img, mask=alpha)
                    bg.save(full_name, dpi=img.info.get("dpi"))
                    have_file = True
                except Exception:
                    # Fall back to writing the original bytes below
                    self.log.warning('Unable to remove transparency for ref-id "{0}" in file "{1}"'.format(elid, filename))
                    self.log.debug('Getting details:', exc_info=True)

            self.image_count += 1

        except Exception:
            # Pillow does not recognize SVG files. partition() copes with a
            # missing content-type, and the registered MIME subtype
            # "svg+xml" is accepted besides the bare "svg".
            if decl_type.partition('/')[2] in ('svg', 'svg+xml'):
                real_type = 'image/svg+xml'
                filename = "bin{0:08}.svg".format(self.image_count)
                full_name = os.path.join(self.temp_content_dir, 'images', filename)
                self.image_count += 1
            else:
                self.log.error('Unable to process binary for ref-id "{0}". Skipping...'.format(elid))
                # self.log.debug('Getting details:', exc_info=True)
                return

        if not have_file:
            write_file_bin(buff, full_name)

        self.image_file_list.append((elid, real_type, filename))

    def parse_span(self, span, elem):
        """Render *elem* through ``parse_format`` as a ``span``, passing *span* as third argument."""
        html_tag = 'span'
        self.parse_format(elem, html_tag, span)

    def parse_emphasis(self, elem):
        """Render an ``emphasis`` element via ``parse_span``."""
        self.parse_span(span='emphasis', elem=elem)

    def parse_strong(self, elem):
        """Render a ``strong`` element via ``parse_span``."""
        self.parse_span(span='strong', elem=elem)

    def parse_strikethrough(self, elem):
        """Render a strikethrough element via ``parse_span`` using the ``strike`` name."""
        self.parse_span(span='strike', elem=elem)

    def parse_style(self, elem):
        """Render a ``style`` element through ``parse_format`` as a plain ``span``."""
        html_tag = 'span'
        self.parse_format(elem, html_tag)

    def parse_emptyline(self):
        self.buff.append('<div class="emptyline" />')

    def parse_title(self, elem):
        """Render a ``title`` element and register it in the table of contents.

        Titles of unnamed bodies, and the first title of any body, become a
        title block (with optional vignettes before and after) and get a TOC
        entry. Later titles of named bodies are rendered as ``titlenotes``
        blocks without a TOC entry. The TOC index advances in both cases.
        """
        anchor = 'tocref{0}'.format(self.toc_index)
        plain = etree.tostring(elem, method='text', encoding='utf-8').decode('utf-8').strip()
        plain = re.compile(r'[\[{].*[\]}]').sub('', plain)  # drop leftovers of links

        if self.body_name and not self.first_header_in_body:
            self.buff.append('<div class="titlenotes" id="{0}">'.format(anchor))
            self.parse_format(elem, 'div')
        else:
            self.header = True
            self.first_chapter_line = True

            if self.current_header_level < self.chapterlevel:
                opening = '<div class="titleblock" id="{0}">'
            else:
                opening = '<div class="titleblock_nobreak" id="{0}">'
            self.buff.append(opening.format(anchor))

            # The very first title of an unnamed body is the book level title
            if not self.body_name and self.first_header_in_body:
                level = 'h0'
            else:
                level = 'h{0}'.format(min(self.current_header_level, 6))

            before = self.get_vignette(level, 'beforeTitle')
            if before:
                self.buff.append('<div class="vignette_title_before"><img src="vignettes/{0}" /></div>'.format(before))

            self.parse_format(elem, 'div', level)

            after = self.get_vignette(level, 'afterTitle')
            if after:
                self.buff.append('<div class="vignette_title_after"><img src="vignettes/{0}" /></div>'.format(after))

            self.toc[self.toc_index] = ['{0}#{1}'.format(self.current_file, anchor), plain, self.current_header_level, self.body_name]

        self.buff.append('</div>')
        self.first_header_in_body = False
        self.toc_index += 1
        self.header = False

    def parse_subtitle(self, elem):
        """Render a ``subtitle`` element as a ``p`` with the ``subtitle`` style.

        ``self.subheader`` is set while the element is being rendered.
        """
        html_tag, css_class = 'p', 'subtitle'
        self.subheader = True
        self.parse_format(elem, html_tag, css_class)
        self.subheader = False

    def parse_image(self, elem):
        """Emit the ``<img>`` markup for an FB2 ``image`` element.

        The ``href`` attribute (minus its leading ``#``) is resolved against
        ``self.image_file_list``. An unresolved reference is logged and
        rendered with the placeholder file ``nonexistent.gif``. Inline mode
        produces a bare ``<img>``, block mode wraps it into a ``div``.
        Attribute values taken from the book (``id``, ``alt``) are escaped so
        that characters such as ``&`` or ``"`` cannot break the generated
        XHTML.

        Args:
            elem: The ``image`` element.
        """
        img_id = None
        int_id = None
        alt = None

        for a in elem.attrib:
            if ns_tag(a) == 'href':
                int_id = elem.attrib[a][1:]
            elif ':href' in a:
                self.log.warning('Wrong namespace is used for href attribute in <image>: {0}. Will attempt to recover...'.format(elem.attrib))
                int_id = elem.attrib[a][1:]
            elif ns_tag(a) == 'id':
                img_id = elem.attrib[a]
            elif ns_tag(a) == 'alt':
                alt = elem.attrib[a]

        if not int_id:
            self.log.error('Unable to find image ref-id in "{0}" "{1}.'.format(elem.tag, elem.attrib))
            return

        filename = next((file for imgid, _, file in self.image_file_list if imgid == int_id), None)
        if not filename:
            self.log.error('Unable to find image for ref-id "{0}" in "{1}" "{2}.'.format(int_id, elem.tag, elem.attrib))
            filename = "nonexistent.gif"
            alt = int_id

        if not alt:
            alt = filename

        # Values come straight from the book file - make them attribute safe
        alt = html.escape(alt, quote=True)
        if img_id:
            img_id = html.escape(img_id, quote=True)

        if self.inline_image_mode:
            if img_id:
                self.buff.append('<img id="{0}" class="inlineimage" src="images/{1}" alt="{2}"/>'.format(img_id, filename, alt))
            else:
                self.buff.append('<img class="inlineimage" src="images/{0}" alt="{1}"/>'.format(filename, alt))
        else:
            if img_id:
                self.buff.append('<div id="{0}" class="image">'.format(img_id))
            else:
                self.buff.append('<div class="image">')
            self.buff.append('<img src="images/{0}" alt="{1}"/>'.format(filename, alt))
            self.buff.append('</div>')

        self.parse_format(elem)

    def parse_a(self, elem):
        href = None
        for name in elem.attrib:
            if ns_tag(name) == 'href':
                href = elem.attrib[name]
                break
            elif ':href' in name:
                self.log.warning('Wrong namespace is used for href attribute in <a>: {0}. Will attempt to recover...'.format(elem.attrib))
                href = elem.attrib[name]
                break
        if not href:
            self.log.error('Unable to find href attribute in <a>: {0}.'.format(elem.attrib))

        self.parse_format(elem, 'a', 'anchor', href=href)

    def parse_p(self, elem):
        ptag = 'p'
        pcss = None

        if self.header:
            pcss = 'title'

        self.parse_format(elem, ptag, pcss)

    def parse_poem(self, elem):
        self.no_paragraph = True
        self.parse_format(elem, 'div', 'poem')
        self.no_paragraph = False

    def parse_stanza(self, elem):
        self.parse_format(elem, 'div', 'stanza')

    def parse_v(self, elem):
        self.parse_format(elem, 'p')

    def parse_cite(self, elem):
        self.parse_format(elem, 'div', 'cite')

    def parse_textauthor(self, elem):
        self.no_paragraph = True
        self.parse_format(elem, 'div', 'text-author')
        self.no_paragraph = False

    def parse_annotation(self, elem):
        self.no_paragraph = True
        self.parse_format(elem, 'div', 'annotation')
        self.no_paragraph = False

    def parse_table(self, elem):
        self.buff.append('<table class="table"')
        for attr in elem.attrib:
            self.buff.append(' {0}="{1}"'.format(attr, elem.attrib[attr]))
        self.buff.append('>')
        self.parse_format(elem)
        self.buff.append('</table>')

    def parse_epigraph(self, elem):
        self.no_paragraph = True
        self.parse_format(elem, 'div', 'epigraph')
        self.no_paragraph = False

    def parse_code(self, elem):
        self.parse_format(elem, 'code')

    def parse_other(self, elem):
        """Render an element with no dedicated handler under its own tag name.

        The namespace part of the tag, if any, is removed with ``ns_tag``.
        """
        local_name = ns_tag(elem.tag)
        self.parse_format(elem, local_name)

    def parse_section(self, elem):

        if not self.body_name and self.current_header_level == 0 and self.first_header_in_body:

            # We encountered main body without a title - need to add it forcefully, otherwise toc and books structure would be wrong

            toc_ref_id = 'tocref{0}'.format(self.toc_index)
            if len(self.book_authors) == 1:
                toc_title = self.book_authors[0] + " " + self.book_title
            else:
                toc_title = self.book_title

            if self.current_header_level < self.chapterlevel:
                self.buff.append('<div class="titleblock" id="{0}">'.format(toc_ref_id))
            else:
                self.buff.append('<div class="titleblock_nobreak" id="{0}">'.format(toc_ref_id))

            vignette = self.get_vignette('h0', 'beforeTitle')
            if vignette:
                self.buff.append('<div class="vignette_title_before"><img src="vignettes/{0}" /></div>'.format(vignette))

            self.buff.append('<div class ="h0">')
            for a in self.book_authors:
                self.buff.append('<p class="title">{0}</p>'.format(a))
            self.buff.append('<p class="title">{0}</p>'.format(self.book_title))
            self.buff.append('</div>')

            vignette = self.get_vignette('h0', 'afterTitle')
            if vignette:
                self.buff.append('<div class="vignette_title_after"><img src="vignettes/{0}" /></div>'.format(vignette))

            self.toc[self.toc_index] = ['{0}#{1}'.format(self.current_file, toc_ref_id), toc_title, self.current_header_level, '']

            self.buff.append('</div>')
            self.first_header_in_body = False
            self.toc_index += 1

        self.current_header_level = self.current_header_level + 1

        if not self.body_name:
            if self.chaptersplit and self.current_header_level < self.chapterlevel:
                self.buff.append(HTMLFOOT)
                self.write_buff()

                self.buff = []
                self.current_file_index += 1
                self.current_file = 'index{0}.xhtml'.format(self.current_file_index)
                self.html_file_list.append(self.current_file)
                self.buff.append(HTMLHEAD)

                self.pages_list[self.current_file] = 0
                self.page_length = 0

        self.parse_format(elem, tag='div', css='section')

        if not self.body_name:
            level = 'h{0}'.format(self.current_header_level if self.current_header_level <= 6 else 6)
            vignette = self.get_vignette(level, 'chapterEnd')
            if vignette:
                self.buff.append('<p class="vignette_chapter_end"><img src="vignettes/{0}" /></p>'.format(vignette))
            self.buff.append('<div class="chapter_end"/>')

        self.current_header_level = max(0, self.current_header_level - 1)

    def parse_date(self, elem):
        self.parse_format(elem, 'time')

    def parse_format(self, elem, tag=None, css=None, href=None):
        dodropcaps = 0

        if elem.text:
            # Обработка dropcaps
            if self.first_chapter_line and not (self.header or self.subheader or self.body_name or self.no_paragraph):
                if tag == 'p':
                    if self.dropcaps == 'simple':
                        if elem.text[0] not in self.nodropcaps:
                            dodropcaps = 1
                            css = 'dropcaps'
                    elif self.dropcaps == 'smart':
                        for i, c in enumerate(elem.text):
                            if c not in self.nodropcaps and not c.isspace():
                                dodropcaps = i + 1
                                css = 'dropcaps'
                                break
                self.first_chapter_line = False

        if self.notes_mode in ('inline', 'block') and tag == 'a':
            note_id = href[1:]
            try:
                note = self.notes_dict[note_id]
                self.current_notes.append(note)
                tag = 'span'
                css = '{0}anchor'.format(self.notes_mode)
                href = None
            except KeyError:
                pass
        elif self.notes_mode in ('default', 'float') and tag == 'a':
            if href[1:] in self.notes_dict:
                elem.set('id', 'back_' + href[1:])
            else:
                css = 'linkanchor'

        if tag:
            self.buff.append('<{0}'.format(tag))
            if css:
                self.buff.append(' class="{0}"'.format(css))
            if 'id' in elem.attrib:
                new_id = save_html(sanitize_id(elem.attrib['id']))
                if new_id != elem.attrib['id']:
                    self.log.warning('id "{}" for tag "{}" was sanitized. This may create problems with links (TOC, notes) - it is better to fix original file.'.format(new_id, tag))
                self.buff.append(' id="{}"'.format(new_id))
                self.links_location[new_id] = self.current_file
            if href:
                self.buff.append(' href="{}"'.format(save_html(href)))
            if tag == 'p':
                # Для inline-картинок
                self.inline_image_mode = True
            if css == 'section':
                self.buff.append(' />')
            else:
                self.buff.append('>')

        if elem.text:
            if self.current_file in self.pages_list and self.page_length + len(elem.text) >= self.characters_per_page:
                page = self.pages_list[self.current_file]
                text = ''

                for w in elem.text.split(' '):
                    if not text:
                        text = ' ' if not w else w
                    else:
                        text = ' '.join([text, w])
                    if self.page_length + len(text) >= self.characters_per_page:
                        hs = self.insert_hyphenation(text)
                        if dodropcaps > 0:
                            self.buff.append('<span class="dropcaps">{}</span>{}'.format(save_html(hs[0:dodropcaps]), save_html(hs[dodropcaps:])))
                            dodropcaps = 0
                        else:
                            self.buff.append(save_html(hs))
                        self.buff.append('<a class="pagemarker" id="page_{0:d}"/> '.format(page))
                        page += 1
                        text = ''
                        self.page_length = 0

                self.page_length = len(text)
                if text:
                    hs = self.insert_hyphenation(text)
                    if dodropcaps > 0:
                        self.buff.append('<span class="dropcaps">{}</span>{}'.format(save_html(hs[0:dodropcaps]), save_html(hs[dodropcaps:])))
                    else:
                        self.buff.append(save_html(hs))
                self.pages_list[self.current_file] = page
            else:
                self.page_length += len(elem.text)
                hs = self.insert_hyphenation(elem.text)
                if dodropcaps > 0:
                    self.buff.append('<span class="dropcaps">{}</span>{}'.format(save_html(hs[0:dodropcaps]), save_html(hs[dodropcaps:])))
                else:
                    self.buff.append(save_html(hs))

        for e in elem:
            if e.tag == etree.Comment:
                continue
            if ns_tag(e.tag) == 'title':
                self.parse_title(e)
            elif ns_tag(e.tag) == 'subtitle':
                self.parse_subtitle(e)
            elif ns_tag(e.tag) == 'epigraph':
                self.parse_epigraph(e)
            elif ns_tag(e.tag) == 'annotation':
                self.parse_annotation(e)
            elif ns_tag(e.tag) == 'section':
                self.parse_section(e)
            elif ns_tag(e.tag) == 'strong':
                self.parse_strong(e)
            elif ns_tag(e.tag) == 'emphasis':
                self.parse_emphasis(e)
            elif ns_tag(e.tag) == 'strikethrough':
                self.parse_strikethrough(e)
            elif ns_tag(e.tag) == 'style':
                self.parse_style(e)
            elif ns_tag(e.tag) == 'a':
                self.parse_a(e)
            elif ns_tag(e.tag) == 'image':
                self.parse_image(e)
            elif ns_tag(e.tag) == 'p':
                self.parse_p(e)
            elif ns_tag(e.tag) == 'poem':
                self.parse_poem(e)
            elif ns_tag(e.tag) == 'stanza':
                self.parse_stanza(e)
            elif ns_tag(e.tag) == 'v':
                self.parse_v(e)
            elif ns_tag(e.tag) == 'cite':
                self.parse_cite(e)
            elif ns_tag(e.tag) == 'empty-line':
                self.parse_emptyline()
            elif ns_tag(e.tag) == 'text-author':
                self.parse_textauthor(e)
            elif ns_tag(e.tag) == 'table':
                self.parse_table(e)
            elif ns_tag(e.tag) == 'code':
                self.parse_code(e)
            elif ns_tag(e.tag) == 'date':
                self.parse_date(e)
            elif ns_tag(e.tag) == 'tr':
                self.parse_table_element(e)
            elif ns_tag(e.tag) == 'td':
                self.parse_table_element(e)
            elif ns_tag(e.tag) == 'th':
                self.parse_table_element(e)
            else:
                self.parse_other(e)

        if tag:
            if css == 'section':
                pass
            else:
                self.buff.append('</{0}>'.format(tag))
            # Для inline-картинок
            if tag == 'p':
                self.inline_image_mode = False

            if self.current_notes:
                if self.notes_mode == 'inline' and tag == 'span':
                    self.buff.append('<span class="inlinenote">{0}</span>'.format(save_html(self.insert_hyphenation(''.join(self.current_notes[0][1])))))
                    self.current_notes = []
                elif self.notes_mode == 'block' and tag == 'p':
                    self.buff.append('<div class="blocknote">')
                    for note in self.current_notes:
                        if note[1]:
                            self.buff.append('<p><span class="notenum">{0}) </span>{1}</p>'.format(save_html(note[0]), save_html(self.insert_hyphenation(''.join(note[1])))))
                    self.buff.append('</div>')
                    self.current_notes = []

        if elem.tail:
            self.page_length += len(elem.tail)
            self.buff.append(save_html(self.insert_hyphenation(elem.tail)))

    def parse_table_element(self, elem):
        """Render a table row or cell (<tr>, <td>, <th>) under its own tag name.

        All attributes of the source element are copied onto the output tag.
        Attribute values are escaped (quotes included) because they come from
        the source document.
        """
        name = ns_tag(elem.tag)
        self.buff.append('<{0}'.format(name))

        for attr, value in elem.attrib.items():
            self.buff.append(' {0}="{1}"'.format(attr, html.escape(value, quote=True)))

        self.buff.append('>')
        # No wrapper tag: the surrounding tag is written by this method.
        self.parse_format(elem)
        self.buff.append('</{0}>'.format(name))

    def insert_hyphenation(self, s):
        if not s:
            return ''
        return html.unescape(s) if not self.hyphenate or not self.hyphenator or self.header or self.subheader else self.hyphenator.hyphenate_text(html.unescape(s), self.replaceNBSP)

    def parse_body(self, elem):
        """Convert one <body> element into an XHTML file.

        The first body processed is always treated as the main body (its name
        is cleared).  The main body goes to the next ``indexN.xhtml`` file; a
        named body goes to a file named after the MD5 hex digest of its name.

        With ``notes_mode`` 'inline' or 'block', bodies listed in
        ``self.notes_bodies`` produce no content of their own.  With 'float',
        such a body is rebuilt from ``self.notes_order`` / ``self.notes_dict``
        as a list of ``floatnote`` paragraphs with back links.  Every other
        body is rendered through ``parse_format``.

        The finished buffer is written with ``write_buff``.

        :param elem: the <body> element.
        """

        self.log.debug('Parsing body: {0}'.format(elem.attrib))

        self.body_name = elem.attrib['name'] if 'name' in elem.attrib else ''
        self.current_header_level = 0
        self.first_header_in_body = True

        if self.first_body:
            self.first_body = False
            self.body_name = ''

        self.buff = []
        self.buff.append(HTMLHEAD)

        if not self.body_name:
            self.current_file_index += 1
            self.current_file = 'index{0}.xhtml'.format(self.current_file_index)
            self.html_file_list.append(self.current_file)
        else:
            self.current_file = '{0}.xhtml'.format(hashlib.md5(bytes(self.body_name, 'utf-8')).hexdigest())
            self.html_file_list.append(self.current_file)

        self.pages_list[self.current_file] = 0
        self.page_length = 0

        if self.notes_mode in ('inline', 'block', 'float'):
            notes_bodies = self.notes_bodies.replace(' ', '').split(',')
            if self.body_name not in notes_bodies:
                self.parse_format(elem)
            elif self.notes_mode == 'float':

                # To satisfy Amazon's requirements for floating notes I have to create notes body on the fly here, removing most of the formatting

                if self.notes_order:
                    if self.body_name in self.notes_titles:
                        toc_title = self.notes_titles[self.body_name][0]
                        title = ''.join(self.notes_titles[self.body_name][1])
                    else:
                        # No stored title: fall back to the capitalized body name.
                        toc_title = self.body_name[0].upper() + self.body_name[1:]
                        title = '<div class="h0"><p class="title">{0}</p></div>'.format(toc_title)

                    toc_ref_id = 'tocref{0}'.format(self.toc_index)
                    self.buff.append('<div class="titleblock" id="{0}">'.format(toc_ref_id))

                    vignette = self.get_vignette('h0', 'beforeTitle')
                    if vignette:
                        self.buff.append('<div class="vignette_title_before"><img src="vignettes/{0}" /></div>'.format(vignette))

                    self.buff.append(title)

                    vignette = self.get_vignette('h0', 'afterTitle')
                    if vignette:
                        self.buff.append('<div class="vignette_title_after"><img src="vignettes/{0}" /></div>'.format(vignette))

                    self.toc[self.toc_index] = ['{0}#{1}'.format(self.current_file, toc_ref_id), toc_title, 0, self.body_name]

                    self.buff.append('</div>')
                    self.toc_index += 1

                for noteid, body_name in self.notes_order:
                    if body_name != self.body_name:
                        continue
                    note = self.notes_dict[noteid]
                    noteid_b = 'back_' + noteid
                    self.links_location[noteid] = self.current_file
                    # Sometimes due to an error document does not have a reference to note and numbers are all messed up
                    try:
                        back_ref = self.links_location[noteid_b]
                    except KeyError:
                        # No link to this note was seen; point the back link nowhere.
                        back_ref = 'nowhere'
                    self.buff.append('<p class="floatnote"><a href="{0}#{1}" id="{2}">{3}).</a>&#160;{4}</p>'.format(back_ref, noteid_b, noteid, save_html(note[0]) if note[0] else '***', save_html(note[1])))
        else:
            self.parse_format(elem)

        self.buff.append(HTMLFOOT)
        self.write_buff()

    def generate_toc(self):
        """Build the HTML table of contents page (``toc.xhtml``) and write it.

        Entries deeper than ``self.toc_max_level`` are left out.  Main-body
        entries (empty body name) are indented by their level, capped at 6; a
        level-0 main-body entry is written line by line with ``<br/>`` after
        each non-blank line.  Entries of named bodies always use indent 0.
        The file name is appended to ``self.html_file_list`` after writing.
        """
        self.current_file = 'toc.xhtml'
        self.buff = [
            HTMLHEAD,
            '<div class="toc">',
            '<div class="h1" id="toc">{0}</div>'.format(self.toc_title),
        ]

        for entry in self.toc.values():
            depth = entry[2]
            if depth > self.toc_max_level:  # limit on section nesting shown in the TOC
                continue

            target = entry[0]
            if entry[3] == '':
                indent = min(depth, 6)
                if indent == 0:
                    self.buff.append('<div class="indent0"><a href="{0}">'.format(target))
                    for raw_line in entry[1].splitlines():
                        stripped = raw_line.strip()
                        if stripped:
                            self.buff.append(save_html(stripped) + '<br/>')
                    self.buff.append('</a></div>')
                    continue
            else:
                indent = 0

            caption = save_html(' '.join(entry[1].split()))
            self.buff.append('<div class="indent{0}"><a href="{1}">{2}</a></div>'.format(indent, target, caption))

        self.buff.append('</div>')
        self.buff.append(HTMLFOOT)

        self.write_buff()
        self.html_file_list.append(self.current_file)

    def ncx_navp_beg(self, index, title, link):
        self.buff.append('<navPoint id="navpoint{0}" playOrder="{1}">'.format(index, index))
        self.buff.append('<navLabel><text>{0}</text></navLabel>'.format(title))
        self.buff.append('<content src="{0}" />'.format(link))

    def ncx_navp_end(self):
        self.buff.append('</navPoint>')

    def generate_ncx(self):
        self.buff = []
        self.buff.append('<?xml version="1.0"?>' '<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" version="2005-1" xml:lang="en-US">' '<head>')
        self.buff.append('<meta name="dtb:uid" content="urn:uuid:{0}"/>'.format(self.book_uuid))
        self.buff.append('</head>' '<docTitle>' '<text>fb2mobi.py</text>' '</docTitle>' '<navMap>')
        i = 1

        # Включим содержание в навигацию, если содержание помещается в начале книги
        if self.tocbeforebody and self.toc.items() and self.generate_toc_page:
            self.ncx_navp_beg(i, self.toc_title, 'toc.xhtml')
            self.ncx_navp_end()
            i += 1

        # First (book title) on the same level as the rest, if you want everything be under it do ncx_level = -1
        ncx_level = 2
        ncx_barrier = sys.maxsize

        if self.toc_type in 'flat':
            ncx_level = sys.maxsize
            ncx_barrier = 1

        if self.toc_type in 'kindle':
            ncx_level = self.toc_kindle_level
            ncx_barrier = 1

        history = []
        prev_item: Tuple[int, str] = ()
        for (_, item) in self.toc.items():
            if prev_item is ():  # first time
                self.ncx_navp_beg(i, save_html(' '.join(item[1].split())), item[0])
                history.append(item[2])
                i += 1
            elif prev_item[2] < item[2]:
                if item[2] < ncx_level or len(history) > ncx_barrier:
                    self.ncx_navp_end()
                    history.pop()
                self.ncx_navp_beg(i, save_html(' '.join(item[1].split())), item[0])
                history.append(item[2])
                i += 1
            elif prev_item[2] == item[2]:  # Same level
                self.ncx_navp_end()
                self.ncx_navp_beg(i, save_html(' '.join(item[1].split())), item[0])
            elif prev_item[2] > item[2]:  # Going out
                while history != [] and history[len(history) - 1] >= item[2]:
                    self.ncx_navp_end()
                    history.pop()
                self.ncx_navp_beg(i, save_html(' '.join(item[1].split())), item[0])
                history.append(item[2])
                i += 1
            else:
                assert False
            prev_item = item

        # Whatever levels are open - close them
        while history != []:
            self.ncx_navp_end()
            history.pop()

        # Включим содержание в навигацию, если содержание помещается в конце книги
        if not self.tocbeforebody and self.toc.items() and self.generate_toc_page:
            self.ncx_navp_beg(i, self.toc_title, 'toc.xhtml')
            self.ncx_navp_end()

        self.buff.append('</navMap></ncx>')
        self.write_buff(self.temp_content_dir, 'toc.ncx')

    def generate_mimetype(self):
        """Write the EPUB ``mimetype`` file into ``self.temp_dir``."""
        target = os.path.join(self.temp_dir, 'mimetype')
        write_file('application/epub+zip', target)

    def generate_container(self):
        self.buff = []
        self.buff.append('<?xml version="1.0"?>'
                         '<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">'
                         '<rootfiles>'
                         '<rootfile full-path="OEBPS/content.opf" media-type="application/oebps-package+xml"/>'
                         '</rootfiles>'
                         '</container>')
        self.write_buff(self.temp_inf_dir, 'container.xml')

    def stamp_cover(self, img):


        # Most of numbers/sizes are totally arbitrary...

        if self.cover_stamp == 'None':
            return

        if not self.cover_font or not os.path.isfile(self.cover_font):
            self.log.warn('Unable to place stamp on cover - coverFont is not specified')
            return

        title = '' if not self.book_title else self.book_title.strip()
        series = '' if not self.book_series else self.book_series.strip()
        if self.book_series_num:
            series = '{0}: {1}'.format(series, self.book_series_num.strip())
        author = self.get_book_authors()
        if len(author) > 25:
            author = author[0:25] + u"\u2026"

        # tuning
        h = img.height // 4
        fh = max(10, h // 6)
        off = fh // 4

        if self.cover_stamp == 'Top':
            pos = (0, 0)
        elif self.cover_stamp == 'Bottom':
            pos = (0, img.height - h)
        else:
            pos = (0, (img.height - h) // 2)

        overlay = Image.new('RGBA', (img.width, h), color=(0, 0, 0, 200))

        step = off
        if title:
            _, h = ImageText(overlay).write_text_box((off, step), title, box_width=img.width - off, font_filename=self.cover_font, font_size=fh, color=(255, 255, 255))
            step += h
        if series:
            _, h = ImageText(overlay).write_text_box((off, step + off), series, box_width=img.width - off, font_filename=self.cover_font, font_size=fh, color=(255, 255, 255))
            step += h
        if author:
            ImageText(overlay).write_text_box((off, step + off), author, box_width=img.width - off, font_filename=self.cover_font, font_size=fh, color=(255, 255, 255))

        img.paste(overlay, pos)

    def generate_cover(self):
        """Prepare the cover image and, for non-Kindle output, the cover page.

        Does nothing when the book has no cover.  If the cover id cannot be
        found in ``self.image_file_list`` an error is logged and the cover is
        disabled.  Otherwise the image is enlarged to ``self.screen_height``
        when it is shorter than that, stamped with ``stamp_cover`` and saved
        over the original file.  For non-Kindle output a ``cover.xhtml`` page
        embedding the image in an SVG is written as well.
        """
        if not self.book_cover:
            return

        cover_file = next((file for imgid, _, file in self.image_file_list if imgid == self.book_cover), None)
        if not cover_file:
            self.log.error('Unable to find book cover image for ref-id "{0}". Disabling book cover...'.format(self.book_cover))
            self.book_cover = ''
            return

        # make sure kindlegen does not complain on cover size and make sure that epub cover takes whole screen
        cover_path = os.path.join(self.temp_content_dir, 'images', cover_file)
        im = Image.open(cover_path)
        if im.height < self.screen_height:
            scaled_width = int(self.screen_height * im.width / im.height)
            im = im.resize((scaled_width, self.screen_height), Image.LANCZOS)

        self.stamp_cover(im)
        im.save(cover_path)

        if self.kindle:
            return

        self.buff = [
            HTMLHEAD,
            '<svg version="1.1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="100%" height="100%" viewBox="0 0 {0} {1}" preserveAspectRatio="xMidYMid meet">'.format(
                self.screen_width, self.screen_height),
            '<image width="{0}" height="{1}" xlink:href="images/{2}" />'.format(self.screen_width, self.screen_height, cover_file),
            '</svg>',
            HTMLFOOT,
        ]
        self.current_file = 'cover.xhtml'

        self.write_buff()

    def generate_pagemap(self):
        page = 1
        self.buff = []
        self.buff.append('<?xml version = "1.0" ?>' '<page-map xmlns = "http://www.idpf.org/2007/opf">')

        if self.book_cover and not self.kindle:
            self.buff.append('<page name="{0}" href="cover.xhtml"/>'.format(page))
            page += 1
        if self.tocbeforebody and self.generate_toc_page:
            self.buff.append('<page name="{0}" href="toc.xhtml"/>'.format(page))
            page += 1

        for item in self.html_file_list:
            if item != 'toc.xhtml':
                self.buff.append('<page name="{0}" href="{1}"/>'.format(page, item))
                page += 1
                if item in self.pages_list:
                    for p in range(0, self.pages_list[item]):
                        self.buff.append('<page name="{0:d}" href="{1:s}#page_{2:d}"/>'.format(page, item, p))
                        page += 1

        if not self.tocbeforebody and self.generate_toc_page:
            self.buff.append('<page name="{0}" href="toc.xhtml"/>'.format(page))
            page += 1

        self.buff.append('</page-map>')
        self.write_buff(self.temp_content_dir, 'page-map.xml')

    def generate_opf(self):
        """Build the OPF package document (``content.opf``) and write it.

        The document consists of:
          * metadata: title (optionally built from ``self.booktitleformat``
            and optionally transliterated), language, UUID identifier,
            authors, genres, annotation and the cover reference;
          * manifest: NCX, page map, every XHTML file, images, vignettes,
            the stylesheet and fonts;
          * spine: optional cover page, then the content files, with the TOC
            page before or after them depending on ``self.tocbeforebody``;
          * guide (only when ``self.generate_opf_guide`` is set).

        The result is written with ``write_buff`` into
        ``self.temp_content_dir``.
        """
        self.buff = []
        self.buff.append('<?xml version="1.0" ?>' '<package version="2.0" xmlns="http://www.idpf.org/2007/opf" unique-identifier="BookId">' '<metadata xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:opf="http://www.idpf.org/2007/opf">')

        title = self.book_title

        if self.booktitleformat:
            # pylint: disable=C0330
            # yapf: disable
            title = format_pattern(self.booktitleformat,
                [
                    ('#title', '' if not self.book_title else self.book_title.strip()),
                    ('#series', '' if not self.book_series else self.book_series.strip()),
                    ('#abbrseries', ''.join(word[0] for word in self.book_series.split()).lower() if self.book_series else ''),
                    ('#number', '' if not self.book_series_num else self.book_series_num.strip()),
                    ('#padnumber', '' if not self.book_series_num else self.book_series_num.strip().zfill(self.seriespositions)),
                    ('#date', '' if not self.book_date else self.book_date.strip())
                ])
            # yapf: enable
            if not title:
                # The pattern produced nothing: fall back to the plain title.
                title = self.book_title

        title = title.strip()
        if self.transliterate_author_and_title:
            title = slugify(title, separator=' ')

        self.buff.append('<dc:title>{0}</dc:title>'.format(save_html(title)))
        self.buff.append('<dc:language>{0}</dc:language>'.format(self.book_lang))
        self.buff.append('<dc:identifier id="BookId" opf:scheme="uuid">urn:uuid:{0}</dc:identifier>'.format(self.book_uuid))
        for a in self.book_authors:
            if self.transliterate_author_and_title:
                # Pass the separator by keyword, exactly as for the title above;
                # positionally it would land in a different slugify parameter.
                a = slugify(a, separator=' ')
            self.buff.append('<dc:creator opf:role="aut">{0}</dc:creator>'.format(save_html(a)))
        self.buff.append('<dc:publisher />')

        for genre in self.genres:
            # Escape like the other metadata text values.
            self.buff.append('<dc:subject>{0}</dc:subject>'.format(save_html(genre)))

        if self.annotation:
            self.buff.append('<dc:description>{0}</dc:description>'.format(save_html(self.annotation)))

        if self.book_cover:
            self.buff.append('<meta name="cover" content="cover-image" />')

        self.buff.append('</metadata>')
        self.buff.append('<manifest>' '<item id="ncx" media-type="application/x-dtbncx+xml" href="toc.ncx"/>' '<item id = "map" media-type="application/oebps-page-map+xml" href="page-map.xml"/>')

        for item in self.html_file_list:
            # The manifest id is the file name without its extension.
            self.buff.append('<item id="{0}" media-type="application/xhtml+xml" href="{1}"/>'.format(item.split('.')[0], item))

        # item_id numbers images and vignettes in one shared sequence.
        item_id = 0
        for imgid, imgtype, filename in self.image_file_list:
            if imgid == self.book_cover:
                self.buff.append('<item id="cover-image" media-type="{0}" href="images/{1}"/>'.format(imgtype, filename))
                if not self.kindle:
                    self.buff.append('<item id="cover-page" href="cover.xhtml" media-type="application/xhtml+xml"/>')
            else:
                self.buff.append('<item id="image{0}" media-type="{1}" href="images/{2}"/>'.format(item_id, imgtype, filename))

            item_id += 1

        for item in self.vignette_files:
            item_file = os.path.split(item)[1]
            # The media type is derived from the file extension (without the dot).
            item_type = os.path.splitext(item_file)[1]
            item_type = item_type[1:]

            if item_type == 'jpg':
                item_type = 'jpeg'

            self.buff.append('<item id="image{0}" media-type="image/{1}" href="vignettes/{2}"/>'.format(item_id, item_type, item_file))
            item_id += 1

        self.buff.append('<item id="style" href="stylesheet.css" media-type="text/css"/>')

        for font_id, f in enumerate(self.font_list):
            if f.lower().endswith('.otf'):
                self.buff.append('<item id="font{0}" href="{1}" media-type="application/opentype"/>'.format(font_id, f))
            else:
                self.buff.append('<item id="font{0}" href="{1}" media-type="application/x-font-ttf"/>'.format(font_id, f))

        self.buff.append('</manifest>' '<spine page-map="map" toc="ncx">')

        if self.book_cover and not self.kindle:
            self.buff.append('<itemref idref="cover-page" linear="no"/>')
        if self.tocbeforebody and self.generate_toc_page:
            self.buff.append('<itemref idref="toc"/>')

        for item in self.html_file_list:
            if item != 'toc.xhtml':
                self.buff.append('<itemref idref="{0}"/>'.format(item.split('.')[0]))

        if not self.tocbeforebody and self.generate_toc_page:
            self.buff.append('<itemref idref="toc"/>')

        self.buff.append('</spine>')

        if self.generate_opf_guide:
            self.buff.append('<guide>')
            if self.book_cover and not self.kindle:
                self.buff.append('<reference type="cover-page" href="cover.xhtml"/>')

            if self.open_book_from_cover and self.book_cover:
                self.buff.append('<reference type="text" title="book" href="cover.xhtml"/>')
            else:
                # Start reading at the first main-body file.
                for item in self.html_file_list:
                    if item.split('.')[0].startswith('index'):
                        self.buff.append('<reference type="text" title="Starts here" href="{0}"/>'.format(item))
                        break

            self.buff.append('<reference type="toc" title="Table of Contents" href="toc.xhtml"/>')
            self.buff.append('</guide>')

        self.buff.append('</package>')

        self.write_buff(self.temp_content_dir, 'content.opf')

    def get_buff(self):
        return ''.join(self.buff)