"""Python Cookbook

Chapter 9, recipe 6.

Parse the HTML file and print the results
as JSON and XML documents.
"""
from bs4 import BeautifulSoup
from pathlib import Path

def null_int(text):
    """Convert cell text to an ``int``, treating blank text as missing.

    :param text: the text of a table cell; may be ``None``, empty, or
        contain only whitespace when the cell has no value.
    :returns: ``int(text)`` for non-blank text, otherwise ``None``.
    :raises ValueError: if non-blank text is not a valid integer literal.
    """
    if not text:
        return None
    # Scraped cells are often padded with whitespace; a cell holding only
    # whitespace is still truthy and would make int() raise, so treat it
    # as a missing value as well.
    if isinstance(text, str) and not text.strip():
        return None
    return int(text)


def clean_leg(text):
    """Return the text content of *text* after parsing it as HTML.

    The value is run through BeautifulSoup's ``html.parser`` and only the
    text of the resulting document is returned.
    """
    return BeautifulSoup(text, 'html.parser').text


def get_html(source_path="Volvo Ocean Race.html"):
    """Parse the results HTML page into a plain dictionary.

    The first ``<table>`` of the page supplies all the data. Header
    ``<th>`` cells carrying a ``data-title`` attribute name the legs; each
    body ``<tr>`` describes one team, with a ``ranking-team`` cell holding
    the name and ``ranking-number`` cells holding the positions.

    Header or body cells that are not recognised are printed to stdout as
    a diagnostic and otherwise ignored.

    :param source_path: path of the HTML file to read (``str`` or
        ``Path``); defaults to ``"Volvo Ocean Race.html"`` relative to
        the current working directory.
    :returns: ``{'legs': [str, ...],
        'teams': [{'name': ..., 'position': [...]}, ...]}``
    """
    with Path(source_path).open(encoding='utf8') as source_file:
        soup = BeautifulSoup(source_file, 'html.parser')

    legs = []
    thead = soup.table.thead.tr
    for tag in thead.find_all('th'):
        if 'data-title' in tag.attrs:
            # The attribute value is itself passed through the HTML
            # parser to reduce it to plain text.
            leg_description_text = clean_leg(tag.attrs['data-title'])
            legs.append(leg_description_text)
        else:
            print(tag.attrs, tag.string)

    teams = []
    tbody = soup.table.tbody
    for row in tbody.find_all('tr'):
        team = {'name': None, 'position': []}
        for col in row.find_all('td'):
            # A <td> without a class attribute has no 'class' key; default
            # to an empty list so the membership tests below fall through
            # to the diagnostic branch instead of raising TypeError.
            css_classes = col.attrs.get('class', [])
            if 'ranking-team' in css_classes:
                team['name'] = col.string
            elif 'ranking-number' in css_classes:
                team['position'].append(null_int(col.string))
            else:
                print(col.attrs, col.string)
        # Totals may be included after the per-leg positions; keep only
        # as many positions as there are legs.
        team['position'] = team['position'][:len(legs)]
        teams.append(team)

    document = {
        'legs': legs,
        'teams': teams,
    }
    return document

def show_json(document):
    """Print *document* to stdout as JSON indented by two spaces."""
    from json import dumps

    rendered = dumps(document, indent=2)
    print(rendered)

def show_xml(document):
    """Print *document* to stdout as an XML declaration and a ``<results>`` tree.

    The tree has a ``<legs>`` element with one numbered ``<leg>`` per leg
    description, and a ``<teams>`` element with one ``<team>`` per team.
    Each team has a ``<name>`` and a ``<position>`` element holding one
    numbered ``<leg>`` per position. Numbering (attribute ``n``) starts
    at 1.

    A position of ``None`` produces an empty ``<leg>`` element rather than
    the text ``None``.

    :param document: mapping with a ``'legs'`` list of strings and a
        ``'teams'`` list of ``{'name': ..., 'position': [...]}`` mappings.
    """
    from xml.etree import ElementTree as XML

    xml_document = XML.Element("results")
    legs_xml = XML.SubElement(xml_document, 'legs')
    for n, leg in enumerate(document['legs'], start=1):
        leg_xml = XML.SubElement(legs_xml, 'leg', n=str(n))
        leg_xml.text = leg

    teams_xml = XML.SubElement(xml_document, 'teams')
    for team in document['teams']:
        team_xml = XML.SubElement(teams_xml, "team")
        name_xml = XML.SubElement(team_xml, "name")
        name_xml.text = team['name']
        position_xml = XML.SubElement(team_xml, "position")
        for n, position in enumerate(team['position'], start=1):
            leg_xml = XML.SubElement(position_xml, "leg", n=str(n))
            # str(None) would write the Python repr "None" into the XML;
            # a missing position is represented by an empty element.
            if position is not None:
                leg_xml.text = str(position)

    # Dumping a processing instruction with target "xml" writes the
    # <?xml version="1.0"?> line ahead of the tree.
    pi = XML.ProcessingInstruction("xml", 'version="1.0"')
    XML.dump(pi)
    XML.dump(xml_document)

if __name__ == "__main__":
    # Parse the page once, then print it in each output format, with a
    # rule of dashes ahead of every rendering.
    document = get_html()

    for render in (show_json, show_xml):
        print("-"*20)
        render(document)