# Python source code of ng

"""This module is used for Newgrounds parsing."""

from collections import namedtuple
from itertools import chain
import re

from yarl import URL

from gd.utils.parser import ExtDict
from gd.typing import Dict, HTMLElement, List, TypeVar, Union, XMLElement

# Start from the non-lxml defaults; both names are overwritten below when lxml imports cleanly.
use_lxml, Element = False, XMLElement
try:
    from lxml import html

    use_lxml, Element = True, HTMLElement
except ImportError:
    # lxml is not installed: fall back to html5lib's ``parse`` function.
    try:
        from html5lib import parse
    except ImportError:
        # Neither ``html`` nor ``parse`` gets defined in this case; only a message is
        # printed here and the module import itself still succeeds.
        print("Failed to import lxml and html5lib. Newgrounds parsing will not be supported.")

# Names exported by a star-import of this module.
__all__ = (
    "re_link",
    "re_name",
    "re_author",
    "re_size",
    "re_attrib",
    "find_song_info",
    "extract_info_from_endpoint",
    "search_song_data",
    "extract_user_songs",
    "extract_users",
)

# Generic type variables used in the annotations of ``switch_if_none``.
T = TypeVar("T")
U = TypeVar("U")

# Regular expression sources, kept as plain strings and handed to ``re`` at call time.

# An https://audio.ngfiles.com/ URL, running up to the next quote character.
re_link = r"https://audio\.ngfiles\.com/([^\"']+)"

# Digits that follow a ``filesize`` key (one arbitrary character on each side of the key).
re_size = r".filesize.:(\d+)"

# Text enclosed in a ``<title>`` element.
re_name = r"<title>([^<>]+)</title>"

# Value that follows an ``artist`` key, running up to the next quote character.
re_author = r".artist.:.([^\"']+)."

# Template for a quoted attribute: ``{}`` is filled with the attribute name through
# ``str.format``; group ``quote`` holds the opening quote and group 2 the attribute value.
re_attrib = r"{}[ ]*=[ ]*(?P<quote>[\"'])(.*?)(?P=quote)"

# The attribute template specialised for ``class`` attributes.
re_class = re_attrib.format("class")

# Immutable record returned by ``find_song_info``.
SongInfo = namedtuple("SongInfo", ["link", "size", "name", "author"])


def html_parse(text: str) -> Element:
    """Parse HTML ``text`` into an element tree using whichever backend was imported.

    Before parsing, every match of ``re_class`` (the whole ``class=...`` attribute text,
    value included) has its space characters removed.

    Returns the result of ``lxml.html.fromstring`` when lxml is in use, otherwise the
    result of ``html5lib.parse`` called with the ``"etree"`` tree builder.
    """

    def strip_spaces(match):
        # Operates on the complete matched attribute, not only on its value.
        return match.group(0).replace(" ", "")

    cleaned = re.sub(re_class, strip_spaces, text)

    if not use_lxml:
        return parse(cleaned, "etree", False)
    return html.fromstring(cleaned)


def find_song_info(text: str) -> SongInfo:
    """Collect the link, size, name and author of a song from ``text``.

    Each field is located with its module-level pattern (``re_link``, ``re_size``,
    ``re_name``, ``re_author``). The link is the full match of ``re_link``; the size is
    converted to ``int``; name and author are the first capture group of their patterns.

    Raises:
        ValueError: if any of the four patterns has no match in ``text``.
    """
    link_match = re.search(re_link, text)
    size_match = re.search(re_size, text)
    name_match = re.search(re_name, text)
    author_match = re.search(re_author, text)

    try:
        # A failed search yields None, so ``.group`` raises AttributeError below.
        link = link_match.group(0)
        size = int(size_match.group(1))
        name = name_match.group(1)
        author = author_match.group(1)
    except AttributeError:  # not found
        raise ValueError("Song info was not found.") from None

    return SongInfo(link=link, size=size, name=name, author=author)


def extract_info_from_endpoint(text: str) -> ExtDict:
    """Turn a ``<br>``-separated endpoint response into an ``ExtDict``.

    ``text`` is split on ``<br>`` / ``</br>`` tags and empty pieces are dropped. The first
    five remaining pieces are read, in order, as: artist, whitelisted, scouted, song, api.
    Any further pieces are ignored; fewer than five raise ``ValueError`` on unpacking.

    The artist and song values are whatever follows the last ``"Artist: "`` / ``"Song: "``
    marker in their piece; the three flags are computed with ``check_not``.
    """
    pieces = filter(is_not_empty, re.split(r"</?br>", text))
    artist_line, whitelisted_line, scouted_line, song_line, api_line, *_ = pieces

    artist_name = artist_line.split("Artist: ")[-1]
    song_name = song_line.split("Song: ")[-1]

    return ExtDict(
        artist=artist_name,
        song=song_name,
        whitelisted=check_not(whitelisted_line),
        scouted=check_not(scouted_line),
        api=check_not(api_line),
    )


def search_song_data(text: str) -> List[ExtDict]:
    """Extract song entries from the HTML in ``text``.

    ``a`` elements with class ``item-audiosubmission`` are paired, in document order, with
    ``div`` elements with class ``detail-title``; pairing stops at the shorter of the two.

    Returns:
        A list of ``ExtDict`` objects with the keys ``id`` (last path segment of the
        anchor's ``href`` as ``int``), ``name`` (text of the first child of the ``div``,
        including the text and tail of its own children), ``author`` (text of the first
        child of the second child of the ``div``) and ``links`` (``{"normal": <url>}``
        with the URL forced to the ``https`` scheme).

    Raises:
        ValueError: if a ``div`` has fewer than two children or the last URL segment is
            not an integer.
        IndexError: if the second child of a ``div`` has no children.
    """
    tree, result = html_parse(text), []

    for a, div in zip(
        tree.findall(r'.//a[@class="item-audiosubmission"]'),
        tree.findall(r'.//div[@class="detail-title"]'),
    ):
        url = URL(a.attrib["href"]).with_scheme("https")
        song_id = int(url.parts[-1])

        # Children are reached by iterating/indexing the element itself rather than via
        # ``getchildren()``: that method is deprecated in lxml and no longer exists on
        # ``xml.etree.ElementTree`` elements since Python 3.9, so the non-lxml backend
        # would fail with AttributeError.
        h4, span, *_ = div

        name = switch_if_none(h4.text, "") + "".join(
            switch_if_none(mark.text, "") + switch_if_none(mark.tail, "") for mark in h4
        )
        author = span[0].text

        result.append(ExtDict(id=song_id, name=name, author=author, links={"normal": str(url)}))

    return result


def extract_user_songs(
    json: Dict[str, Dict[str, Dict[str, Union[Dict[str, str], List[str]]]]]
) -> List[ExtDict]:
    """Extract song entries from a decoded JSON payload.

    ``json["years"]`` is expected to be a mapping whose values each carry an ``"items"``
    list of HTML snippets. For every snippet, the first ``a`` element with class
    ``item-link`` provides the song URL (``href``, forced to ``https``), the song id (last
    path segment as ``int``) and the song name (``title`` attribute).

    Returns:
        A list of ``ExtDict`` objects with the keys ``id``, ``name`` and ``links``. The list
        is empty when ``json["years"]`` raises ``TypeError`` or has no ``values`` method.
    """
    try:
        per_year = json["years"].values()
    except (TypeError, AttributeError):  # not found
        return []

    songs = []

    for year in per_year:
        for snippet in year["items"]:
            # Indexing with [0] raises IndexError when the snippet holds no such link.
            link = html_parse(snippet).findall(r'.//a[@class="item-link"]')[0]

            url = URL(link.attrib["href"]).with_scheme("https")
            song_id = int(url.parts[-1])
            title = link.attrib["title"]

            songs.append(ExtDict(id=song_id, name=title, links={"normal": str(url)}))

    return songs


def extract_users(text: str) -> List[ExtDict]:
    """Extract user entries from the HTML in ``text``.

    Every ``a`` element found under ``div[@class="item-details-main"]/h4`` yields one
    ``ExtDict`` with the keys ``link`` (the ``href`` as a ``URL`` object forced to the
    ``https`` scheme) and ``name`` (the anchor's text).
    """
    anchors = html_parse(text).findall(r'.//div[@class="item-details-main"]/h4/a')

    return [
        ExtDict(link=URL(anchor.attrib["href"]).with_scheme("https"), name=anchor.text)
        for anchor in anchors
    ]


def switch_if_none(obj: T, to: U) -> Union[T, U]:
    """Return ``to`` when ``obj`` is ``None``; otherwise return ``obj`` unchanged.

    Only ``None`` triggers the replacement: other falsy values such as ``""`` or ``0``
    are returned as they are.
    """
    return to if obj is None else obj


def check_not(string: str) -> bool:
    """Return ``True`` when ``string`` does not contain ``"not"``, ignoring case.

    The check is a plain substring search on the case-folded text, so words that merely
    contain ``not`` (for example ``"Nothing"``) also produce ``False``.
    """
    folded = string.casefold()
    return folded.find("not") == -1


def is_not_empty(string: str) -> bool:
    """Return ``True`` when ``string`` has at least one character."""
    return len(string) != 0