import datetime
import re
import urllib.parse
from typing import Any, Dict, List, Optional, Tuple

import bs4
from bs4 import BeautifulSoup, ResultSet

from decksite.data import archetype, competition, deck, match, person
from decksite.database import db
from magic import decklist
from shared import dtutil, fetch_tools
from shared.pd_exception import InvalidDataException
from shared_web import logger

WINNER = '1st'
SECOND = '2nd'
TOP_4 = 't4'
TOP_8 = 't8'

ALIASES: Dict[str, str] = {}

def scrape(limit: int = 50) -> None:
    soup = BeautifulSoup(fetch_tools.fetch('https://gatherling.com/eventreport.php?format=Penny+Dreadful&series=&season=&mode=Filter+Events', character_encoding='utf-8'), 'html.parser')
    tournaments = [(gatherling_url(link['href']), link.string) for link in soup.find_all('a') if link['href'].find('eventreport.php?') >= 0]
    n = 0
    for (url, name) in tournaments:
        i = tournament(url, name)
        n = n + i
        if n > limit:
            return

def tournament(url: str, name: str) -> int:
    s = fetch_tools.fetch(url, character_encoding='utf-8', retry=True)

    # Tournament details
    soup = BeautifulSoup(s, 'html.parser')
    cell = soup.find('div', {'id': 'EventReport'}).find_all('td')[1]

    name = cell.find('a').string.strip()
    day_s = cell.find('br').next.strip()
    if '-0001' in day_s:
        # Tournament has been incorrectly configured.
        return 0

    dt, competition_series = get_dt_and_series(name, day_s)
    top_n = find_top_n(soup)
    # Tournaments that currently advertise a "top 0" are unstarted/in progress and should be ignored for now.
    if top_n == competition.Top.NONE:
        return 0
    db().begin('tournament')
    competition_id = competition.get_or_insert_competition(dt, dt, name, competition_series, url, top_n)
    ranks = rankings(soup)
    medals = medal_winners(s)
    final = finishes(medals, ranks)
    n = add_decks(dt, competition_id, final, s)
    db().commit('tournament')
    return n

# Hack in the known start time and series name because it's not in the page, depending on the series.
def get_dt_and_series(name: str, day_s: str) -> Tuple[datetime.datetime, str]:
    if 'APAC' in name:
        competition_series = 'APAC Penny Dreadful Sundays'
        start_time = '16:00'
        dt = get_dt(day_s, start_time, dtutil.APAC_SERIES_TZ)
    elif 'FNM' in name:
        competition_series = 'Penny Dreadful FNM'
        start_time = '19:00'
        dt = get_dt(day_s, start_time, dtutil.GATHERLING_TZ)
    else:
        if 'Saturday' in name or 'Sunday' in name or 'PDS' in name:
            start_time = '13:30'
        else:
            start_time = '19:00'
        dt = get_dt(day_s, start_time, dtutil.GATHERLING_TZ)
        competition_series = 'Penny Dreadful {day}s'.format(day=dtutil.day_of_week(dt, dtutil.GATHERLING_TZ))
    return (dt, competition_series)

def get_dt(day_s: str, start_time: str, timezone: Any) -> datetime.datetime:
    date_s = day_s + ' {start_time}'.format(start_time=start_time)
    return dtutil.parse(date_s, '%d %B %Y %H:%M', timezone)

def find_top_n(soup: BeautifulSoup) -> competition.Top:
    return competition.Top(int(soup.find('div', {'id': 'EventReport'}).find_all('table')[1].find_all('td')[1].string.strip().replace('TOP ', '')))

def add_decks(dt: datetime.datetime, competition_id: int, final: Dict[str, int], s: str) -> int:
    # The HTML of this page is so badly malformed that BeautifulSoup cannot really help us with this bit.
    rows = re.findall('<tr style=">(.*?)</tr>', s, re.MULTILINE | re.DOTALL)
    decks_added, ds = 0, []
    matches: List[bs4.element.Tag] = []
    for row in rows:
        cells = BeautifulSoup(row, 'html.parser').find_all('td')
        d = tournament_deck(cells, competition_id, dt, final)
        if d is not None:
            if d.get('id') is None or not match.load_matches_by_deck(d):
                decks_added += 1
                ds.append(d)
                matches += tournament_matches(d)
    add_ids(matches, ds)
    insert_matches_without_dupes(dt, matches)
    guess_archetypes(ds)
    return decks_added

def guess_archetypes(ds: List[deck.Deck]) -> None:
    deck.calculate_similar_decks(ds)
    for d in ds:
        if d.similar_decks and d.similar_decks[0].archetype_id is not None:
            archetype.assign(d.id, d.similar_decks[0].archetype_id, None, False)

def rankings(soup: BeautifulSoup) -> List[str]:
    rows = soup.find(text='Current Standings').find_parent('table').find_all('tr')

    # Expected structure:
    # <td colspan="8"><h6> Penny Dreadful Thursdays 1.02</h6></td>
    # <td>Rank</td>, <td>Player</td>, <td>Match Points</td>, <td>OMW %</td>, <td>PGW %</td>, <td>OGW %</td>, <td>Matches Played</td>, <td>Byes</td>
    # <td colspan="8"><br/><b> Tiebreakers Explained </b><p></p></td>
    # <td colspan="8"> Players with the same number of match points are ranked based on three tiebreakers scores according to DCI rules. In order, they are: </td>
    # <td colspan="8"> OMW % is the average percentage of matches your opponents have won. </td>
    # <td colspan="8"> PGW % is the percentage of games you have won. </td>
    # <td colspan="8"> OGW % is the average percentage of games your opponents have won. </td>
    # <td colspan="8"> BYEs are not included when calculating standings. For example, a player with one BYE, one win, and one loss has a match win percentage of .50 rather than .66</td>
    # <td colspan="8"> When calculating standings, any opponent with less than a .33 win percentage is calculated as .33</td>

    rows = rows[2:-7]
    ranks = []
    for row in rows:
        cells = row.find_all('td')
        mtgo_username = aliased(cells[1].string)
        ranks.append(mtgo_username)
    return ranks

def medal_winners(s: str) -> Dict[str, int]:
    winners = {}
    # The HTML of this page is so badly malformed that BeautifulSoup cannot really help us with this bit.
    rows = re.findall('<tr style=">(.*?)</tr>', s, re.MULTILINE | re.DOTALL)
    for row in rows:
        player = BeautifulSoup(row, 'html.parser').find_all('td')[2]
        if player.find('img'):
            mtgo_username = aliased(player.a.contents[0])
            img = re.sub(r'styles/Chandra/images/(.*?)\.png', r'\1', player.img['src'])
            if img == WINNER:
                winners[mtgo_username] = 1
            elif img == SECOND:
                winners[mtgo_username] = 2
            elif img == TOP_4:
                winners[mtgo_username] = 3
            elif img == TOP_8:
                winners[mtgo_username] = 5
            elif img == 'verified':
                pass
            else:
                raise InvalidDataException('Unknown player image `{img}`'.format(img=img))
    return winners

def finishes(winners: Dict[str, int], ranks: List[str]) -> Dict[str, int]:
    final = winners.copy()
    r = len(final)
    # Non-medalists are ranked after the medal winners, in standings order.
    for p in ranks:
        if p not in final.keys():
            r += 1
            final[p] = r
    return final

def tournament_deck(cells: ResultSet, competition_id: int, date: datetime.datetime, final: Dict[str, int]) -> Optional[deck.Deck]:
    d: deck.RawDeckDescription = {'source': 'Gatherling', 'competition_id': competition_id, 'created_date': dtutil.dt2ts(date)}
    player = cells[2]
    username = aliased(player.a.contents[0].string)
    d['mtgo_username'] = username
    d['finish'] = final.get(username)
    link = cells[4].a
    d['url'] = gatherling_url(link['href'])
    d['name'] = link.string
    if cells[5].find('a'):
        d['archetype'] = cells[5].a.string
    else:
        d['archetype'] = cells[5].string
    gatherling_id = urllib.parse.parse_qs(urllib.parse.urlparse(str(d['url'])).query)['id'][0]
    d['identifier'] = gatherling_id
    existing = deck.get_deck_id(d['source'], d['identifier'])
    if existing is not None:
        return deck.load_deck(existing)
    dlist = decklist.parse(fetch_tools.post(gatherling_url('deckdl.php'), {'id': gatherling_id}))
    d['cards'] = dlist
    if len(dlist['maindeck']) + len(dlist['sideboard']) == 0:
        logger.warning('Rejecting deck with id {id} because it has no cards.'.format(id=gatherling_id))
        return None
    return deck.add_deck(d)

def tournament_matches(d: deck.Deck) -> List[bs4.element.Tag]:
    url = 'https://gatherling.com/deck.php?mode=view&id={identifier}'.format(identifier=d.identifier)
    s = fetch_tools.fetch(url, character_encoding='utf-8', retry=True)
    soup = BeautifulSoup(s, 'html.parser')
    anchor = soup.find(string='MATCHUPS')
    if anchor is None:
        logger.warning('Skipping {id} because it has no MATCHUPS.'.format(id=d.id))
        return []
    table = anchor.findParents('table')[0]
    rows = table.find_all('tr')
    rows.pop(0) # skip header
    rows.pop() # skip empty last row
    return find_matches(d, rows)

MatchListType = List[Dict[str, Any]]

def find_matches(d: deck.Deck, rows: ResultSet) -> MatchListType:
    matches = []
    for row in rows:
        tds = row.find_all('td')
        if 'No matches were found for this deck' in tds[0].renderContents().decode('utf-8'):
            logger.warning('Skipping {identifier} because it played no matches.'.format(identifier=d.identifier))
            break
        round_type, num = re.findall(r'([TR])(\d+)', tds[0].string)[0]
        num = int(num)
        if round_type == 'R':
            elimination = 0
            round_num = num
        elif round_type == 'T':
            elimination = num
            round_num += 1
        else:
            raise InvalidDataException('Round was neither Swiss (R) nor Top 4/8 (T) in {round_type} for {id}'.format(round_type=round_type, id=d.id))
        if 'Bye' in tds[1].renderContents().decode('utf-8') or 'No Deck Found' in tds[5].renderContents().decode('utf-8'):
            # A Bye or a missing opponent deck is recorded as a 2-0 win with no opposing deck.
            left_games, right_games, right_identifier = 2, 0, None
        else:
            left_games, right_games = tds[2].string.split(' - ')
            href = tds[5].find('a')['href']
            right_identifier = re.findall(r'id=(\d+)', href)[0]
        matches.append({
            'round': round_num,
            'elimination': elimination,
            'left_games': left_games,
            'left_identifier': d.identifier,
            'right_games': right_games,
            'right_identifier': right_identifier
        })
    return matches

def insert_matches_without_dupes(dt: datetime.datetime, matches: MatchListType) -> None:
    db().begin('insert_matches_without_dupes')
    inserted: Dict[str, bool] = {}
    for m in matches:
        # Each match is reported from both decks' pages, so skip the reverse of a match we have already inserted.
        reverse_key = str(m['round']) + '|' + str(m['right_id']) + '|' + str(m['left_id'])
        if inserted.get(reverse_key):
            continue
        match.insert_match(dt, m['left_id'], m['left_games'], m['right_id'], m['right_games'], m['round'], m['elimination'])
        key = str(m['round']) + '|' + str(m['left_id']) + '|' + str(m['right_id'])
        inserted[key] = True
    db().commit('insert_matches_without_dupes')

def add_ids(matches: MatchListType, ds: List[deck.Deck]) -> None:
    # Translate the Gatherling deck identifiers in the scraped matches into our deck ids.
    decks_by_identifier = {d.identifier: d for d in ds}
    def lookup(gatherling_id: int) -> deck.Deck:
        try:
            return decks_by_identifier[gatherling_id]
        except KeyError:
            raise InvalidDataException("Unable to find deck with gatherling id '{0}'".format(gatherling_id))
    for m in matches:
        m['left_id'] = lookup(m['left_identifier']).id
        m['right_id'] = lookup(m['right_identifier']).id if m['right_identifier'] else None

def gatherling_url(href: str) -> str:
    if href.startswith('http'):
        return href
    return 'https://gatherling.com/{href}'.format(href=href)

def aliased(username: str) -> str:
    if not ALIASES:
        load_aliases()
    return ALIASES.get(username, username)

def load_aliases() -> None:
    ALIASES['dummyplaceholder'] = '' # To prevent doing the load on every lookup if there are no aliases in the db.
    for entry in person.load_aliases():
        ALIASES[entry.alias] = entry.mtgo_username
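
# A minimal usage sketch (an assumption, not part of the original module): the public
# entry point is scrape(), which walks Gatherling's event report listing and ingests
# tournaments until the deck limit is passed, e.g.
#
#     scrape(limit=10)  # stop once roughly ten decks have been added
#
# The tournament/deck/match helpers above are internal steps of that flow.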