import re

from requests_html import HTML

from scylla.database import ProxyIP
from scylla.worker import Worker
from .base_provider import BaseProvider


class ProxylistsProvider(BaseProvider):
    """Proxy provider that scrapes proxylists.net.

    ``urls()`` starts from the site's country index page and collects the
    listing-page links of a country; ``parse()`` turns one listing page into
    ``ProxyIP`` entries.
    """

    # Compiled once so parse() does not look the patterns up for every row.
    _IP_PATTERN = re.compile(r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}')
    _PORT_PATTERN = re.compile(r'\d{2,5}')

    def __init__(self):
        self.w = Worker()
        # Matches links of the form "/<country>_0.html" and captures <country>.
        self.country_patten = re.compile(r'^/(.+)_0\.html$')

    def parse(self, html: HTML) -> [ProxyIP]:
        """Extract proxies from a listing page.

        Every row of the nested table is inspected: the first cell is searched
        for a dotted-quad IP and the second cell for a 2-5 digit port. Rows
        missing either cell, or whose cells contain no such text (for example
        header rows), are skipped instead of raising.

        :param html: the page to scan
        :return: one ``ProxyIP`` per row that yielded both an IP and a port
        """
        ip_list: [ProxyIP] = []

        for tr in html.find('table table tr'):
            ip_element = tr.find('td:nth-of-type(1)', first=True)
            port_element = tr.find('td:nth-of-type(2)', first=True)
            if not (ip_element and port_element):
                continue

            ip_match = self._IP_PATTERN.search(ip_element.text)
            port_match = self._PORT_PATTERN.search(port_element.text)
            # A row without recognisable IP/port text must not abort the
            # whole page, so it is ignored.
            if ip_match is None or port_match is None:
                continue

            ip_list.append(ProxyIP(ip=ip_match.group(0), port=port_match.group(0)))

        return ip_list

    def urls(self) -> [str]:
        """Collect listing-page URLs reachable from the country index.

        :return: de-duplicated URLs (order not guaranteed); empty when the
            index page is unavailable or contains no country link
        """
        ret = set()
        country_url = 'http://www.proxylists.net/countries.html'
        country_page = self.w.get_html(country_url, False)
        # Defensive: presumably get_html may yield None on a failed fetch
        # -- TODO confirm against Worker.
        if country_page is None:
            return []

        for a in country_page.find('a'):
            # Anchors without an href (e.g. named anchors) are simply non-matching.
            relative_path = a.attrs.get('href', '')
            matched = self.country_patten.match(relative_path)
            if matched:
                ret.update(self.gen_url_for_country(matched.group(1)))
                # NOTE(review): only the first matching country is expanded;
                # confirm this limit is intentional before removing the break.
                break
        return list(ret)

    def gen_url_for_country(self, country) -> [str]:
        """Build the listing-page URLs for one country.

        Fetches the country's first page and turns every link found in the
        last row of the nested table into an absolute URL.

        :param country: the country part of the "<country>_0.html" page name
        :return: absolute URLs, in document order; empty when the first page
            is unavailable
        """
        ret = []
        first_page = self.w.get_html('http://www.proxylists.net/{}_0.html'.format(country), False)
        # Defensive: same assumption as in urls() about a missing page.
        if first_page is None:
            return ret

        for a in first_page.find('table table tr:last-of-type a'):
            href = a.attrs.get('href')
            if href:
                ret.append('http://www.proxylists.net/{}'.format(href))
        return ret

    @staticmethod
    def should_render_js() -> bool:
        """Return True: this provider asks for its pages to be rendered with JavaScript."""
        return True