# coding=utf-8

import json
import random
import re
import requests

from proxy import Proxy
from .basespider import BaseSpider


class GatherproxySpider(BaseSpider):
    """Spider that collects proxies listed on gatherproxy.com.

    The listing pages embed each proxy as a JSON object passed to a
    JavaScript ``gp.insertPrx(...)`` call; ``parse_page`` extracts those
    objects and hands each one to ``add_proxy`` as a ``Proxy``.
    """

    name = 'gatherproxy'

    # Captures the JSON argument of every ``gp.insertPrx(...)`` call.
    # Compiled once at class level instead of on every parsed page.
    _INSERT_PRX_PATTERN = re.compile(r'gp\.insertPrx\((.*?)\)', re.S)

    # Seconds to wait for the local proxy service before giving up.
    _PROXY_API_TIMEOUT = 10

    def __init__(self, *a, **kwargs):
        """Set up the listing URLs and request headers, then call ``init()``.

        All positional and keyword arguments are forwarded unchanged to the
        base class constructor.
        """
        super(GatherproxySpider, self).__init__(*a, **kwargs)
        self.urls = [
            'http://gatherproxy.com/',
            'http://www.gatherproxy.com/proxylist/anonymity/?t=Anonymous',
            'http://gatherproxy.com/proxylist/country/?c=China',
        ]

        self.headers = {
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Encoding': 'gzip, deflate',
            'Accept-Language': 'en-US,en;q=0.5',
            'Connection': 'keep-alive',
            'Host': 'www.gatherproxy.com',
            'Upgrade-Insecure-Requests': '1',
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:52.0) Gecko/20100101 Firefox/52.0',
        }

        # Fetching through a proxy is currently disabled; to enable it:
        # self.proxies = self.get_proxy()
        self.init()

    def parse_page(self, response):
        """Extract every proxy embedded in *response* and register it.

        Entries that are not valid JSON objects, or whose port is missing or
        not a hexadecimal string, are skipped so that a single malformed
        entry does not discard the rest of the page.

        :param response: page response whose ``body`` is the encoded HTML.
        """
        items = self._INSERT_PRX_PATTERN.findall(response.body.decode())
        for item in items:
            try:
                data = json.loads(item)
            except ValueError:
                # Not valid JSON (e.g. the lazy match was cut short).
                continue
            if not isinstance(data, dict):
                continue

            # The site publishes the port as a hexadecimal string.
            try:
                port = str(int(data.get('PROXY_PORT'), 16))
            except (TypeError, ValueError):
                continue

            proxy = Proxy()
            proxy.set_value(
                ip=data.get('PROXY_IP'),
                port=port,
                country=data.get('PROXY_COUNTRY'),
                anonymity=data.get('PROXY_TYPE'),
                source=self.name,
            )

            self.add_proxy(proxy=proxy)

    def get_proxy(self):
        """Pick a random proxy for this spider from the local proxy service.

        Queries ``http://127.0.0.1:8000/?name=<spider name>`` and expects a
        JSON list of objects carrying ``ip`` and ``port`` keys.

        :return: a ``{'http': 'http://ip:port'}`` mapping, or ``None`` when
            the service is unreachable, times out, or returns no usable
            proxy.
        """
        url = 'http://127.0.0.1:8000/?name={0}'.format(self.name)
        try:
            r = requests.get(url=url, timeout=self._PROXY_API_TIMEOUT)
            if not r.text:
                return None
            data = json.loads(r.text)
        except (requests.RequestException, ValueError):
            # Best effort: network failures and invalid JSON both mean
            # "no proxy available" rather than an error for the caller.
            return None

        if not isinstance(data, list) or not data:
            return None

        proxy = random.choice(data)
        if not isinstance(proxy, dict):
            return None

        ip = proxy.get('ip')
        port = proxy.get('port')
        if ip is None or port is None:
            return None

        address = '%s:%s' % (ip, port)
        return {
            'http': 'http://%s' % address,
        }