Python urllib.request.HTTPCookieProcessor() Examples

The following are 22 code examples of urllib.request.HTTPCookieProcessor(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module urllib.request , or try the search function .
Example #1
Source File: base.py    From WordQuery with GNU General Public License v3.0 10 votes vote down vote up
def __init__(self):
        super(WebService, self).__init__()
        self.cache = defaultdict(defaultdict)
        self._cookie = CookieJar()
        self._opener = urllib2.build_opener(
            urllib2.HTTPCookieProcessor(self._cookie))
        self.query_interval = 1 
Example #2
Source File: DLInfos.py    From iqiyi-parser with MIT License 7 votes vote down vote up
def __request__(self):

        Cookiejar = CookieJar()
        opener = build_opener(HTTPCookieProcessor(Cookiejar))
        _header = dict(self.headers.items())
        if self.cookie:
            _header.update({'Cookie': self.cookie})
        req = Request(self.url, headers=_header, origin_req_host=self.host)

        res = None

        try:
            res = opener.open(req)
            # break
        except HTTPError as e:
            # if e.code >= 400 and e.code < 500:
            return e, None

        except (socket.timeout, URLError) as e:
            return e, None
        except Exception as e:
            traceback.print_exc()
            return e, None

        return res, Cookiejar._cookies 
Example #3
Source File: web_utils.py    From DLink_Harvester with GNU General Public License v3.0 6 votes vote down vote up
def cookie_friendly_download(referer_url, file_url, store_dir='.', timeout=1000):
    from http.cookiejar import CookieJar
    from urllib import request
    cj = CookieJar()
    cp = request.HTTPCookieProcessor(cj)
    opener = request.build_opener(cp)
    with opener.open(referer_url) as fin:
        fin.headers.items()
    import os
    from os import path
    with opener.open(file_url, timeout=timeout) as fin:
        file_bin = fin.read()
        filename = fin.headers['Content-Disposition']
        filename = filename.split(';')[-1].split('=')[1]
        os.makedirs(store_dir, exist_ok=True)
        with open(path.join(store_dir, filename), mode='wb') as fout:
            fout.write(file_bin)
            return path.join(store_dir, filename) 
Example #4
Source File: tweet_operation.py    From twitter-intelligence with MIT License 6 votes vote down vote up
def get_json_response(tweet_criteria, refresh_cursor, cookiejar, proxy):
        url = "https://twitter.com/i/search/timeline?f=tweets&q=%s&src=typd&max_position=%s"

        url_data = ''

        if hasattr(tweet_criteria, 'username'):
            url_data += ' from:' + tweet_criteria.username

        if hasattr(tweet_criteria, 'query'):
            url_data += ' ' + tweet_criteria.query

        if hasattr(tweet_criteria, 'since'):
            url_data += ' since:' + tweet_criteria.since

        if hasattr(tweet_criteria, 'until'):
            url_data += ' until:' + tweet_criteria.until

        if hasattr(tweet_criteria, 'topTweets'):
            if tweet_criteria.topTweets:
                url = "https://twitter.com/i/search/timeline?q=%s&src=typd&max_position=%s"

        url = url % (urllib.parse.quote(url_data), urllib.parse.quote(refresh_cursor))

        headers = [
            ('Host', "twitter.com"),
            ('User-Agent',
             "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36"),
            ('Accept', "application/json, text/javascript, */*; q=0.01"),
            ('Accept-Language', "de,en-US;q=0.7,en;q=0.3"),
            ('X-Requested-With', "XMLHttpRequest"),
            ('Referer', url),
            ('Connection', "keep-alive")
        ]

        if proxy:
            opener = urllib2.build_opener(urllib2.ProxyHandler({'http': proxy, 'https': proxy}),
                                          urllib2.HTTPCookieProcessor(cookiejar))
        else:
            opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookiejar))
            opener.addheaders = headers

        try:
            response = opener.open(url)
            json_response = response.read()
        except:
            print
            "Twitter weird response. Try to see on browser: https://twitter.com/search?q=%s&src=typd" % urllib.parse.quote(
                url_data)
            sys.exit()
            return

        data = json.loads(json_response)

        return data 
Example #5
Source File: scholar.py    From dblp with MIT License 6 votes vote down vote up
def __init__(self):
        self.articles = []
        self.query = None
        self.cjar = MozillaCookieJar()

        # If we have a cookie file, load it:
        if ScholarConf.COOKIE_JAR_FILE and \
           os.path.exists(ScholarConf.COOKIE_JAR_FILE):
            try:
                self.cjar.load(ScholarConf.COOKIE_JAR_FILE,
                               ignore_discard=True)
                ScholarUtils.log('info', 'loaded cookies file')
            except Exception as msg:
                ScholarUtils.log('warn', 'could not load cookies file: %s' % msg)
                self.cjar = MozillaCookieJar() # Just to be safe

        self.opener = build_opener(HTTPCookieProcessor(self.cjar))
        self.settings = None # Last settings object, if any 
Example #6
Source File: tokenresolver.py    From plugin.video.vrt.nu with GNU General Public License v3.0 5 votes vote down vote up
def _get_fresh_token(self, refresh_token, name):
        """Refresh an expired X-VRT-Token, vrtlogin-at or vrtlogin-rt token"""
        refresh_url = self._TOKEN_GATEWAY_URL + '/refreshtoken?legacy=true'
        cookie_value = 'vrtlogin-rt=' + refresh_token
        headers = {'Cookie': cookie_value}
        cookiejar = cookielib.CookieJar()
        opener = build_opener(HTTPCookieProcessor(cookiejar), ProxyHandler(self._proxies))
        req = Request(refresh_url, headers=headers)
        log(2, 'URL get: {url}', url=refresh_url)
        opener.open(req)
        return TokenResolver._create_token_dictionary(cookiejar, name) 
Example #7
Source File: __init__.py    From odoorpc with GNU Lesser General Public License v3.0 5 votes vote down vote up
def __init__(
        self,
        host,
        port=8069,
        timeout=120,
        version=None,
        deserialize=True,
        opener=None,
    ):
        super(ConnectorJSONRPC, self).__init__(host, port, timeout, version)
        self.deserialize = deserialize
        # One URL opener (with cookies handling) shared between
        # JSON and HTTP requests
        if opener is None:
            cookie_jar = CookieJar()
            opener = build_opener(HTTPCookieProcessor(cookie_jar))
        self._opener = opener
        self._proxy_json, self._proxy_http = self._get_proxies() 
Example #8
Source File: tokenresolver.py    From plugin.video.vrt.nu with GNU General Public License v3.0 5 votes vote down vote up
def _get_usertoken(self, name=None, login_json=None):
        """Get a user X-VRT-Token, vrtlogin-at, vrtlogin-expiry, vrtlogin-rt, SESSION, OIDCXSRF or state token"""
        if not login_json:
            login_json = self._get_login_json()
        cookiejar = cookielib.CookieJar()
        opener = build_opener(HTTPCookieProcessor(cookiejar), ProxyHandler(self._proxies))
        log(2, 'URL get: {url}', url=unquote(self._USER_TOKEN_GATEWAY_URL))
        opener.open(self._USER_TOKEN_GATEWAY_URL)
        xsrf = next((cookie for cookie in cookiejar if cookie.name == 'OIDCXSRF'), None)
        if xsrf is None:
            return None
        payload = dict(
            UID=login_json.get('UID'),
            UIDSignature=login_json.get('UIDSignature'),
            signatureTimestamp=login_json.get('signatureTimestamp'),
            client_id='vrtnu-site',
            _csrf=xsrf.value
        )
        data = urlencode(payload).encode()
        log(2, 'URL post: {url}', url=unquote(self._VRT_LOGIN_URL))
        opener.open(self._VRT_LOGIN_URL, data=data)

        # Cache additional tokens for later use
        refreshtoken = TokenResolver._create_token_dictionary(cookiejar, cookie_name='vrtlogin-rt')
        accesstoken = TokenResolver._create_token_dictionary(cookiejar, cookie_name='vrtlogin-at')
        if refreshtoken is not None:
            from json import dumps
            cache_file = self._get_token_filename('vrtlogin-rt')
            update_cache(cache_file, dumps(refreshtoken), self._TOKEN_CACHE_DIR)
        if accesstoken is not None:
            from json import dumps
            cache_file = self._get_token_filename('vrtlogin-at')
            update_cache(cache_file, dumps(accesstoken), self._TOKEN_CACHE_DIR)

        return TokenResolver._create_token_dictionary(cookiejar, name) 
Example #9
Source File: common.py    From iqiyi-parser with MIT License 5 votes vote down vote up
def initOpener(self):
        self.cookiejar = CookieJar()
        self.opener = build_opener(HTTPCookieProcessor(self.cookiejar))
        self.opener.addheaders = list(self.headers.items()) 
Example #10
Source File: base.py    From FastWordQuery with GNU General Public License v3.0 5 votes vote down vote up
def __init__(self):
        super(WebService, self).__init__()
        self._cookie = CookieJar()
        self._opener = urllib2.build_opener(
            urllib2.HTTPCookieProcessor(self._cookie))
        self.query_interval = 1.0 
Example #11
Source File: html_downloader.py    From SmallReptileTraining with MIT License 5 votes vote down vote up
def download(self, url, retry_count=3, headers=None, proxy=None, data=None):
        if url is None:
            return None
        try:
            req = request.Request(url, headers=headers, data=data)
            cookie = cookiejar.CookieJar()
            cookie_process = request.HTTPCookieProcessor(cookie)
            opener = request.build_opener()
            if proxy:
                proxies = {urlparse(url).scheme: proxy}
                opener.add_handler(request.ProxyHandler(proxies))
            content = opener.open(req).read()
        except error.URLError as e:
            print('HtmlDownLoader download error:', e.reason)
            content = None
            if retry_count > 0:
                if hasattr(e, 'code') and 500 <= e.code < 600:
                    #说明是 HTTPError 错误且 HTTP CODE 为 5XX 范围说明是服务器错误,可以尝试再次下载
                    return self.download(url, retry_count-1, headers, proxy, data)
        return content 
Example #12
Source File: spider_main.py    From SmallReptileTraining with MIT License 5 votes vote down vote up
def create_cookie_opener(self):
        '''
        设置启用Cookie
        :return: 返回一个自定义的opener
        '''
        cookie = cookiejar.CookieJar()
        cookie_process = request.HTTPCookieProcessor(cookie)
        opener = request.build_opener(cookie_process)
        return opener 
Example #13
Source File: LuoguBrowser.py    From LuoguCrawler with MIT License 5 votes vote down vote up
def setOpener(self):
        """ 初始化opener
        """
        cj = cookiejar.CookieJar()
        pro = request.HTTPCookieProcessor(cj)
        self.opener = request.build_opener(pro)
        header = []
        for key, value in self._headers.items():
            elem = (key, value)
            header.append(elem)
        self.opener.addheaders = header 
Example #14
Source File: browser.py    From Hatkey with GNU General Public License v3.0 5 votes vote down vote up
def __init__(self):
        self.cookiejar = CookieJar()
        self._cookie_processor = HTTPCookieProcessor(self.cookiejar)
        self.form = None

        self.url = "http://0.0.0.0:8080/"
        self.path = "/"
        
        self.status = None
        self.data = None
        self._response = None
        self._forms = None 
Example #15
Source File: jsonrpclib.py    From odoorpc with GNU Lesser General Public License v3.0 5 votes vote down vote up
def __init__(self, host, port, timeout=120, ssl=False, opener=None):
        self._root_url = "{http}{host}:{port}".format(
            http=(ssl and "https://" or "http://"), host=host, port=port)
        self._timeout = timeout
        self._builder = URLBuilder(self)
        self._opener = opener
        if not opener:
            cookie_jar = CookieJar()
            self._opener = build_opener(HTTPCookieProcessor(cookie_jar)) 
Example #16
Source File: nse.py    From nsetools with MIT License 5 votes vote down vote up
def nse_opener(self):
        """
        builds opener for urllib2
        :return: opener object
        """
        cj = CookieJar()
        return build_opener(HTTPCookieProcessor(cj)) 
Example #17
Source File: httpbot.py    From humblebundle with GNU General Public License v3.0 5 votes vote down vote up
def __init__(self, base_url="", tag="", cookiejar=None, debug=False):
        self.tag = tag
        hh  = urllib2.HTTPHandler( debuglevel=1 if debug else 0)
        hsh = urllib2.HTTPSHandler(debuglevel=1 if debug else 0)
        cp  = urllib2.HTTPCookieProcessor(cookiejar)
        self._opener = urllib2.build_opener(hh, hsh, cp)
        scheme, netloc, path, q, f  = urlparse.urlsplit(base_url, "http")
        if not netloc:
            netloc, _, path = path.partition('/')
        self.base_url = urlparse.urlunsplit((scheme, netloc, path, q, f)) 
Example #18
Source File: jsonrpclib.py    From odoorpc with GNU Lesser General Public License v3.0 5 votes vote down vote up
def __init__(self, host, port, timeout=120, ssl=False, opener=None):
        self._root_url = "{http}{host}:{port}".format(
            http=(ssl and "https://" or "http://"), host=host, port=port
        )
        self._timeout = timeout
        self._builder = URLBuilder(self)
        self._opener = opener
        if not opener:
            cookie_jar = CookieJar()
            self._opener = build_opener(HTTPCookieProcessor(cookie_jar)) 
Example #19
Source File: asf_template.py    From esa_sentinel with MIT License 4 votes vote down vote up
def get_new_cookie(self):
        # Start by prompting user to input their credentials
        
        # Another Python2/3 workaround
        try:
            new_username = raw_input("Username: ")
        except NameError:
            new_username = input("Username: ")
        new_password = getpass.getpass(prompt="Password (will not be displayed): ")
        
        # Build URS4 Cookie request
        auth_cookie_url = self.asf_urs4['url'] + '?client_id=' + self.asf_urs4['client'] + '&redirect_uri=' + \
                          self.asf_urs4['redir'] + '&response_type=code&state='
        
        try:
            # python2
            user_pass = base64.b64encode(bytes(new_username + ":" + new_password))
        except TypeError:
            # python3
            user_pass = base64.b64encode(bytes(new_username + ":" + new_password, "utf-8"))
            user_pass = user_pass.decode("utf-8")
        
        # Authenticate against URS, grab all the cookies
        self.cookie_jar = MozillaCookieJar()
        opener = build_opener(HTTPCookieProcessor(self.cookie_jar), HTTPHandler(), HTTPSHandler(**self.context))
        request = Request(auth_cookie_url, headers={"Authorization": "Basic {0}".format(user_pass)})
        
        # Watch out cookie rejection!
        try:
            response = opener.open(request)
        except HTTPError as e:
            if e.code == 401:
                print(" > Username and Password combo was not successful. Please try again.")
                return False
            else:
                # If an error happens here, the user most likely has not confirmed EULA.
                print("\nIMPORTANT: There was an error obtaining a download cookie!")
                print("Your user appears to lack permission to download data from the ASF Datapool.")
                print(
                    "\n\nNew users: you must first log into Vertex and accept the EULA. In addition, your Study Area must be set at Earthdata https://urs.earthdata.nasa.gov")
                exit(-1)
        except URLError as e:
            print("\nIMPORTANT: There was a problem communicating with URS, unable to obtain cookie. ")
            print("Try cookie generation later.")
            exit(-1)
        
        # Did we get a cookie?
        if self.check_cookie_is_logged_in(self.cookie_jar):
            # COOKIE SUCCESS!
            self.cookie_jar.save(self.cookie_jar_path)
            return True
        
        # if we aren't successful generating the cookie, nothing will work. Stop here!
        print("WARNING: Could not generate new cookie! Cannot proceed. Please try Username and Password again.")
        print("Response was {0}.".format(response.getcode()))
        print(
            "\n\nNew users: you must first log into Vertex and accept the EULA. In addition, your Study Area must be set at Earthdata https://urs.earthdata.nasa.gov")
        exit(-1)
    
    # make sure we're logged into URS 
Example #20
Source File: asf_template.py    From esa_sentinel with MIT License 4 votes vote down vote up
def check_cookie(self):
        
        if self.cookie_jar is None:
            print(" > Cookiejar is bunk: {0}".format(self.cookie_jar))
            return False
        
        # File we know is valid, used to validate cookie
        file_check = 'https://urs.earthdata.nasa.gov/profile'
        
        # Apply custom Redirect Hanlder
        opener = build_opener(HTTPCookieProcessor(self.cookie_jar), HTTPHandler(), HTTPSHandler(**self.context))
        install_opener(opener)
        
        # Attempt a HEAD request
        request = Request(file_check)
        request.get_method = lambda: 'HEAD'
        try:
            print(" > attempting to download {0}".format(file_check))
            response = urlopen(request, timeout=30)
            resp_code = response.getcode()
            # Make sure we're logged in
            if not self.check_cookie_is_logged_in(self.cookie_jar):
                return False
            
            # Save cookiejar
            self.cookie_jar.save(self.cookie_jar_path)
        
        except HTTPError:
            # If we ge this error, again, it likely means the user has not agreed to current EULA
            print("\nIMPORTANT: ")
            print("Your user appears to lack permissions to download data from the ASF Datapool.")
            print(
                "\n\nNew users: you must first log into Vertex and accept the EULA. In addition, your Study Area must be set at Earthdata https://urs.earthdata.nasa.gov")
            exit(-1)
        
        # This return codes indicate the USER has not been approved to download the data
        if resp_code in (300, 301, 302, 303):
            try:
                redir_url = response.info().getheader('Location')
            except AttributeError:
                redir_url = response.getheader('Location')
            
            # Funky Test env:
            if ("vertex-retired.daac.asf.alaska.edu" in redir_url and "test" in self.asf_urs4['redir']):
                print("Cough, cough. It's dusty in this test env!")
                return True
            
            print("Redirect ({0}) occured, invalid cookie value!".format(resp_code))
            return False
        
        # These are successes!
        if resp_code in (200, 307):
            return True
        
        return False 
Example #21
Source File: net_utils.py    From NSEToolsPy with MIT License 4 votes vote down vote up
def __opener__():
    """
    Builds the opener for the url
    :returns: opener object
    """

    cookie_jar = CookieJar()
    return build_opener(HTTPCookieProcessor(cookie_jar)) 
Example #22
Source File: client.py    From bugatsinho.github.io with GNU General Public License v3.0 4 votes vote down vote up
def cfcookie(netloc, ua, timeout):
    try:
        headers = {'User-Agent': ua}

        req = urllib2.Request(netloc, headers=headers)

        try:
            urllib2.urlopen(req, timeout=int(timeout))
        except urllib2.HTTPError as response:
            result = response.read(5242880)

        jschl = re.findall('name="jschl_vc" value="(.+?)"/>', result)[0]

        init = re.findall('setTimeout\(function\(\){\s*.*?.*:(.*?)};', result)[-1]

        builder = re.findall(r"challenge-form\'\);\s*(.*)a.v", result)[0]

        decryptVal = parseJSString(init)

        lines = builder.split(';')

        for line in lines:

            if len(line) > 0 and '=' in line:

                sections = line.split('=')
                line_val = parseJSString(sections[1])
                decryptVal = int(eval(str(decryptVal) + str(sections[0][-1]) + str(line_val)))

        answer = decryptVal + len(urlparse.urlparse(netloc).netloc)

        query = '%s/cdn-cgi/l/chk_jschl?jschl_vc=%s&jschl_answer=%s' % (netloc, jschl, answer)

        if 'type="hidden" name="pass"' in result:
            passval = re.findall('name="pass" value="(.*?)"', result)[0]
            query = '%s/cdn-cgi/l/chk_jschl?pass=%s&jschl_vc=%s&jschl_answer=%s' % (
                netloc, quote_plus(passval), jschl, answer
            )
            time.sleep(5)

        cookies = cookielib.LWPCookieJar()
        handlers = [urllib2.HTTPHandler(), urllib2.HTTPSHandler(), urllib2.HTTPCookieProcessor(cookies)]
        opener = urllib2.build_opener(*handlers)
        urllib2.install_opener(opener)

        try:
            req = urllib2.Request(query, headers=headers)
            urllib2.urlopen(req, timeout=int(timeout))
        except BaseException:
            pass

        cookie = '; '.join(['%s=%s' % (i.name, i.value) for i in cookies])

        return cookie
    except BaseException:
        pass