Python urlparse.urlparse() Examples

The following are code examples for showing how to use urlparse.urlparse(). They are from open source Python projects. You can vote up the examples you like or vote down the ones you don't like.

Example 1
Project: pyblish-win   Author: pyblish   File: webchecker.py    GNU Lesser General Public License v3.0 6 votes vote down vote up
def getlinkinfos(self):
        """Return one ``(link, rawlink, fragment)`` tuple per link on the page.

        ``rawlink`` is the link as reported by the parser with its fragment
        removed, ``link`` is ``rawlink`` resolved against the page's base URL,
        and ``fragment`` is the part that was removed (possibly empty).
        An empty list is returned when no parser was stored on the instance.
        """
        # The page is read and parsed in __init__(); a missing parser means
        # there is nothing to report.
        if not self.parser:
            return []

        rawlinks = self.parser.getlinks()
        base = urlparse.urljoin(self.url, self.parser.getbase() or "")

        def describe(raw):
            scheme, netloc, path, params, query, fragment = \
                    urlparse.urlparse(raw)
            # The fragment is NOT thrown away: it is handed back separately
            # in the result tuple. See Checker.dopage().
            bare = urlparse.urlunparse(
                (scheme, netloc, path, params, query, ''))
            return urlparse.urljoin(base, bare), bare, fragment

        return [describe(raw) for raw in rawlinks]
Example 2
Project: pyblish-win   Author: pyblish   File: urllib2.py    GNU Lesser General Public License v3.0 6 votes vote down vote up
def reduce_uri(self, uri, default_port=True):
        """Accept authority or URI and extract only the authority and path.

        Returns an ``(authority, path)`` pair. When `default_port` is true and
        a full http/https URI carries no explicit port, the scheme's standard
        port is appended to the authority.
        """
        # note HTTP URLs do not have a userinfo component
        scheme, netloc, raw_path = urlparse.urlsplit(uri)[:3]
        if netloc:
            # a complete URI
            authority, path = netloc, raw_path or '/'
        else:
            # bare "host" or "host:port"
            scheme, authority, path = None, uri, '/'

        host, port = splitport(authority)
        needs_port = default_port and port is None and scheme is not None
        if needs_port:
            standard_port = {"http": 80, "https": 443}.get(scheme)
            if standard_port is not None:
                authority = "%s:%d" % (host, standard_port)
        return authority, path
Example 3
Project: pyblish-win   Author: pyblish   File: xmlbuilder.py    GNU Lesser General Public License v3.0 6 votes vote down vote up
def resolveEntity(self, publicId, systemId):
        """Open `systemId` and return a populated DOMInputSource for it.

        The source gets the given identifiers, a byte stream obtained from
        the instance's opener, and the encoding guessed from that stream.
        ``baseURI`` is only set when the URL path is non-empty and does not
        already end in "/": it is then the URL with its last path segment
        dropped.
        """
        assert systemId is not None
        source = DOMInputSource()
        source.publicId = publicId
        source.systemId = systemId
        source.byteStream = self._get_opener().open(systemId)

        # determine the encoding if the transport provided it
        source.encoding = self._guess_media_encoding(source)

        # determine the base URI if we can
        import posixpath, urlparse
        scheme, netloc, path, params, query, fragment = \
            urlparse.urlparse(systemId)
        # XXX should we check the scheme here as well?
        if not path or path.endswith("/"):
            return source

        directory = posixpath.dirname(path) + "/"
        source.baseURI = urlparse.urlunparse(
            (scheme, netloc, directory, params, query, fragment))
        return source
Example 4
Project: pyblish-win   Author: pyblish   File: test_urlparse.py    GNU Lesser General Public License v3.0 6 votes vote down vote up
def test_issue14072(self):
        p1 = urlparse.urlsplit('tel:+31-641044153')
        self.assertEqual(p1.scheme, 'tel')
        self.assertEqual(p1.path, '+31-641044153')

        p2 = urlparse.urlsplit('tel:+31641044153')
        self.assertEqual(p2.scheme, 'tel')
        self.assertEqual(p2.path, '+31641044153')

        # Assert for urlparse
        p1 = urlparse.urlparse('tel:+31-641044153')
        self.assertEqual(p1.scheme, 'tel')
        self.assertEqual(p1.path, '+31-641044153')

        p2 = urlparse.urlparse('tel:+31641044153')
        self.assertEqual(p2.scheme, 'tel')
        self.assertEqual(p2.path, '+31641044153') 
Example 5
Project: pyblish-win   Author: pyblish   File: test_urlparse.py    GNU Lesser General Public License v3.0 6 votes vote down vote up
def test_telurl_params(self):
        p1 = urlparse.urlparse('tel:123-4;phone-context=+1-650-516')
        self.assertEqual(p1.scheme, 'tel')
        self.assertEqual(p1.path, '123-4')
        self.assertEqual(p1.params, 'phone-context=+1-650-516')

        p1 = urlparse.urlparse('tel:+1-201-555-0123')
        self.assertEqual(p1.scheme, 'tel')
        self.assertEqual(p1.path, '+1-201-555-0123')
        self.assertEqual(p1.params, '')

        p1 = urlparse.urlparse('tel:7042;phone-context=example.com')
        self.assertEqual(p1.scheme, 'tel')
        self.assertEqual(p1.path, '7042')
        self.assertEqual(p1.params, 'phone-context=example.com')

        p1 = urlparse.urlparse('tel:863-1234;phone-context=+1-914-555')
        self.assertEqual(p1.scheme, 'tel')
        self.assertEqual(p1.path, '863-1234')
        self.assertEqual(p1.params, 'phone-context=+1-914-555') 
Example 6
Project: pyblish-win   Author: pyblish   File: test_urlparse.py    GNU Lesser General Public License v3.0 6 votes vote down vote up
def test_attributes_without_netloc(self):
        # This example is straight from RFC 3261.  It looks like it
        # should allow the username, hostname, and port to be filled
        # in, but doesn't.  Since it's a URI and doesn't use the
        # scheme://netloc syntax, the netloc and related attributes
        # should be left empty.
        uri = "sip:[email protected];maddr=239.255.255.1;ttl=15"
        p = urlparse.urlsplit(uri)
        self.assertEqual(p.netloc, "")
        self.assertEqual(p.username, None)
        self.assertEqual(p.password, None)
        self.assertEqual(p.hostname, None)
        self.assertEqual(p.port, None)
        self.assertEqual(p.geturl(), uri)

        p = urlparse.urlparse(uri)
        self.assertEqual(p.netloc, "")
        self.assertEqual(p.username, None)
        self.assertEqual(p.password, None)
        self.assertEqual(p.hostname, None)
        self.assertEqual(p.port, None)
        self.assertEqual(p.geturl(), uri) 
Example 7
Project: pyblish-win   Author: pyblish   File: ssl_servers.py    GNU Lesser General Public License v3.0 6 votes vote down vote up
def translate_path(self, path):
        """Translate a /-separated PATH to the local filename syntax.

        The query string (and anything else that is not the URL path) is
        dropped, the path is percent-decoded and normalised, and its
        components are joined onto ``self.root``.

        Components that mean special things to the local file system
        (drive prefixes, directory parts, "." and "..") are ignored, so the
        result cannot climb above ``self.root``.  (XXX They should probably
        be diagnosed.)

        """
        # abandon query parameters
        path = urlparse.urlparse(path)[2]
        path = os.path.normpath(urllib.unquote(path))
        local_path = self.root
        for word in path.split('/'):
            if not word:
                continue
            # keep only the final name of the component: no drive, no dirs
            word = os.path.split(os.path.splitdrive(word)[1])[1]
            # normpath() keeps leading ".." on relative paths; joining them
            # would walk out of the served root, so skip them.
            if word in (os.curdir, os.pardir):
                continue
            local_path = os.path.join(local_path, word)
        return local_path
Example 8
Project: weibo-chaohua-daily   Author: LiuChangFreeman   File: main.py    MIT License 6 votes vote down vote up
def resolve(card_group):
    """Collect the entries of `card_group` whose ``card_type`` is 8.

    For each such card the ``containerid`` query parameter is read from the
    card's ``scheme`` URL (empty string when absent or given without a
    value).  Returns a list of dicts with the UTF-8 encoded ``title_sub``
    and the ``containerid``.
    """
    # Pick the right urlparse once instead of re-importing for every card.
    if version_info.major == 2:
        from urlparse import urlparse
    else:
        from urllib.parse import urlparse

    chaohua_list = []
    for card in card_group:
        if card['card_type'] != 8:
            continue
        query = urlparse(card['scheme']).query
        containerid = ''
        for parm in query.split('&'):
            fields = parm.split('=')
            if fields[0] == 'containerid':
                # A bare "containerid" token (no "=") used to raise
                # IndexError; treat it as an empty value instead.
                containerid = fields[1] if len(fields) > 1 else ''
                break
        chaohua_list.append({
            'title_sub': card['title_sub'].encode("utf-8"),
            'containerid': containerid
        })
    return chaohua_list
Example 9
Project: video2commons   Author: toolforge   File: app.py    GNU General Public License v3.0 6 votes vote down vote up
def loginredirect():
    """Initialize OAuth login.

    Abandons the current session, starts the OAuth handshake, stores the
    request token in the session and redirects to the handshake URL.  The
    ``returnto`` query argument is remembered as the post-login target only
    if, once joined onto the request's host URL, it has the same scheme and
    netloc as the application root and a path beneath the root's path;
    otherwise the URL of ``main`` is used.
    """
    app.session_interface.abandon_session(app, session)

    redirecturl, request_token = handshaker.initiate()
    token_key, token_secret = request_token.key, request_token.secret
    session['request_token_key'] = token_key
    session['request_token_secret'] = token_secret
    session['return_to_url'] = url_for('main')

    returnto = request.args.get('returnto')
    if returnto:
        ref_url = urlparse(request.url_root)
        test_url = urlparse(urljoin(request.host_url, returnto))
        same_origin = (
            (test_url.scheme, test_url.netloc) ==
            (ref_url.scheme, ref_url.netloc)
        )
        if same_origin and test_url.path.startswith(ref_url.path):
            session['return_to_url'] = returnto

    return redirect(redirecturl)
Example 10
Project: flasky   Author: RoseOu   File: flask_login.py    MIT License 6 votes vote down vote up
def make_next_param(login_url, current_url):
    '''
    Strip the scheme and host from `current_url` when they match those of
    `login_url` (or when `login_url` does not specify them), so that a
    shorter value can be passed along to the login URL. The fragment is
    dropped as well. When scheme or host differ, `current_url` is returned
    unchanged.

    :param login_url: The login URL being redirected to.
    :type login_url: str
    :param current_url: The URL to reduce.
    :type current_url: str
    '''
    login_parts = urlparse(login_url)
    current_parts = urlparse(current_url)

    scheme_differs = (login_parts.scheme and
                      login_parts.scheme != current_parts.scheme)
    host_differs = (login_parts.netloc and
                    login_parts.netloc != current_parts.netloc)
    if scheme_differs or host_differs:
        return current_url

    relative = ('', '', current_parts.path, current_parts.params,
                current_parts.query, '')
    return urlunparse(relative)
Example 11
Project: core   Author: lifemapper   File: lm_client.py    GNU General Public License v3.0 6 votes vote down vote up
def login(self, userId, passwd):
        """Logs in to the server

        Installs a process-wide urllib2 opener backed by a cookie jar that
        only accepts cookies for the configured server's netloc, then POSTs
        the credentials to the login service.

        :param userId: user id sent as the ``userid`` form value
        :param passwd: password sent as the ``pword`` form value
        :return: the body of the login response
        """
        policyServer = urlparse(self.server).netloc
        policy = cookielib.DefaultCookiePolicy(
            allowed_domains=(policyServer,))
        self.cookieJar = cookielib.LWPCookieJar(policy=policy)
        opener = urllib2.build_opener(
            urllib2.HTTPCookieProcessor(self.cookieJar))
        # NOTE: install_opener() replaces the global opener for all urllib2 use
        urllib2.install_opener(opener)

        req = self._make_request(
            self._build_base_url(_SERVICE.LOGIN), userid=userId, pword=passwd,
            method=HTTPMethod.POST)
        # Close the response even when reading it fails.
        try:
            return req.read()
        finally:
            req.close()

    # ............................ 
Example 12
Project: dynamic-training-with-apache-mxnet-on-aws   Author: awslabs   File: diagnose.py    Apache License 2.0 6 votes vote down vote up
def test_connection(name, url, timeout=10):
    """Simple connection test

    Resolves the host of `url`, then opens `url`, and prints how long each
    step took. Any failure is reported on stdout and ends the test early;
    nothing is returned or raised.

    :param name: label used in the printed messages
    :param url: URL to resolve and fetch
    :param timeout: timeout in seconds passed to urlopen()
    """
    urlinfo = urlparse(url)
    # netloc may include userinfo and a port ("user@host:8080"), which is not
    # a resolvable name; hostname is the bare host. Fall back to netloc so an
    # unparsable URL is still reported through the DNS error path.
    host = urlinfo.hostname or urlinfo.netloc
    start = time.time()
    try:
        socket.gethostbyname(host)
    except Exception as e:
        print('Error resolving DNS for {}: {}, {}'.format(name, url, e))
        return
    dns_elapsed = time.time() - start
    start = time.time()
    try:
        response = urlopen(url, timeout=timeout)
    except Exception as e:
        print("Error open {}: {}, {}, DNS finished in {} sec.".format(name, url, e, dns_elapsed))
        return
    load_elapsed = time.time() - start
    # the response body is not needed; release the connection
    response.close()
    print("Timing for {}: {}, DNS: {:.4f} sec, LOAD: {:.4f} sec.".format(name, url, dns_elapsed, load_elapsed))
Example 13
Project: sqliv   Author: the-robot   File: reverseip.py    GNU General Public License v3.0 6 votes vote down vote up
def reverseip(url):
    """Query yougetsignal.com for the domains hosted alongside `url`.

    Only the domain part of `url` is submitted. An HTTP error is reported
    on stderr.

    NOTE(review): as shown here the response body is read into ``result``
    but neither used nor returned -- the snippet may be truncated.
    """

    # get only domain name
    parsed = urlparse(url)
    url = parsed.netloc if parsed.netloc != '' else parsed.path.split("/")[0]

    source = "http://domains.yougetsignal.com/domains.php"
    useragent = useragents.get()
    contenttype = "application/x-www-form-urlencoded; charset=UTF-8"

    # POST method
    opener = urllib2.build_opener(
        urllib2.HTTPHandler(), urllib2.HTTPSHandler())
    data = urllib.urlencode([('remoteAddress', url), ('key', '')])

    request = urllib2.Request(source, data)
    for header, value in (("Content-type", contenttype),
                          ("User-Agent", useragent)):
        request.add_header(header, value)

    try:
        result = urllib2.urlopen(request).read()

    except urllib2.HTTPError as e:
        sys.stderr.write("[{}] HTTP error".format(e.code) + "\n")
Example 14
Project: sqliv   Author: the-robot   File: scanner.py    GNU General Public License v3.0 6 votes vote down vote up
def __sqli(url):
    """check SQL injection vulnerability

    Appends each payload to every query parameter of `url`, fetches the
    resulting page and checks it for SQL error signatures. Returns
    ``(True, db)`` on the first hit, ``(False, None)`` when the URL has no
    query or no payload produced a hit.
    """

    std.stdout("scanning {}".format(url), end="")

    domain = url.split("?")[0]  # domain with path without queries
    queries = urlparse(url).query.split("&")
    # no queries in url
    if not any(queries):
        print("")  # move cursor to new line
        return False, None

    payloads = ("'", "')", "';", '"', '")', '";', '`', '`)', '`;', '\\', "%27", "%%2727", "%25%27", "%60", "%5C")
    for payload in payloads:
        injected_query = "&".join([param + payload for param in queries])
        source = web.gethtml(domain + "?" + injected_query)
        if not source:
            continue
        vulnerable, db = sqlerrors.check(source)
        if vulnerable and db != None:
            std.showsign(" vulnerable")
            return True, db

    print("")  # move cursor to new line
    return False, None
Example 15
Project: InsightAgent   Author: insightfinder   File: getmetrics_cadvisor.py    Apache License 2.0 6 votes vote down vote up
def check_project(project_name):
    """Make sure `project_name` exists on the configured server, creating it if needed.

    Does nothing unless `if_config_vars` holds a non-empty 'token'. Every
    request is made by running `curl` through the shell against URLs built
    from `if_config_vars['if_url']`. After the check (and creation, if the
    name was not found) `if_config_vars['project_name']` is set to
    `project_name`; if a non-empty 'system_name' is configured, a
    project-settings update is posted as well. A failing `curl` call
    (`subprocess.CalledProcessError`) is logged and not re-raised.

    NOTE(review): the commands are built by string concatenation and run
    with shell=True, so the config values and `project_name` are interpreted
    by the shell -- confirm they are trusted or escaped.
    """
    if 'token' in if_config_vars and len(if_config_vars['token']) != 0:
        logger.debug(project_name)
        try:
            # check for existing project
            check_url = urlparse.urljoin(if_config_vars['if_url'], '/api/v1/getprojectstatus')
            output_check_project = subprocess.check_output('curl "' + check_url + '?userName=' + if_config_vars['user_name'] + '&token=' + if_config_vars['token'] + '&projectList=%5B%7B%22projectName%22%3A%22' + project_name + '%22%2C%22customerName%22%3A%22' + if_config_vars['user_name'] + '%22%2C%22projectType%22%3A%22CUSTOM%22%7D%5D&tzOffset=-14400000"', shell=True)
            # create project if no existing project
            # (existence is a plain substring test on the raw curl output)
            if project_name not in output_check_project:
                logger.debug('creating project')
                create_url = urlparse.urljoin(if_config_vars['if_url'], '/api/v1/add-custom-project')
                # NOTE(review): `sampling_interval / 60` is integer division on
                # Python 2 for int operands but true division on Python 3 -- confirm.
                output_create_project = subprocess.check_output('no_proxy= curl -d "userName=' + if_config_vars['user_name'] + '&token=' + if_config_vars['token'] + '&projectName=' + project_name + '&instanceType=PrivateCloud&projectCloudType=PrivateCloud&dataType=' + get_data_type_from_project_type() + '&samplingInterval=' + str(if_config_vars['sampling_interval'] / 60) +  '&samplingIntervalInSeconds=' + str(if_config_vars['sampling_interval']) + '&zone=&email=&access-key=&secrete-key=&insightAgentType=' + get_insight_agent_type_from_project_type() + '" -H "Content-Type: application/x-www-form-urlencoded" -X POST ' + create_url + '?tzOffset=-18000000', shell=True)
            # set project name to proposed name
            if_config_vars['project_name'] = project_name
            # try to add new project to system
            if 'system_name' in if_config_vars and len(if_config_vars['system_name']) != 0:
                system_url = urlparse.urljoin(if_config_vars['if_url'], '/api/v1/projects/update')
                output_update_project = subprocess.check_output('no_proxy= curl -d "userName=' + if_config_vars['user_name'] + '&token=' + if_config_vars['token'] + '&operation=updateprojsettings&projectName=' + project_name + '&systemName=' + if_config_vars['system_name'] + '" -H "Content-Type: application/x-www-form-urlencoded" -X POST ' + system_url + '?tzOffset=-18000000', shell=True)
        except subprocess.CalledProcessError as e:
            # NOTE(review): if the failure came from the update call above,
            # 'project_name' has already been overwritten with the new name.
            logger.error('Unable to create project for ' + project_name + '. Data will be sent to ' + if_config_vars['project_name'])
Example 16
Project: InsightAgent   Author: insightfinder   File: getmetrics_cadvisor.py    Apache License 2.0 6 votes vote down vote up
def send_data_to_if(chunk_metric_data):
    """POST one chunk of collected data to the configured IF endpoint.

    For DEPLOYMENT and INCIDENT project types each entry's 'data' value is
    replaced, in place, by its JSON encoding before the whole chunk is
    serialised. Nothing is sent when the CLI 'testing' flag is set.
    """
    started_at = time.time()

    # prepare data for metric streaming agent
    payload = initialize_api_post_data()
    project_type = if_config_vars['project_type']
    if 'DEPLOYMENT' in project_type or 'INCIDENT' in project_type:
        for entry in chunk_metric_data:
            entry['data'] = json.dumps(entry['data'])
    payload[get_data_field_from_project_type()] = json.dumps(chunk_metric_data)

    logger.debug('First:\n' + str(chunk_metric_data[0]))
    logger.debug('Last:\n' + str(chunk_metric_data[-1]))
    logger.debug('Total Data (bytes): ' + str(get_json_size_bytes(payload)))
    logger.debug('Total Lines: ' + str(track['line_count']))

    # do not send if only testing
    if cli_config_vars['testing']:
        return

    # send the data
    post_url = urlparse.urljoin(if_config_vars['if_url'], get_api_from_project_type())
    success_message = str(get_json_size_bytes(payload)) + ' bytes of data are reported.'
    send_request(post_url, 'POST', 'Could not send request to IF',
                 success_message,
                 data=payload, proxies=if_config_vars['if_proxies'])
    elapsed = round(time.time() - started_at, 2)
    logger.debug('--- Send data time: %s seconds ---' % elapsed)
Example 17
Project: InsightAgent   Author: insightfinder   File: getlogs_k8s.py    Apache License 2.0 6 votes vote down vote up
def check_project(project_name):
    """Make sure `project_name` exists on the configured server, creating it if needed.

    Does nothing unless `if_config_vars` holds a non-empty 'token'. Every
    request is made by running `curl` through the shell against URLs built
    from `if_config_vars['if_url']`. After the check (and creation, if the
    name was not found) `if_config_vars['project_name']` is set to
    `project_name`; if a non-empty 'system_name' is configured, a
    project-settings update is posted as well. A failing `curl` call
    (`subprocess.CalledProcessError`) is logged and not re-raised.

    NOTE(review): the commands are built by string concatenation and run
    with shell=True, so the config values and `project_name` are interpreted
    by the shell -- confirm they are trusted or escaped.
    """
    if 'token' in if_config_vars and len(if_config_vars['token']) != 0:
        logger.debug(project_name)
        try:
            # check for existing project
            check_url = urlparse.urljoin(if_config_vars['if_url'], '/api/v1/getprojectstatus')
            output_check_project = subprocess.check_output('curl "' + check_url + '?userName=' + if_config_vars['user_name'] + '&token=' + if_config_vars['token'] + '&projectList=%5B%7B%22projectName%22%3A%22' + project_name + '%22%2C%22customerName%22%3A%22' + if_config_vars['user_name'] + '%22%2C%22projectType%22%3A%22CUSTOM%22%7D%5D&tzOffset=-14400000"', shell=True)
            # create project if no existing project
            # (existence is a plain substring test on the raw curl output)
            if project_name not in output_check_project:
                logger.debug('creating project')
                create_url = urlparse.urljoin(if_config_vars['if_url'], '/api/v1/add-custom-project')
                # NOTE(review): `sampling_interval / 60` is integer division on
                # Python 2 for int operands but true division on Python 3 -- confirm.
                output_create_project = subprocess.check_output('no_proxy= curl -d "userName=' + if_config_vars['user_name'] + '&token=' + if_config_vars['token'] + '&projectName=' + project_name + '&instanceType=PrivateCloud&projectCloudType=PrivateCloud&dataType=' + get_data_type_from_project_type() + '&samplingInterval=' + str(if_config_vars['sampling_interval'] / 60) +  '&samplingIntervalInSeconds=' + str(if_config_vars['sampling_interval']) + '&zone=&email=&access-key=&secrete-key=&insightAgentType=' + get_insight_agent_type_from_project_type() + '" -H "Content-Type: application/x-www-form-urlencoded" -X POST ' + create_url + '?tzOffset=-18000000', shell=True)
            # set project name to proposed name
            if_config_vars['project_name'] = project_name
            # try to add new project to system
            if 'system_name' in if_config_vars and len(if_config_vars['system_name']) != 0:
                system_url = urlparse.urljoin(if_config_vars['if_url'], '/api/v1/projects/update')
                output_update_project = subprocess.check_output('no_proxy= curl -d "userName=' + if_config_vars['user_name'] + '&token=' + if_config_vars['token'] + '&operation=updateprojsettings&projectName=' + project_name + '&systemName=' + if_config_vars['system_name'] + '" -H "Content-Type: application/x-www-form-urlencoded" -X POST ' + system_url + '?tzOffset=-18000000', shell=True)
        except subprocess.CalledProcessError as e:
            # NOTE(review): if the failure came from the update call above,
            # 'project_name' has already been overwritten with the new name.
            logger.error('Unable to create project for ' + project_name + '. Data will be sent to ' + if_config_vars['project_name'])
Example 18
Project: InsightAgent   Author: insightfinder   File: getlogs_k8s.py    Apache License 2.0 6 votes vote down vote up
def send_data_to_if(chunk_metric_data):
    """POST one chunk of collected data to the configured IF endpoint.

    For DEPLOYMENT and INCIDENT project types each entry's 'data' value is
    replaced, in place, by its JSON encoding before the whole chunk is
    serialised. Nothing is sent when the CLI 'testing' flag is set.
    """
    started_at = time.time()

    # prepare data for metric streaming agent
    payload = initialize_api_post_data()
    project_type = if_config_vars['project_type']
    if 'DEPLOYMENT' in project_type or 'INCIDENT' in project_type:
        for entry in chunk_metric_data:
            entry['data'] = json.dumps(entry['data'])
    payload[get_data_field_from_project_type()] = json.dumps(chunk_metric_data)

    logger.debug('First:\n' + str(chunk_metric_data[0]))
    logger.debug('Last:\n' + str(chunk_metric_data[-1]))
    logger.debug('Total Data (bytes): ' + str(get_json_size_bytes(payload)))
    logger.debug('Total Lines: ' + str(track['line_count']))

    # do not send if only testing
    if cli_config_vars['testing']:
        return

    # send the data
    post_url = urlparse.urljoin(if_config_vars['if_url'], get_api_from_project_type())
    success_message = str(get_json_size_bytes(payload)) + ' bytes of data are reported.'
    send_request(post_url, 'POST', 'Could not send request to IF',
                 success_message,
                 data=payload, proxies=if_config_vars['if_proxies'])
    elapsed = round(time.time() - started_at, 2)
    logger.debug('--- Send data time: %s seconds ---' % elapsed)
Example 19
Project: InsightAgent   Author: insightfinder   File: getmessages_prometheus.py    Apache License 2.0 6 votes vote down vote up
def check_project(project_name):
    """Make sure `project_name` exists on the configured server, creating it if needed.

    Does nothing unless `if_config_vars` holds a non-empty 'token'. Every
    request is made by running `curl` through the shell against URLs built
    from `if_config_vars['if_url']`. After the check (and creation, if the
    name was not found) `if_config_vars['project_name']` is set to
    `project_name`; if a non-empty 'system_name' is configured, a
    project-settings update is posted as well. A failing `curl` call
    (`subprocess.CalledProcessError`) is logged and not re-raised.

    NOTE(review): the commands are built by string concatenation and run
    with shell=True, so the config values and `project_name` are interpreted
    by the shell -- confirm they are trusted or escaped.
    """
    if 'token' in if_config_vars and len(if_config_vars['token']) != 0:
        logger.debug(project_name)
        try:
            # check for existing project
            check_url = urlparse.urljoin(if_config_vars['if_url'], '/api/v1/getprojectstatus')
            output_check_project = subprocess.check_output('curl "' + check_url + '?userName=' + if_config_vars['user_name'] + '&token=' + if_config_vars['token'] + '&projectList=%5B%7B%22projectName%22%3A%22' + project_name + '%22%2C%22customerName%22%3A%22' + if_config_vars['user_name'] + '%22%2C%22projectType%22%3A%22CUSTOM%22%7D%5D&tzOffset=-14400000"', shell=True)
            # create project if no existing project
            # (existence is a plain substring test on the raw curl output)
            if project_name not in output_check_project:
                logger.debug('creating project')
                create_url = urlparse.urljoin(if_config_vars['if_url'], '/api/v1/add-custom-project')
                # NOTE(review): `sampling_interval / 60` is integer division on
                # Python 2 for int operands but true division on Python 3 -- confirm.
                output_create_project = subprocess.check_output('no_proxy= curl -d "userName=' + if_config_vars['user_name'] + '&token=' + if_config_vars['token'] + '&projectName=' + project_name + '&instanceType=PrivateCloud&projectCloudType=PrivateCloud&dataType=' + get_data_type_from_project_type() + '&samplingInterval=' + str(if_config_vars['sampling_interval'] / 60) +  '&samplingIntervalInSeconds=' + str(if_config_vars['sampling_interval']) + '&zone=&email=&access-key=&secrete-key=&insightAgentType=' + get_insight_agent_type_from_project_type() + '" -H "Content-Type: application/x-www-form-urlencoded" -X POST ' + create_url + '?tzOffset=-18000000', shell=True)
            # set project name to proposed name
            if_config_vars['project_name'] = project_name
            # try to add new project to system
            if 'system_name' in if_config_vars and len(if_config_vars['system_name']) != 0:
                system_url = urlparse.urljoin(if_config_vars['if_url'], '/api/v1/projects/update')
                output_update_project = subprocess.check_output('no_proxy= curl -d "userName=' + if_config_vars['user_name'] + '&token=' + if_config_vars['token'] + '&operation=updateprojsettings&projectName=' + project_name + '&systemName=' + if_config_vars['system_name'] + '" -H "Content-Type: application/x-www-form-urlencoded" -X POST ' + system_url + '?tzOffset=-18000000', shell=True)
        except subprocess.CalledProcessError as e:
            # NOTE(review): if the failure came from the update call above,
            # 'project_name' has already been overwritten with the new name.
            logger.error('Unable to create project for ' + project_name + '. Data will be sent to ' + if_config_vars['project_name'])
Example 20
Project: InsightAgent   Author: insightfinder   File: getmessages_prometheus.py    Apache License 2.0 6 votes vote down vote up
def send_data_to_if(chunk_metric_data):
    """POST one chunk of collected data to the configured IF endpoint.

    For DEPLOYMENT and INCIDENT project types each entry's 'data' value is
    replaced, in place, by its JSON encoding before the whole chunk is
    serialised. Nothing is sent when the CLI 'testing' flag is set.
    """
    started_at = time.time()

    # prepare data for metric streaming agent
    payload = initialize_api_post_data()
    project_type = if_config_vars['project_type']
    if 'DEPLOYMENT' in project_type or 'INCIDENT' in project_type:
        for entry in chunk_metric_data:
            entry['data'] = json.dumps(entry['data'])
    payload[get_data_field_from_project_type()] = json.dumps(chunk_metric_data)

    logger.debug('First:\n' + str(chunk_metric_data[0]))
    logger.debug('Last:\n' + str(chunk_metric_data[-1]))
    logger.debug('Total Data (bytes): ' + str(get_json_size_bytes(payload)))
    logger.debug('Total Lines: ' + str(track['line_count']))

    # do not send if only testing
    if cli_config_vars['testing']:
        return

    # send the data
    post_url = urlparse.urljoin(if_config_vars['if_url'], get_api_from_project_type())
    success_message = str(get_json_size_bytes(payload)) + ' bytes of data are reported.'
    send_request(post_url, 'POST', 'Could not send request to IF',
                 success_message,
                 data=payload, proxies=if_config_vars['if_proxies'])
    elapsed = round(time.time() - started_at, 2)
    logger.debug('--- Send data time: %s seconds ---' % elapsed)
Example 21
Project: subtake   Author: kp625544   File: sublist3r.py    GNU General Public License v2.0 6 votes vote down vote up
def extract_domains(self, resp):
        """Collect new subdomains from the ``<cite>`` links in `resp`.

        Every netloc that is non-empty, not already known and different from
        ``self.domain`` is appended to ``self.subdomains`` (and printed when
        ``self.verbose`` is set).

        :param resp: HTML text of a search-result page
        :return: the raw ``<cite>`` contents found in `resp`; an empty list
            when the page could not be scanned at all
        """
        link_regx = re.compile(r'<cite.*?>(.*?)<\/cite>')
        # Bound before the try block so the final return cannot hit an
        # unbound name when findall() itself fails (e.g. resp is None).
        links_list = []
        try:
            links_list = link_regx.findall(resp)
            for link in links_list:
                link = re.sub('<span.*>', '', link)
                if not link.startswith('http'):
                    link = "http://" + link
                subdomain = urlparse.urlparse(link).netloc
                if subdomain and subdomain not in self.subdomains and subdomain != self.domain:
                    if self.verbose:
                        self.print_("%s%s: %s%s" % (R, self.engine_name, W, subdomain))
                    self.subdomains.append(subdomain.strip())
        except Exception:
            # Scraping is best-effort: a malformed page must not abort the
            # enumeration, so whatever was gathered so far is kept.
            pass
        return links_list
Example 22
Project: subtake   Author: kp625544   File: sublist3r.py    GNU General Public License v2.0 6 votes vote down vote up
def extract_domains(self, resp):
        """Collect new subdomains of ``self.domain`` from the result spans in `resp`.

        Two span patterns are searched; bold markup is stripped from each
        hit before its netloc is examined. Returns the raw matches of both
        patterns (first pattern's matches first). Errors while scanning are
        swallowed.
        """
        link_regx2 = re.compile('<span class=" fz-15px fw-m fc-12th wr-bw.*?">(.*?)</span>')
        link_regx = re.compile('<span class="txt"><span class=" cite fw-xl fz-15px">(.*?)</span>')
        links_list = []
        try:
            primary = link_regx.findall(resp)
            secondary = link_regx2.findall(resp)
            links_list = primary + secondary
            for raw in links_list:
                candidate = re.sub("<(\/)?b>", "", raw)
                if not candidate.startswith('http'):
                    candidate = "http://" + candidate
                subdomain = urlparse.urlparse(candidate).netloc
                # only hosts under the target domain are of interest
                if not subdomain.endswith(self.domain):
                    continue
                is_new = subdomain and subdomain not in self.subdomains
                if is_new and subdomain != self.domain:
                    if self.verbose:
                        self.print_("%s%s: %s%s" % (R, self.engine_name, W, subdomain))
                    self.subdomains.append(subdomain.strip())
        except Exception:
            pass

        return links_list
Example 23
Project: navitia_client   Author: leonardbinet   File: __init__.py    MIT License 5 votes vote down vote up
def assertURLEqual(self, first, second, msg=None):
        """Assert that `first` and `second` are equivalent URLs.

        Scheme, netloc and path must be equal; the query strings are
        compared as sorted lists of (name, value) pairs, so the order of
        query arguments is ignored.
        """
        lhs, rhs = urlparse(first), urlparse(second)
        # scheme, netloc, path
        self.assertEqual(lhs[:3], rhs[:3], msg)

        lhs_query, rhs_query = [
            sorted(parse_qsl(parts.query)) for parts in (lhs, rhs)
        ]
        self.assertEqual(lhs_query, rhs_query, msg)
Example 24
Project: pyblish-win   Author: pyblish   File: webchecker.py    GNU Lesser General Public License v3.0 5 votes vote down vote up
def addroot(self, root, add_to_do = 1):
        """Register `root` as a root URL unless it is already registered.

        The stored root is `root` with its path cut back to the last "/"
        (when the path has something after that slash). The robots entry
        for `root` is set up, and unless `add_to_do` is false the root
        itself is queued as a new link.
        """
        if root in self.roots:
            return

        scheme, netloc, path, params, query, fragment = \
                urlparse.urlparse(root)
        cut = path.rfind("/") + 1
        if 0 < cut < len(path):
            # drop the trailing file name, keep the directory
            troot = urlparse.urlunparse((scheme, netloc, path[:cut],
                                         params, query, fragment))
        else:
            troot = root

        self.roots.append(troot)
        self.addrobot(root)
        if add_to_do:
            self.newlink((root, ""), ("<root>", root))
Example 25
Project: pyblish-win   Author: pyblish   File: webchecker.py    GNU Lesser General Public License v3.0 5 votes vote down vote up
def addrobot(self, root):
        """Create and read the robots.txt parser for the site of `root`.

        Parsers are keyed in ``self.robots`` by the site's "/" URL; nothing
        is done when one already exists. An I/O error while reading is
        noted, not raised.
        """
        root = urlparse.urljoin(root, "/")
        if root in self.robots:
            return
        url = urlparse.urljoin(root, "/robots.txt")
        rp = robotparser.RobotFileParser()
        self.robots[root] = rp
        self.note(2, "Parsing %s", url)
        rp.debug = self.verbose > 3
        rp.set_url(url)
        try:
            rp.read()
        except (OSError, IOError) as msg:
            self.note(1, "I/O error parsing %s: %s", url, msg)
Example 26
Project: pyblish-win   Author: pyblish   File: webchecker.py    GNU Lesser General Public License v3.0 5 votes vote down vote up
def isallowed(self, root, url):
        """Return whether the robots rules stored for `root`'s site allow `url`."""
        site_root = urlparse.urljoin(root, "/")
        rules = self.robots[site_root]
        return rules.can_fetch(AGENTNAME, url)
Example 27
Project: pyblish-win   Author: pyblish   File: cookielib.py    GNU Lesser General Public License v3.0 5 votes vote down vote up
def request_host(request):
    """Return request-host, as defined by RFC 2965.

    The host is taken from the request's full URL, falling back to its
    "Host" header when the URL has no network location.

    Variation from RFC: returned value is lowercased, for convenient
    comparison.

    """
    netloc = urlparse.urlparse(request.get_full_url())[1]
    host = netloc if netloc != "" else request.get_header("Host", "")

    # remove port, if present
    return cut_port_re.sub("", host, 1).lower()
Example 28
Project: pyblish-win   Author: pyblish   File: urllib2.py    GNU Lesser General Public License v3.0 5 votes vote down vote up
def request_host(request):
    """Return request-host, as defined by RFC 2965.

    The host is taken from the request's full URL, falling back to its
    "Host" header when the URL has no network location.

    Variation from RFC: returned value is lowercased, for convenient
    comparison.

    """
    netloc = urlparse.urlparse(request.get_full_url())[1]
    host = netloc if netloc != "" else request.get_header("Host", "")

    # remove port, if present
    return _cut_port_re.sub("", host, 1).lower()
Example 29
Project: pyblish-win   Author: pyblish   File: urllib2.py    GNU Lesser General Public License v3.0 5 votes vote down vote up
def http_error_401(self, req, fp, code, msg, headers):
        """Handle a 401 by retrying with credentials for the request's netloc.

        Returns whatever the 'www-authenticate' retry produced; the retry
        counter is reset afterwards.
        """
        full_url = req.get_full_url()
        host = urlparse.urlparse(full_url)[1]
        response = self.http_error_auth_reqed('www-authenticate',
                                              host, req, headers)
        self.reset_retry_count()
        return response
Example 30
Project: pyblish-win   Author: pyblish   File: robotparser.py    GNU Lesser General Public License v3.0 5 votes vote down vote up
def set_url(self, url):
        """Sets the URL referring to a robots.txt file.

        Also stores the URL's network location as ``host`` and its path as
        ``path``.
        """
        self.url = url
        parsed = urlparse.urlparse(url)
        self.host, self.path = parsed[1], parsed[2]
Example 31
Project: pyblish-win   Author: pyblish   File: robotparser.py    GNU Lesser General Public License v3.0 5 votes vote down vote up
def __init__(self, path, allowance):
        """Store a rule: the normalised, quoted `path` and its `allowance`.

        An empty path with a false allowance is recorded as "allow".
        """
        # an empty value means allow all
        allow_everything = path == '' and not allowance
        normalized = urlparse.urlunparse(urlparse.urlparse(path))
        self.path = urllib.quote(normalized)
        self.allowance = True if allow_everything else allowance
Example 32
Project: pyblish-win   Author: pyblish   File: test_support.py    GNU Lesser General Public License v3.0 5 votes vote down vote up
def open_urlresource(url, check=None):
    """Return an open file for the resource at *url*, fetching it if needed.

    The resource is stored in the "data" directory next to this module,
    under the last component of the URL path.  An existing copy is reused
    when it passes *check* (a callable receiving the open file; no check is
    made when it is None); a copy that fails the check is removed and
    downloaded again.  TestFailed is raised when the downloaded file still
    fails the check.
    """
    import urlparse, urllib2

    # The URL path always uses '/' as its separator, whatever the platform.
    url_path = urlparse.urlparse(url)[2]
    filename = url_path.split('/')[-1]

    fn = os.path.join(os.path.dirname(__file__), "data", filename)

    def check_valid_file(fn):
        # Hand back the open file when it is acceptable; otherwise close it
        # and fall through to an implicit None.
        candidate = open(fn)
        if check is None:
            return candidate
        if check(candidate):
            candidate.seek(0)
            return candidate
        candidate.close()

    if os.path.exists(fn):
        cached = check_valid_file(fn)
        if cached is not None:
            return cached
        unlink(fn)

    # Verify the requirement before downloading the file
    requires('urlfetch')

    print >> get_original_stdout(), '\tfetching %s ...' % url
    remote = urllib2.urlopen(url, timeout=15)
    try:
        with open(fn, "wb") as out:
            while True:
                chunk = remote.read()
                if not chunk:
                    break
                out.write(chunk)
    finally:
        remote.close()

    fetched = check_valid_file(fn)
    if fetched is not None:
        return fetched
    raise TestFailed('invalid resource "%s"' % fn)
Example 33
Project: pyblish-win   Author: pyblish   File: test_urllib2_localnet.py    GNU Lesser General Public License v3.0 5 votes vote down vote up
def do_GET(self):
        """Serve a canned page once the digest handler accepts the request.

        The path component of the request path is stored in
        ``self.short_path`` before the digest handler is consulted; nothing
        is written here when the handler returns a false value.
        """
        # index 2 of the parse result is the path component
        self.short_path = urlparse.urlparse(self.path, 'http')[2]
        if not self.digest_auth_handler.handle_request(self):
            return
        self.send_response(200, "OK")
        self.send_header("Content-Type", "text/html")
        self.end_headers()
        greeting = "You've reached %s!<BR>" % self.path
        apology = ("Our apologies, but our server is down due to "
                   "a sudden zombie invasion.")
        self.wfile.write(greeting)
        self.wfile.write(apology)

# Test cases 
Example 34
Project: pyblish-win   Author: pyblish   File: test_urlparse.py    GNU Lesser General Public License v3.0 5 votes vote down vote up
def test_http_roundtrips(self):
        # urlparse.urlsplit treats 'http:' as an optimized special case,
        # so we test both 'http:' and 'https:' in all the following.
        # Three cheers for white box knowledge!
        testcases = [
            ('://www.python.org',
             ('www.python.org', '', '', '', ''),
             ('www.python.org', '', '', '')),
            ('://www.python.org#abc',
             ('www.python.org', '', '', '', 'abc'),
             ('www.python.org', '', '', 'abc')),
            ('://www.python.org?q=abc',
             ('www.python.org', '', '', 'q=abc', ''),
             ('www.python.org', '', 'q=abc', '')),
            ('://www.python.org/#abc',
             ('www.python.org', '/', '', '', 'abc'),
             ('www.python.org', '/', '', 'abc')),
            ('://a/b/c/d;p?q#f',
             ('a', '/b/c/d', 'p', 'q', 'f'),
             ('a', '/b/c/d;p', 'q', 'f')),
            ]
        for scheme in ('http', 'https'):
            for url, parsed, split in testcases:
                url = scheme + url
                parsed = (scheme,) + parsed
                split = (scheme,) + split
                self.checkRoundtrips(url, parsed, split) 
Example 35
Project: pyblish-win   Author: pyblish   File: test_urlparse.py    GNU Lesser General Public License v3.0 5 votes vote down vote up
def checkJoin(self, base, relurl, expected):
        # Join relurl onto base and compare with the expected URL; the input
        # triple doubles as the failure message.
        joined = urlparse.urljoin(base, relurl)
        self.assertEqual(joined, expected, (base, relurl, expected))
Example 36
Project: pyblish-win   Author: pyblish   File: test_urlparse.py    GNU Lesser General Public License v3.0 5 votes vote down vote up
def test_unparse_parse(self):
        for u in ['Python', './Python','x-newscheme://foo.com/stuff','x://y','x:/y','x:/','/',]:
            self.assertEqual(urlparse.urlunsplit(urlparse.urlsplit(u)), u)
            self.assertEqual(urlparse.urlunparse(urlparse.urlparse(u)), u) 
Example 37
Project: pyblish-win   Author: pyblish   File: test_urlparse.py    GNU Lesser General Public License v3.0 5 votes vote down vote up
def test_RFC2368(self):
        # Issue 11467: path that starts with a number is not parsed correctly
        self.assertEqual(urlparse.urlparse('mailto:[email protected]'),
                ('mailto', '', '[email protected]', '', '', '')) 
Example 38
Project: pyblish-win   Author: pyblish   File: test_urlparse.py    GNU Lesser General Public License v3.0 5 votes vote down vote up
def test_attributes_bad_port(self):
        """Check handling of non-integer ports."""
        p = urlparse.urlsplit("http://www.example.net:foo")
        self.assertEqual(p.netloc, "www.example.net:foo")
        self.assertRaises(ValueError, lambda: p.port)

        p = urlparse.urlparse("http://www.example.net:foo")
        self.assertEqual(p.netloc, "www.example.net:foo")
        self.assertRaises(ValueError, lambda: p.port) 
Example 39
Project: pyblish-win   Author: pyblish   File: test_urlparse.py    GNU Lesser General Public License v3.0 5 votes vote down vote up
def test_noslash(self):
        # Issue 1637: http://foo.com?query is legal
        self.assertEqual(urlparse.urlparse("http://example.com?blahblah=/foo"),
                         ('http', 'example.com', '', '', 'blahblah=/foo', '')) 
Example 40
Project: pyblish-win   Author: pyblish   File: test_urlparse.py    GNU Lesser General Public License v3.0 5 votes vote down vote up
def test_anyscheme(self):
        # Issue 7904: s3://foo.com/stuff has netloc "foo.com".
        self.assertEqual(urlparse.urlparse("s3://foo.com/stuff"),
                         ('s3','foo.com','/stuff','','',''))
        self.assertEqual(urlparse.urlparse("x-newscheme://foo.com/stuff"),
                         ('x-newscheme','foo.com','/stuff','','',''))
        self.assertEqual(urlparse.urlparse("x-newscheme://foo.com/stuff?query#fragment"),
                         ('x-newscheme','foo.com','/stuff','','query','fragment'))
        self.assertEqual(urlparse.urlparse("x-newscheme://foo.com/stuff?query"),
                         ('x-newscheme','foo.com','/stuff','','query','')) 
Example 41
Project: pyblish-win   Author: pyblish   File: test_urlparse.py    GNU Lesser General Public License v3.0 5 votes vote down vote up
def test_withoutscheme(self):
        # Test urlparse without scheme
        # Issue 754016: urlparse goes wrong with IP:port without scheme
        # RFC 1808 specifies that netloc should start with //, urlparse expects
        # the same, otherwise it classifies the portion of url as path.
        self.assertEqual(urlparse.urlparse("path"),
                ('','','path','','',''))
        self.assertEqual(urlparse.urlparse("//www.python.org:80"),
                ('','www.python.org:80','','','',''))
        self.assertEqual(urlparse.urlparse("http://www.python.org:80"),
                ('http','www.python.org:80','','','','')) 
Example 42
Project: pyblish-win   Author: pyblish   File: test_urlparse.py    GNU Lesser General Public License v3.0 5 votes vote down vote up
def test_portseparator(self):
        # Issue 754016 makes changes for port separator ':' from scheme separator
        self.assertEqual(urlparse.urlparse("path:80"),
                ('','','path:80','','',''))
        self.assertEqual(urlparse.urlparse("http:"),('http','','','','',''))
        self.assertEqual(urlparse.urlparse("https:"),('https','','','','',''))
        self.assertEqual(urlparse.urlparse("http://www.python.org:80"),
                ('http','www.python.org:80','','','','')) 
Example 43
Project: Flask-Python-GAE-Login-Registration   Author: orymeyer   File: lint.py    Apache License 2.0 5 votes vote down vote up
def check_headers(self, headers):
        """Warn about suspicious 'etag' and 'location' response headers.

        Two checks are made on *headers* (anything offering ``get``):

        * an 'etag' value, once an optional weak-validator prefix has been
          removed, must start and end with a double quote;
        * a 'location' value must contain a network location, i.e. be an
          absolute URL.

        Each violation is reported through ``warn`` as an ``HTTPWarning``;
        nothing is returned.
        """
        etag = headers.get('etag')
        if etag is not None:
            # The weak-validator prefix is spelled "W/" (upper case) on the
            # wire; the lower-case spelling is still tolerated as before.
            if etag.startswith(('W/', 'w/')):
                etag = etag[2:]
            if not (etag[:1] == etag[-1:] == '"'):
                warn(HTTPWarning('unquoted etag emitted.'), stacklevel=4)

        location = headers.get('location')
        if location is not None:
            if not urlparse(location).netloc:
                warn(HTTPWarning('absolute URLs required for location header'),
                     stacklevel=4)
Example 44
Project: Flask-Python-GAE-Login-Registration   Author: orymeyer   File: lint.py    Apache License 2.0 5 votes vote down vote up
def check_headers(self, headers):
        """Warn about suspicious 'etag' and 'location' response headers.

        Two checks are made on *headers* (anything offering ``get``):

        * an 'etag' value, once an optional weak-validator prefix has been
          removed, must start and end with a double quote;
        * a 'location' value must contain a network location, i.e. be an
          absolute URL.

        Each violation is reported through ``warn`` as an ``HTTPWarning``;
        nothing is returned.
        """
        etag = headers.get('etag')
        if etag is not None:
            # The weak-validator prefix is spelled "W/" (upper case) on the
            # wire; the lower-case spelling is still tolerated as before.
            if etag.startswith(('W/', 'w/')):
                etag = etag[2:]
            if not (etag[:1] == etag[-1:] == '"'):
                warn(HTTPWarning('unquoted etag emitted.'), stacklevel=4)

        location = headers.get('location')
        if location is not None:
            if not urlparse(location).netloc:
                warn(HTTPWarning('absolute URLs required for location header'),
                     stacklevel=4)
Example 45
Project: slidoc   Author: mitotic   File: sdviewer.py    BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def do_POST(self):
        """Handle a form POST: echo generated HTML or process an upload.

        A POST to '/generated' decodes the base64 'html' field, compares its
        HMAC token with the 'hmac' field and responds with the HTML (as a
        named file unless a non-empty 'view' field is present).  Any other
        path treats the 'file' field as an upload, turns the remaining
        fields into command-line style arguments for ``process_files`` and
        responds with either the escaped error or the filled-in
        ``html_response`` template.
        """
        parsed_url = urlparse.urlparse(self.path)
        form = cgi.FieldStorage(
            fp=self.rfile,
            headers=self.headers,
            environ={'REQUEST_METHOD':'POST',
                     'CONTENT_TYPE':self.headers['Content-Type'],
                     })

        def filled(key):
            # truthy only for a field that is present with a non-empty value
            return key in form and form[key].value

        if parsed_url.path == '/generated':
            html = base64.b64decode(form['html'].value)
            if gen_hmac_token(html) != form['hmac'].value:
                self.respond('ERROR: HMAC mismatch')
            elif filled('view'):
                self.respond(html)
            else:
                self.respond(html, filename=form['filename'].value)
            return

        filename = form['file'].filename

        # Argument order matters: boolean flags, then indexing, then values.
        cmd_args = ["--"+flag for flag in ("printable", "verbose",) if filled(flag)]
        if filled("indexed"):
            cmd_args.append("--all=%s --indexed=toc,ind,qind" % filename)
        for option in ("pace", "gsheet_url", "gsheet_login"):
            if filled(option):
                cmd_args.append("--"+option+"="+form[option].value)

        errmsg, outname, html, messages = process_files([form['file'].file], [filename], cmd_args)
        if errmsg:
            self.respond('<p><a href="/"><b>Home</b></a></p>ERROR: '+cgi.escape(errmsg))
            return

        fields = {'args':' '.join(cmd_args), 'filename': outname,
                  'hmac': gen_hmac_token(html), 'html':base64.b64encode(html),
                  'messages': '\n'.join(cgi.escape(x) for x in messages)}
        self.respond(html_response % fields)
Example 46
Project: slidoc   Author: mitotic   File: sdviewer.py    BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def do_GET(self):
        """Fetch a remote document named in the request path and process it.

        Only paths starting with '/http:' or '/https:' are handled: the
        leading slash is dropped, the query string is re-attached, and the
        resulting URL is downloaded.  The 'pace' and 'gsheet_url' query
        parameters are forwarded to ``process_files`` as command-line style
        arguments.  The response is the generated HTML, or an HTML-escaped
        error message.  For any other path this method does nothing.
        """
        parsed_url = urlparse.urlparse(self.path)
        params = urlparse.parse_qs(parsed_url.query)
        if parsed_url.path.startswith('/http:') or parsed_url.path.startswith('/https:'):
            url = parsed_url.path[1:]
            if parsed_url.query:
                url += '?' + parsed_url.query
            parsed_suburl = urlparse.urlparse(url)
            filename = os.path.basename(parsed_suburl.path) or 'file.md'
            req = urllib2.Request(url)
            try:
                response = urllib2.urlopen(req)
                # named 'source' so the builtin 'file' is not shadowed
                source = StringIO(response.read())
            except Exception as excp:
                self.respond(cgi.escape('ERROR in accessing URL %s: %s' % (url, excp)))
                return
            cmd_args = []
            for arg in ("pace", "gsheet_url"):
                if arg in params and params[arg][0]:
                    cmd_args.append("--"+arg+"="+params[arg][0])
                    if arg == "gsheet_url":
                        cmd_args.append("--gsheet_login=")
            errmsg, outname, html, messages = process_files([source], [filename], cmd_args)
            if errmsg:
                # Escaped like the other error responses here and in do_POST,
                # since the message text is not under our control.
                self.respond('ERROR: '+cgi.escape(errmsg))
            else:
                self.respond(html)
Example 47
Project: petuk.corp   Author: fnugrahendi   File: wget.py    GNU General Public License v2.0 5 votes vote down vote up
def filename_from_url(url):
    """:return: detected filename or None"""
    url_path = urlparse.urlparse(url).path
    candidate = os.path.basename(url_path)
    # a name made up only of blanks, newlines, tabs and dots is no name at all
    if not candidate.strip(" \n\t."):
        return None
    return candidate
Example 48
Project: video2commons   Author: toolforge   File: __init__.py    GNU General Public License v3.0 5 votes vote down vote up
def url_blacklisted(url):
    """Define download url blacklist.

    Raises TaskError when *url* uses the http or https scheme and its host
    lies under ``googlevideo.com``; returns None otherwise.
    """
    parseresult = urlparse(url)
    if parseresult.scheme not in ('http', 'https'):
        return
    # Test the bare, lower-cased host name rather than ``netloc``: the latter
    # may carry credentials, an explicit port or upper-case letters, any of
    # which would let a blacklisted host slip past a plain suffix check.  A
    # trailing dot (fully qualified form) is ignored for the same reason.
    hostname = (parseresult.hostname or '').rstrip('.')
    if hostname.endswith('.googlevideo.com'):
        raise TaskError('Your downloading URL has been blacklisted.')
Example 49
Project: pnp   Author: HazardDede   File: notify.py    MIT License 5 votes vote down vote up
def _load_deps(self):
        """Load the modules needed for notifications.

        Returns:
            A ``(pbullet, urlparse)`` tuple: the result of
            ``load_optional_module('pushbullet', self.EXTRA)`` and the URL
            parsing module, which is ``urlparse`` when importable and
            ``urllib.parse`` otherwise.
        """
        pbullet = load_optional_module('pushbullet', self.EXTRA)
        try:
            import urlparse
        except ImportError:  # Python 3: the module moved to urllib.parse
            # Only a failed import triggers the fallback; anything else
            # (e.g. KeyboardInterrupt) is no longer swallowed.
            import urllib.parse as urlparse

        return pbullet, urlparse
Example 50
Project: pnp   Author: HazardDede   File: notify.py    MIT License 5 votes vote down vote up
def _guess_mimetype(self, url):
        """Guess a MIME type from the path of *url*.

        Returns None when the URL has no path; otherwise whatever type
        ``mimetypes.guess_type`` reports for that path (possibly None).
        """
        import mimetypes
        _pbullet, url_tools = self._load_deps()
        url_path = url_tools.urlparse(url).path
        if not url_path:
            return None
        # guess_type yields (type, encoding); only the type is of interest
        return mimetypes.guess_type(url_path)[0]