Python urlparse.urlunsplit() Examples

The following are 30 code examples of urlparse.urlunsplit(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module urlparse, or try the search function.
Example #1
Source File: utils.py    From kano-burners with GNU General Public License v2.0 6 votes vote down vote up
def url_fix(s, charset='utf-8'):
    '''
    Sometimes you get an URL by a user that just isn't a real
    URL because it contains unsafe characters like ' ' and so on.  This
    function can fix some of the problems in a similar way browsers
    handle data entered by the user:

    >>> url_fix(u'http://de.wikipedia.org/wiki/Elf (Begriffsklärung)')
    'http://de.wikipedia.org/wiki/Elf%20%28Begriffskl%C3%A4rung%29'

    The path is percent-quoted (keeping ``/`` and ``%``) and the query
    string is plus-quoted (keeping ``:``, ``&`` and ``=``); scheme, netloc
    and fragment are passed through unchanged.

    Works on both Python 2 and Python 3: the URL helpers are imported
    locally from whichever standard-library location exists.

    :param s: Url address.
    :type s: string
    :param charset: The target charset for the URL if the url was
                    given as unicode string. Default is 'utf-8'.
    :type charset: string
    :rtype: string

    (taken from `werkzeug.utils <http://werkzeug.pocoo.org/docs/utils/>`_)
    '''
    if sys.version_info < (3, 0):
        from urllib import quote, quote_plus
        from urlparse import urlsplit, urlunsplit

        # Python 2 quoting works on byte strings, so encode text first.
        if isinstance(s, unicode):
            s = s.encode(charset, 'ignore')
        scheme, netloc, path, qs, anchor = urlsplit(s)
        path = quote(path, '/%')
        qs = quote_plus(qs, ':&=')
    else:
        from urllib.parse import quote, quote_plus, urlsplit, urlunsplit

        # Python 3 URL helpers refuse to mix bytes and text, so work on
        # text and let quote() do the charset encoding.
        if isinstance(s, bytes):
            s = s.decode(charset, 'ignore')
        scheme, netloc, path, qs, anchor = urlsplit(s)
        path = quote(path, '/%', encoding=charset, errors='ignore')
        qs = quote_plus(qs, ':&=', encoding=charset, errors='ignore')
    return urlunsplit((scheme, netloc, path, qs, anchor))
Example #2
Source File: fixture.py    From mishkal with GNU General Public License v3.0 6 votes vote down vote up
def goto(self, href, method='get', **args):
        """
        Follow the (possibly relative) link ``href`` with the given
        method (``'get'`` or ``'post'``); extra keyword arguments are
        forwarded to ``app.get()`` / ``app.post()``.

        Any scheme, hostname or fragment in ``href`` is discarded; only
        its path and query are resolved against the current request URL.
        """
        pieces = urlparse.urlsplit(href)
        # Keep just the path and query so the link stays inside the app.
        local_href = urlparse.urlunsplit(('', '', pieces[2], pieces[3], ''))
        target = urlparse.urljoin(self.request.full_url, local_href)
        method = method.lower()
        assert method in ('get', 'post'), (
            'Only "get" or "post" are allowed for method (you gave %r)'
            % method)
        send = self.test_app.get if method == 'get' else self.test_app.post
        return send(target, **args)
Example #3
Source File: net.py    From misp42splunk with GNU Lesser General Public License v3.0 6 votes vote down vote up
def validate_(self, value, context=None):
        """Validate ``value`` as a URL and optionally check that it resolves.

        Raises ``StopValidationError`` with the ``invalid_url`` message when
        ``self.valid_url`` rejects the value, and with the ``not_found``
        message when ``self.verify_exists`` is set and opening the URL
        raises ``URLError``.
        """
        url = self.valid_url(value)
        if not url:
            raise StopValidationError(self.messages['invalid_url'])
        if not self.verify_exists:
            return

        # Rebuild the URL from its validated components before probing it.
        scheme = url['scheme']
        host = url['host6'] or url['host4'] or url['hostn_enc']
        netloc = host + ':' + (url['port'] or '')
        rebuilt = urlunsplit((scheme, netloc, url['path'], url['query'], url['frag']))
        url_string = urlquote(rebuilt.encode('utf-8'), safe=VALID_CHAR_STRING)
        try:
            urlopen(url_string)
        except URLError:
            raise StopValidationError(self.messages['not_found'])
Example #4
Source File: curl.py    From pypath with GNU General Public License v3.0 6 votes vote down vote up
def url_fix(self, charset = 'utf-8'):
        """
        Quote the path and query of ``self.url`` in place.

        Does nothing when ``self.bypass_url_encoding`` is set. A bytes URL
        is first converted with ``self._bytes_to_unicode``. Each part is
        quoted only if ``self.force_quote`` is set or the part is not
        reported as already quoted.

        From http://stackoverflow.com/a/121017/854988
        """
        if self.bypass_url_encoding:
            return

        if type(self.url) is bytes:
            self.url = self._bytes_to_unicode(self.url, encoding = charset)

        scheme, netloc, path, qs, anchor = urlparse.urlsplit(self.url)

        must_quote_path = self.force_quote or not self.is_quoted(path)
        if must_quote_path:
            path = urllib.quote(path, '/%')

        must_quote_query = self.force_quote or not self.is_quoted_plus(qs)
        if must_quote_query:
            qs = urllib.quote_plus(qs, '& = ')

        rebuilt = urlparse.urlunsplit((scheme, netloc, path, qs, anchor))
        self.url = rebuilt
Example #5
Source File: validators.py    From luscan-devel with GNU General Public License v2.0 6 votes vote down vote up
def __call__(self, value):
        """Validate ``value``, retrying with an IDNA-encoded domain on failure.

        If the parent validator rejects a non-empty value, the network
        location is converted to its ASCII (ACE) form and the rebuilt URL
        is validated again. The original ``ValidationError`` is re-raised
        when the value is empty or the domain cannot be IDNA-encoded.
        """
        try:
            super(URLValidator, self).__call__(value)
        except ValidationError as e:
            if not value:
                raise
            # Trivial case failed. Try for possible IDN domain
            text = force_text(value)
            scheme, netloc, path, query, fragment = urlsplit(text)
            try:
                ace_netloc = netloc.encode('idna').decode('ascii')  # IDN -> ACE
            except UnicodeError:  # invalid domain part
                raise e
            candidate = urlunsplit((scheme, ace_netloc, path, query, fragment))
            super(URLValidator, self).__call__(candidate)
Example #6
Source File: html.py    From luscan-devel with GNU General Public License v2.0 6 votes vote down vote up
def smart_urlquote(url):
    """Quotes a URL if it isn't already quoted.

    The domain is first converted from IDN to its ASCII (ACE) form when
    possible; the whole URL is then unquoted and re-quoted so that already
    quoted input is not double-encoded.

    URLs that ``urlsplit`` rejects with ``ValueError`` (for example an
    unbalanced ``[`` in the host part) skip the IDN step instead of
    propagating the error, and are simply re-quoted.
    """
    # Handle IDN before quoting.
    try:
        scheme, netloc, path, query, fragment = urlsplit(url)
    except ValueError:
        # Malformed netloc (typically broken IPv6 brackets): leave the URL
        # as it is and fall through to plain quoting.
        pass
    else:
        try:
            netloc = netloc.encode('idna').decode('ascii') # IDN -> ACE
        except UnicodeError: # invalid domain part
            pass
        else:
            url = urlunsplit((scheme, netloc, path, query, fragment))

    url = unquote(force_str(url))
    # See http://bugs.python.org/issue2637
    url = quote(url, safe=b'!*\'();:@&=+$,/?#[]~')

    return force_text(url)
Example #7
Source File: feedparser.py    From xbmc-addons-chinese with GNU General Public License v2.0 6 votes vote down vote up
def _convert_to_idn(url):
    """Convert a URL to IDN notation.

    If the network location of ``url`` is pure ASCII the URL is returned
    unchanged. Otherwise every dot-separated label of the host name is
    IDNA-encoded and the URL is rebuilt. A ``user:password@`` prefix and a
    trailing ``:port`` are preserved verbatim (the prefix is not encoded).

    This function should only be called with a unicode string.
    """
    parts = list(urlparse.urlsplit(url))
    try:
        parts[1].encode('ascii')
    except UnicodeEncodeError:
        pass
    else:
        # Nothing to convert: the host is already ASCII.
        return url

    # Set any "user:password@" prefix aside first; otherwise its ':' would
    # be mistaken for the port separator and the host left unencoded.
    userinfo, at, hostport = parts[1].rpartition('@')
    host = hostport.rsplit(':', 1)
    port = u''
    if len(host) == 2:
        port = host.pop()
    newhost = [h.encode('idna').decode('utf-8') for h in host[0].split('.')]
    parts[1] = userinfo + at + '.'.join(newhost)
    if port:
        parts[1] += ':' + port
    return urlparse.urlunsplit(parts)
Example #8
Source File: feedparser.py    From RSSNewsGAE with Apache License 2.0 6 votes vote down vote up
def _convert_to_idn(url):
    """Convert a URL to IDN notation.

    If the network location of ``url`` is pure ASCII the URL is returned
    unchanged. Otherwise every dot-separated label of the host name is
    IDNA-encoded and the URL is rebuilt. A ``user:password@`` prefix and a
    trailing ``:port`` are preserved verbatim (the prefix is not encoded).

    This function should only be called with a unicode string.
    """
    parts = list(urlparse.urlsplit(url))
    try:
        parts[1].encode('ascii')
    except UnicodeEncodeError:
        pass
    else:
        # Nothing to convert: the host is already ASCII.
        return url

    # Set any "user:password@" prefix aside first; otherwise its ':' would
    # be mistaken for the port separator and the host left unencoded.
    userinfo, at, hostport = parts[1].rpartition('@')
    host = hostport.rsplit(':', 1)
    port = u''
    if len(host) == 2:
        port = host.pop()
    newhost = [h.encode('idna').decode('utf-8') for h in host[0].split('.')]
    parts[1] = userinfo + at + '.'.join(newhost)
    if port:
        parts[1] += ':' + port
    return urlparse.urlunsplit(parts)
Example #9
Source File: net.py    From misp42splunk with GNU Lesser General Public License v3.0 6 votes vote down vote up
def validate_(self, value, context=None):
        """Validate ``value`` as a URL and optionally check that it resolves.

        Raises ``StopValidationError`` with the ``invalid_url`` message when
        ``self.valid_url`` rejects the value, and with the ``not_found``
        message when ``self.verify_exists`` is set and opening the URL
        raises ``URLError``.
        """
        url = self.valid_url(value)
        if not url:
            raise StopValidationError(self.messages['invalid_url'])
        if not self.verify_exists:
            return

        # Rebuild the URL from its validated components before probing it.
        scheme = url['scheme']
        host = url['host6'] or url['host4'] or url['hostn_enc']
        netloc = host + ':' + (url['port'] or '')
        rebuilt = urlunsplit((scheme, netloc, url['path'], url['query'], url['frag']))
        url_string = urlquote(rebuilt.encode('utf-8'), safe=VALID_CHAR_STRING)
        try:
            urlopen(url_string)
        except URLError:
            raise StopValidationError(self.messages['not_found'])
Example #10
Source File: utils.py    From openprocurement.api with Apache License 2.0 6 votes vote down vote up
def generate_docservice_url(request, doc_id, temporary=True, prefix=None):
    """Build a signed ``/get/<doc_id>`` URL on the document service.

    The scheme and host come from ``request.registry.docservice_url``.
    The signed message is ``doc_id`` (followed by a NUL byte and an expiry
    timestamp 300 seconds ahead when ``temporary`` is true), optionally
    prefixed with ``prefix/``. The query carries ``Expires`` and ``Prefix``
    when applicable, plus ``Signature`` and the first 8 characters of the
    key id as ``KeyID``.
    """
    registry = request.registry
    docservice_key = getattr(registry, 'docservice_key', None)
    parsed_url = urlparse(registry.docservice_url)

    query = {}
    mess = doc_id
    if temporary:
        expires = int(ttime()) + 300  # EXPIRES
        mess = "{}\0{}".format(doc_id, expires)
        query['Expires'] = expires
    if prefix:
        mess = '{}/{}'.format(prefix, mess)
        query['Prefix'] = prefix

    raw_signature = docservice_key.signature(mess.encode("utf-8"))
    query['Signature'] = quote(b64encode(raw_signature))
    query['KeyID'] = docservice_key.hex_vk()[:8]

    doc_path = '/get/{}'.format(doc_id)
    return urlunsplit((parsed_url.scheme, parsed_url.netloc, doc_path, urlencode(query), ''))
Example #11
Source File: w9_xss.py    From w9scan with GNU General Public License v2.0 6 votes vote down vote up
def audit(arg):
    """Probe each query parameter of the URL ``arg`` using GET, then POST.

    Parameters named ``__VIEWSTATE``, ``IbtnEnter.x`` or ``IbtnEnter.y``
    are skipped. Every other parameter is handed to the ``iI1`` helper
    (presumably the XSS probe; it is defined outside this block). The
    first truthy result is reported through ``security_info`` and the
    audit stops.
    """
    parsed = urlparse.urlparse(arg)
    base_url = urlparse.urlunsplit((parsed.scheme, parsed.netloc, parsed.path, "", ""))
    query_pairs = urlparse.parse_qsl(parsed.query)

    skipped_names = ['__VIEWSTATE', 'IbtnEnter.x', 'IbtnEnter.y']

    for http_method in ["GET", "POST"]:
        for name, value in query_pairs:
            if name not in skipped_names:
                debug('[XSS] <%s> %s %s', http_method, name, base_url)
                finding = iI1(http_method, base_url, query_pairs, name, value)
                if finding:
                    security_info('<%s> %s' % (http_method, finding[1]))
                    return
Example #12
Source File: feedparser.py    From telegram-robot-rss with Mozilla Public License 2.0 6 votes vote down vote up
def _convert_to_idn(url):
    """Convert a URL to IDN notation.

    If the network location of ``url`` is pure ASCII the URL is returned
    unchanged. Otherwise every dot-separated label of the host name is
    IDNA-encoded and the URL is rebuilt. A ``user:password@`` prefix and a
    trailing ``:port`` are preserved verbatim (the prefix is not encoded).

    This function should only be called with a unicode string.
    """
    parts = list(urlparse.urlsplit(url))
    try:
        parts[1].encode('ascii')
    except UnicodeEncodeError:
        pass
    else:
        # Nothing to convert: the host is already ASCII.
        return url

    # Set any "user:password@" prefix aside first; otherwise its ':' would
    # be mistaken for the port separator and the host left unencoded.
    userinfo, at, hostport = parts[1].rpartition('@')
    host = hostport.rsplit(':', 1)
    port = u''
    if len(host) == 2:
        port = host.pop()
    newhost = [h.encode('idna').decode('utf-8') for h in host[0].split('.')]
    parts[1] = userinfo + at + '.'.join(newhost)
    if port:
        parts[1] += ':' + port
    return urlparse.urlunsplit(parts)
Example #13
Source File: server.py    From mattermost-integration-giphy with Apache License 2.0 6 votes vote down vote up
def giphy_translate(text):
    """
    Giphy translate method, uses the Giphy API to find an appropriate gif url

    Returns the URL of the original-size image with its scheme replaced by
    the lower-cased ``SCHEME`` setting, or an empty string when the API
    does not answer with an OK status.
    """

    params = {
        's': text,
        'rating': RATING,
        'api_key': GIPHY_API_KEY,
    }

    resp = requests.get('https://api.giphy.com/v1/gifs/translate', params=params, verify=True)

    # Compare status codes by value; identity comparison of integers only
    # works by accident of the interpreter's small-int cache.
    if resp.status_code != requests.codes.ok:
        try:
            body = resp.json()
        except ValueError:
            # Error responses are not guaranteed to be JSON; fall back to
            # the raw text so that reporting the failure cannot itself fail.
            body = resp.text
        print('Encountered error using Giphy API, text=%s, status=%d, response_body=%s' % (text, resp.status_code, body))
        return ''

    resp_data = resp.json()

    url = list(urlsplit(resp_data['data']['images']['original']['url']))
    url[0] = SCHEME.lower()
    return urlunsplit(url)
Example #14
Source File: url.py    From snippet with MIT License 5 votes vote down vote up
def geturl(self):
        """Assemble the URL, letting attributes set on ``self`` override ``self.url``.

        Each component falls back to the matching attribute of ``self.url``
        when the override is falsy. Non-empty params are appended to the
        path after a ``;``.
        """
        scheme = self.scheme or self.url.scheme
        netloc = self.netloc or self.url.netloc
        path = self.path or self.url.path
        params = self.params or self.url.params
        query = self.query or self.url.query
        fragment = self.fragment or self.url.fragment

        path_with_params = "%s;%s" % (path, params) if params else path
        return urlunsplit((scheme, netloc, path_with_params, query, fragment))
Example #15
Source File: iri2uri.py    From alfred-gmail with MIT License 5 votes vote down vote up
def iri2uri(uri):
    """Convert an IRI to a URI.

    The IRI must be passed in as a unicode string; do not utf-8 encode
    it beforehand. Any other kind of value is returned untouched.
    """
    if not isinstance(uri, unicode):
        return uri
    scheme, authority, path, query, fragment = urlparse.urlsplit(uri)
    ace_authority = authority.encode("idna")
    rebuilt = urlparse.urlunsplit((scheme, ace_authority, path, query, fragment))
    # For each character in 'ucschar' or 'iprivate'
    #  1. encode as utf-8
    #  2. then %-encode each octet of that utf-8
    return "".join(encode(ch) for ch in rebuilt)
Example #16
Source File: iri2uri.py    From luci-py with Apache License 2.0 5 votes vote down vote up
def iri2uri(uri):
    """Convert an IRI to a URI. Note that IRIs must be
    passed in a unicode strings. That is, do not utf-8 encode
    the IRI before passing it into the function."""
    if isinstance(uri, unicode):
        (scheme, authority, path, query, fragment) = urlparse.urlsplit(uri)
        authority = authority.encode("idna")
        # For each character in 'ucschar' or 'iprivate'
        #  1. encode as utf-8
        #  2. then %-encode each octet of that utf-8
        uri = urlparse.urlunsplit((scheme, authority, path, query, fragment))
        uri = "".join([encode(c) for c in uri])
    return uri 
Example #17
Source File: fields.py    From luscan-devel with GNU General Public License v2.0 5 votes vote down vote up
def to_python(self, value):
        """Normalise a URL value after the parent field has converted it.

        A missing scheme defaults to ``http``; when no domain was parsed the
        path is assumed to hold it; an empty path becomes ``/``. Falsy values
        are returned as they come from the parent ``to_python``.
        """

        def split_url(url):
            """
            Return the parts of ``url`` from ``urlparse.urlsplit`` as a list,
            raising ``ValidationError`` when it cannot be split.
            """
            try:
                parts = urlsplit(url)
            except ValueError:
                # urlparse.urlsplit can raise a ValueError with some
                # misformatted URLs.
                raise ValidationError(self.error_messages['invalid'])
            return list(parts)

        value = super(URLField, self).to_python(value)
        if not value:
            return value

        url_fields = split_url(value)
        if not url_fields[0]:
            # If no URL scheme given, assume http://
            url_fields[0] = 'http'
        if not url_fields[1]:
            # Assume that if no domain is provided, that the path segment
            # contains the domain.
            url_fields[1], url_fields[2] = url_fields[2], ''
            # Rebuild the url_fields list, since the domain segment may now
            # contain the path too.
            url_fields = split_url(urlunsplit(url_fields))
        if not url_fields[2]:
            # the path portion may need to be added before query params
            url_fields[2] = '/'
        return urlunsplit(url_fields)
Example #18
Source File: default.py    From xbmc-addons-chinese with GNU General Public License v2.0 5 votes vote down vote up
def change_cdn(url):
    """Point ``url`` at the aliCDN dispatcher host unless it should be kept.

    URLs that already mention the dispatcher host or ``k.youku.com`` are
    returned unchanged; for all others only the network location is
    replaced.
    """
    # If the CDN URL starts with an IP address it is probably youku's old
    # CDN, which randomly rejects HTTP requests with status codes > 400.
    # Going through the aliCDN dispatcher recovers a little better, at
    # least from HTTP 403.
    dispatcher_url = 'vali.cp31.ott.cibntv.net'
    if dispatcher_url in url or 'k.youku.com' in url:
        return url
    pieces = urlparse.urlsplit(url)
    return urlparse.urlunsplit(pieces._replace(netloc=dispatcher_url))
Example #19
Source File: views.py    From django-oidc-provider with MIT License 5 votes vote down vote up
def dispatch(self, request, *args, **kwargs):
        """Work out the post-logout redirect, run the end-session hook, then dispatch.

        ``next_page`` starts as the ``OIDC_LOGIN_URL`` setting. It becomes the
        requested ``post_logout_redirect_uri`` only when ``id_token_hint``
        resolves to a known client that lists that URI; a ``state`` value,
        if given, is added to the redirect's query string.
        """
        query = request.GET
        id_token_hint = query.get('id_token_hint', '')
        post_logout_redirect_uri = query.get('post_logout_redirect_uri', '')
        state = query.get('state', '')
        client = None

        next_page = settings.get('OIDC_LOGIN_URL')
        after_end_session_hook = settings.get('OIDC_AFTER_END_SESSION_HOOK', import_str=True)

        if id_token_hint:
            client_id = client_id_from_id_token(id_token_hint)
            try:
                client = Client.objects.get(client_id=client_id)
                if post_logout_redirect_uri in client.post_logout_redirect_uris:
                    if not state:
                        next_page = post_logout_redirect_uri
                    else:
                        # Merge ``state`` into the redirect's existing query.
                        target = urlsplit(post_logout_redirect_uri)
                        query_params = parse_qs(target.query)
                        query_params['state'] = state
                        target = target._replace(query=urlencode(query_params, doseq=True))
                        next_page = urlunsplit(target)
            except Client.DoesNotExist:
                pass

        hook_arguments = dict(
            request=request,
            id_token=id_token_hint,
            post_logout_redirect_uri=post_logout_redirect_uri,
            state=state,
            client=client,
            next_page=next_page,
        )
        after_end_session_hook(**hook_arguments)

        self.next_page = next_page
        return super(EndSessionView, self).dispatch(request, *args, **kwargs)
Example #20
Source File: w9_urlredict.py    From w9scan with GNU General Public License v2.0 5 votes vote down vote up
def audit(arg):
    """Probe each query parameter of the URL ``arg`` in turn.

    Parameters named ``__VIEWSTATE``, ``IbtnEnter.x`` or ``IbtnEnter.y``
    are skipped. Every other parameter is handed to the ``iI1`` helper
    (presumably the redirect probe; it is defined outside this block).
    The first truthy result is reported through ``security_info`` and the
    audit stops.
    """
    parsed = urlparse.urlparse(arg)
    base_url = urlparse.urlunsplit((parsed.scheme, parsed.netloc, parsed.path, "", ""))
    query_pairs = urlparse.parse_qsl(parsed.query)
    skipped_names = ["__VIEWSTATE", "IbtnEnter.x", "IbtnEnter.y"]
    for name, value in query_pairs:
        if name not in skipped_names:
            debug("[RDB] %s %s-", name, base_url)
            finding = iI1(base_url, query_pairs, name, value)
            if finding:
                security_info(finding[1])
                return
Example #21
Source File: prestoclient.py    From presto-admin with Apache License 2.0 5 votes vote down vote up
def _get_response_from(self, uri):
        """
        Sends a GET request to the Presto server at the specified next_uri
        and updates the response

        Remove the scheme and host/port from the uri; the connection itself
        has that information.

        Returns True and stores the decoded JSON body in
        ``self.response_from_server`` when the server answers 200; logs an
        error and returns False for any other status. The connection is
        closed in every case, including when the request raises.
        """
        parts = list(urlparse.urlsplit(uri))
        # Blank out scheme and netloc with empty strings (not None) so that
        # urlunsplit accepts the components on Python 3 as well.
        parts[0] = ''
        parts[1] = ''
        location = urlparse.urlunsplit(parts)
        conn = self._get_connection()
        try:
            headers = {"X-Presto-User": self.user}
            self._add_auth_headers(headers)
            conn.request("GET", location, headers=headers)
            response = conn.getresponse()
            succeeded = response.status == 200
            # Only a successful response has a body worth reading.
            answer = response.read() if succeeded else None
        finally:
            # Release the connection even if the request or read failed.
            conn.close()

        if not succeeded:
            _LOGGER.error("Error making GET request to %s: %s %s" %
                          (uri, response.status, response.reason))
            return False

        self.response_from_server = json.loads(answer)
        _LOGGER.info("GET request successful for uri: " + uri)
        return True
Example #22
Source File: iri2uri.py    From googleapps-message-recall with Apache License 2.0 5 votes vote down vote up
def iri2uri(uri):
    """Convert an IRI to a URI.

    IRIs must be supplied as unicode strings, i.e. not utf-8 encoded
    before the call. Non-unicode input is returned unchanged.
    """
    if isinstance(uri, unicode):
        scheme, authority, path, query, fragment = urlparse.urlsplit(uri)
        components = (scheme, authority.encode('idna'), path, query, fragment)
        # For each character in 'ucschar' or 'iprivate'
        #  1. encode as utf-8
        #  2. then %-encode each octet of that utf-8
        encoded_chars = [encode(c) for c in urlparse.urlunsplit(components)]
        uri = "".join(encoded_chars)
    return uri
Example #23
Source File: bottle2.py    From pyFileFixity with MIT License 5 votes vote down vote up
def url(self):
        """ Full URL as requested by the client (computed).

            Built from the WSGI environment: scheme, host (forwarded host or
            Host header, else server name plus any non-default port), the
            quoted full path and the query string.
        """
        env = self.environ
        scheme = env.get('wsgi.url_scheme', 'http')
        host = env.get('HTTP_X_FORWARDED_HOST', env.get('HTTP_HOST', None))
        if not host:
            # No usable host header: fall back to the server's own name and
            # add the port only when it is not the scheme's default.
            host = env.get('SERVER_NAME')
            port = env.get('SERVER_PORT', '80')
            uses_default_port = scheme + port in ('https443', 'http80')
            if not uses_default_port:
                host += ':' + port
        quoted_path = urlquote(self.fullpath)
        return urlunsplit((scheme, host, quoted_path, self.query_string, ''))
Example #24
Source File: __init__.py    From misp42splunk with GNU Lesser General Public License v3.0 5 votes vote down vote up
def urldefrag(url):
    """Split ``url`` into ``(url_without_fragment, fragment)``.

    When ``url`` contains no ``#`` it is returned untouched together with
    an empty fragment.
    """
    if "#" not in url:
        return url, ''
    pieces = urlsplit(url)
    # Re-join everything except the fragment (the last component).
    return urlunsplit(pieces[:4] + ('',)), pieces[4]
Example #25
Source File: utils.py    From snippet with MIT License 5 votes vote down vote up
def geturl(self):
        """Assemble the URL, letting attributes set on ``self`` override ``self.url``.

        Each component falls back to the matching attribute of ``self.url``
        when the override is falsy. Non-empty params are appended to the
        path after a ``;``.
        """
        scheme = self.scheme or self.url.scheme
        netloc = self.netloc or self.url.netloc
        path = self.path or self.url.path
        params = self.params or self.url.params
        query = self.query or self.url.query
        fragment = self.fragment or self.url.fragment

        path_with_params = "%s;%s" % (path, params) if params else path
        return urlparse.urlunsplit((scheme, netloc, path_with_params, query, fragment))
Example #26
Source File: iri2uri.py    From aqua-monitor with GNU Lesser General Public License v3.0 5 votes vote down vote up
def iri2uri(uri):
    """Convert an IRI to a URI.

    The argument has to be a unicode string (do not utf-8 encode it
    first); anything else is passed straight through.
    """
    if not isinstance(uri, unicode):
        return uri
    split = urlparse.urlsplit(uri)
    # Swap in the IDNA-encoded authority, keeping the other components.
    components = (split[0], split[1].encode('idna')) + tuple(split[2:])
    rebuilt = urlparse.urlunsplit(components)
    # For each character in 'ucschar' or 'iprivate'
    #  1. encode as utf-8
    #  2. then %-encode each octet of that utf-8
    return "".join([encode(ch) for ch in rebuilt])
Example #27
Source File: __init__.py    From rpm-s3 with BSD 2-Clause "Simplified" License 5 votes vote down vote up
def _getFragmentUrl(self, url, fragment):
        """Return ``url`` with its fragment replaced by ``str(fragment)``.

        The ``media`` scheme is registered in ``urlparse.uses_fragment``
        (once) before splitting. A falsy ``url`` is returned unchanged.
        """
        try:
            import urlparse
        except ImportError:  # Python 3: the module lives in urllib.parse
            from urllib import parse as urlparse
        # Register the scheme only once; appending on every call would
        # grow the module-level list without bound.
        if 'media' not in urlparse.uses_fragment:
            urlparse.uses_fragment.append('media')
        if not url:
            return url
        scheme, netloc, path, query = urlparse.urlsplit(url)[:4]
        return urlparse.urlunsplit((scheme, netloc, path, query, str(fragment)))
Example #28
Source File: wms_styles.py    From minerva with Apache License 2.0 5 votes vote down vote up
def _generate_url(wms_url, **kwargs):
        """Generates different urls(wfs or wcs) from a wms url

        When keyword arguments are given they are URL-encoded (sequence
        values expand to repeated parameters) and replace the query string
        of ``wms_url``. Without keyword arguments the original query string
        is kept as it is.
        """

        scheme, netloc, path, query_string, fragment = urlsplit(wms_url)

        if kwargs:
            # Only encode the mapping; the existing query is already a
            # string and urlencode() would reject it.
            query_string = urlencode(kwargs, doseq=True)

        return urlunsplit((scheme, netloc, path, query_string, fragment))
Example #29
Source File: scrapertools.py    From tvalacarta with GNU General Public License v3.0 5 votes vote down vote up
def fixurl(url):
    """Return ``url`` with each component re-encoded.

    Input that is not ``unicode`` is first decoded as UTF-8. User name and
    password are percent-quoted, the host is IDNA-encoded, and path, query
    and fragment are unquoted and then quoted again so that already quoted
    input is not double-encoded.

    NOTE(review): written against the Python 2 API (``unicode``,
    ``urllib.quote``, ``urlparse``) -- confirm before using on Python 3.
    """
    # turn string into unicode
    if not isinstance(url,unicode):
        url = url.decode('utf8')

    # parse it
    parsed = urlparse.urlsplit(url)

    # divide the netloc further
    # rpartition: the last '@' separates credentials from host and port
    userpass,at,hostport = parsed.netloc.rpartition('@')
    user,colon1,pass_ = userpass.partition(':')
    # NOTE(review): splitting on the first ':' looks like it would mis-split
    # a bracketed IPv6 literal -- confirm whether such hosts can occur.
    host,colon2,port = hostport.partition(':')

    # encode each component
    scheme = parsed.scheme.encode('utf8')
    user = urllib.quote(user.encode('utf8'))
    colon1 = colon1.encode('utf8')
    pass_ = urllib.quote(pass_.encode('utf8'))
    at = at.encode('utf8')
    host = host.encode('idna')
    colon2 = colon2.encode('utf8')
    port = port.encode('utf8')
    # each path segment is quoted with no safe characters, so a '/' that was
    # percent-encoded inside a segment stays encoded
    path = '/'.join(  # could be encoded slashes!
        urllib.quote(urllib.unquote(pce).encode('utf8'),'')
        for pce in parsed.path.split('/')
    )
    # '=', '&', '?' and '/' are left unescaped in the query
    query = urllib.quote(urllib.unquote(parsed.query).encode('utf8'),'=&?/')
    fragment = urllib.quote(urllib.unquote(parsed.fragment).encode('utf8'))

    # put it back together
    netloc = ''.join((user,colon1,pass_,at,host,colon2,port))
    return urlparse.urlunsplit((scheme,netloc,path,query,fragment))

# Some helper methods 
Example #30
Source File: DataServicePillager.py    From DataPillager with MIT License 5 votes vote down vote up
def get_referring_domain(url_string):
    """Return the referring-domain part for a service URL.

    Hosts containing ``arcgis.com`` map to the fixed Esri domain. For any
    other host the result is ``scheme://netloc`` of the service URL, with
    ``http`` upgraded to ``https``.

    :param url_string: url of service
    """
    u = urlparse(url_string)
    if 'arcgis.com' in u.netloc:
        # is an esri domain
        return r"https://www.arcgis.com"
    # generate from service url and hope it works
    scheme = 'https' if u.scheme == 'http' else u.scheme
    return urlunsplit([scheme, u.netloc, '', '', ''])