Python urlparse.urldefrag() Examples

The following are code examples showing how to use urlparse.urldefrag(), drawn from open source Python projects.
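Before the project examples, here is a minimal sketch of the call itself. urldefrag() splits a URL into the URL proper and its fragment. The try/except import is an assumption added here so the snippet runs on both Python 2 (where the function lives in urlparse) and Python 3 (where it moved to urllib.parse):

try:
    from urlparse import urldefrag        # Python 2
except ImportError:
    from urllib.parse import urldefrag    # Python 3

url, fragment = urldefrag('http://example.com/page.html#section-2')
print(url)       # http://example.com/page.html
print(fragment)  # section-2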

Example 1
Project: pg-epubmaker   Author: gitenberg-dev   File: HTMLChunker.py    GNU General Public License v3.0
def rewrite_internal_links (self):
        """ Rewrite links to point into right chunks.

        Because we split the HTML into chunks, all internal links need
        to be rewritten to become links into the right chunk.
        Rewrite all internal links in all chunks.

        """
        for chunk in self.chunks:
            for a in xpath (chunk['chunk'], "//xhtml:*[@href]"):
                try:
                    uri = unicode_uri (a.get ('href'))
                    a.set ('href', self.idmap[uri])
                except KeyError:
                    # The full href (with fragment) is not in idmap; if the
                    # defragged URI is itself a known chunk, the link targets
                    # an anchor we cannot map, so report it as unrewritable.
                    ur, dummy_frag = urlparse.urldefrag (uri)
                    if ur in self.idmap:
                        error ("HTMLChunker: Cannot rewrite internal link '%s'" % uri) 
Example 2
Project: Host-Header-Attack-Test   Author: keramatAlijani   File: crawler.py    GNU General Public License v3.0
def _pre_visit_url_condense(self, url):
        
        """ Reduce (condense) URLs into some canonical form before
        visiting.  All occurrences of equivalent URLs are treated as
        identical.

        All this does is strip the "fragment" component from URLs,
        so that http://foo.com/blah.html#baz becomes
        http://foo.com/blah.html """

        base, frag = urlparse.urldefrag(url)
        return base

    ## URL Filtering functions.  These all use information from the
    ## state of the Crawler to evaluate whether a given URL should be
    ## used in some context.  Return value of True indicates that the
    ## URL should be used. 
Example 3
Project: OffSec   Author: ducatinat   File: webcrawler.py    GNU General Public License v3.0
def _pre_visit_url_condense(self, url):

        """ Reduce (condense) URLs into some canonical form before
        visiting.  All occurrences of equivalent URLs are treated as
        identical.

        All this does is strip the "fragment" component from URLs,
        so that http://foo.com/blah.html#baz becomes
        http://foo.com/blah.html """

        base, frag = urlparse.urldefrag(url)
        return base

    ## URL Filtering functions.  These all use information from the
    ## state of the Crawler to evaluate whether a given URL should be
    ## used in some context.  Return value of True indicates that the
    ## URL should be used. 
Example 4
Project: restoa-tools   Author: restful-open-annotation   File: import.py    MIT License
def get_relative_target_urls(document):
    """Return unique relative target URLs in OA JSON-LD document."""
    # TODO: check for @base to differentiate true relative targets
    # from ones that just look relative without context.
    found = set()
    target = document.get(TARGET_KEY)
    if not target:
        warn('missing target')
    elif isinstance(target, basestring):
        if is_relative(target):
            found.add(urlparse.urldefrag(target)[0])
    elif isinstance(target, list):
        for t in target:
            if is_relative(t):
                found.add(urlparse.urldefrag(t)[0])
    else:
        raise NotImplementedError('structured target support')
    return found 
Example 5
Project: python-simple-web-crawler   Author: arzzen   File: crawler.py    MIT License
def _pre_visit_url_condense(self, url):
        """ Reduce (condense) URLs into some canonical form before
        visiting.  All occurrences of equivalent URLs are treated as
        identical.

        All this does is strip the "fragment" component from URLs,
        so that http://foo.com/blah.html#baz becomes
        http://foo.com/blah.html 
        """

        base, frag = urlparse.urldefrag(url)
        return base

    ## URL Filtering functions.  These all use information from the
    ## state of the Crawler to evaluate whether a given URL should be
    ## used in some context.  Return value of True indicates that the
    ## URL should be used. 
Example 6
Project: fast_urlparse   Author: Parsely   File: test_urlparse.py    MIT License
def test_urldefrag(self):
        str_cases = [
            ('http://python.org#frag', 'http://python.org', 'frag'),
            ('http://python.org', 'http://python.org', ''),
            ('http://python.org/#frag', 'http://python.org/', 'frag'),
            ('http://python.org/', 'http://python.org/', ''),
            ('http://python.org/?q#frag', 'http://python.org/?q', 'frag'),
            ('http://python.org/?q', 'http://python.org/?q', ''),
            ('http://python.org/p#frag', 'http://python.org/p', 'frag'),
            ('http://python.org/p?q', 'http://python.org/p?q', ''),
            (RFC1808_BASE, 'http://a/b/c/d;p?q', 'f'),
            (RFC2396_BASE, 'http://a/b/c/d;p?q', ''),
        ]
        def _encode(t):
            return type(t)(x.encode('ascii') for x in t)
        bytes_cases = [_encode(x) for x in str_cases]
        for url, defrag, frag in str_cases + bytes_cases:
            result = urlparse.urldefrag(url)
            self.assertEqual(result.geturl(), url)
            self.assertEqual(result, (defrag, frag))
            self.assertEqual(result.url, defrag)
            self.assertEqual(result.fragment, frag) 
Example 7
Project: pyblish-win   Author: pyblish   File: test_urlparse.py    GNU Lesser General Public License v3.0
def test_urldefrag(self):
        for url, defrag, frag in [
            ('http://python.org#frag', 'http://python.org', 'frag'),
            ('http://python.org', 'http://python.org', ''),
            ('http://python.org/#frag', 'http://python.org/', 'frag'),
            ('http://python.org/', 'http://python.org/', ''),
            ('http://python.org/?q#frag', 'http://python.org/?q', 'frag'),
            ('http://python.org/?q', 'http://python.org/?q', ''),
            ('http://python.org/p#frag', 'http://python.org/p', 'frag'),
            ('http://python.org/p?q', 'http://python.org/p?q', ''),
            (RFC1808_BASE, 'http://a/b/c/d;p?q', 'f'),
            (RFC2396_BASE, 'http://a/b/c/d;p?q', ''),
            ]:
            self.assertEqual(urlparse.urldefrag(url), (defrag, frag)) 
Example 8
Project: openhatch   Author: campbe13   File: webclient.py    GNU Affero General Public License v3.0
def __init__(self, request, timeout=180):
        self.url = urldefrag(request.url)[0]
        self.method = request.method
        self.body = request.body or None
        self.headers = Headers(request.headers)
        self.response_headers = None
        self.timeout = request.meta.get('download_timeout') or timeout
        self.start_time = time()
        self.deferred = defer.Deferred().addCallback(self._build_response, request)

        # Fixes Twisted 11.1.0+ support, as HTTPClientFactory is expected
        # to have _disconnectedDeferred. See Twisted r32329.
        # Since Scrapy implements its own redirect handling, there is no
        # need to add the _waitForDisconnect callback.
        # Specifically, this avoids the AttributeError exception when the
        # clientConnectionFailed method is called.
        self._disconnectedDeferred = defer.Deferred()

        self._set_connection_attributes(request)

        # set Host header based on url
        self.headers.setdefault('Host', self.netloc)

        # set Content-Length based on the length of the body
        if self.body is not None:
            self.headers['Content-Length'] = len(self.body)
            # just in case a broken http/1.1 server decides to keep the connection alive
            self.headers.setdefault("Connection", "close") 
Example 9
Project: tornado-zh   Author: tao12345666333   File: webspider.py    MIT License
def remove_fragment(url):
    pure_url, frag = urldefrag(url)
    return pure_url 
Example 10
Project: Python_Master_Courses   Author: makelove   File: tornado-crawler-demo2.py    GNU General Public License v3.0
def remove_fragment(self, url):
        pure_url, frag = urldefrag(url)
        return pure_url

    # Use HTMLParser to parse the HTML and extract the URLs it contains; BeautifulSoup or similar would also work. 
Example 11
Project: Python_Master_Courses   Author: makelove   File: tornado-crawler-demo1.py    GNU General Public License v3.0
def remove_fragment(url):
    pure_url, frag = urldefrag(url)
    return pure_url 
Example 12
Project: pg-epubmaker   Author: gitenberg-dev   File: __init__.py    GNU General Public License v3.0
def remap_links (self, url_map):
        """ Rewrite all links using the dictionary url_map. """
        def f (url):
            """ Remap function """
            ur, frag = urlparse.urldefrag (url)
            if ur in url_map:
                debug ("Rewriting redirected url: %s to %s" % (ur, url_map[ur]))
                ur = url_map[ur]
            return "%s#%s" % (ur, frag) if frag else ur
            
        self.rewrite_links (f) 
Example 13
Project: oa_qian   Author: sunqb   File: discover.py    Apache License 2.0
def getDisplayIdentifier(self):
        """Return the display_identifier if set, else return the claimed_id.
        """
        if self.display_identifier is not None:
            return self.display_identifier
        if self.claimed_id is None:
            return None
        else:
            return urlparse.urldefrag(self.claimed_id)[0] 
Example 14
Project: ironpython2   Author: IronLanguages   File: test_urlparse.py    Apache License 2.0
def test_urldefrag(self):
        for url, defrag, frag in [
            ('http://python.org#frag', 'http://python.org', 'frag'),
            ('http://python.org', 'http://python.org', ''),
            ('http://python.org/#frag', 'http://python.org/', 'frag'),
            ('http://python.org/', 'http://python.org/', ''),
            ('http://python.org/?q#frag', 'http://python.org/?q', 'frag'),
            ('http://python.org/?q', 'http://python.org/?q', ''),
            ('http://python.org/p#frag', 'http://python.org/p', 'frag'),
            ('http://python.org/p?q', 'http://python.org/p?q', ''),
            (RFC1808_BASE, 'http://a/b/c/d;p?q', 'f'),
            (RFC2396_BASE, 'http://a/b/c/d;p?q', ''),
            ]:
            self.assertEqual(urlparse.urldefrag(url), (defrag, frag)) 
Example 15
Project: open-recipe   Author: dspray95   File: client.py    The Unlicense
def _urljoin(base, url):
    """
    Construct a full ("absolute") URL by combining a "base URL" with another
    URL. Informally, this uses components of the base URL, in particular the
    addressing scheme, the network location and (part of) the path, to provide
    missing components in the relative URL.

    Additionally, the fragment identifier is preserved according to the HTTP
    1.1 bis draft.

    @type base: C{bytes}
    @param base: Base URL.

    @type url: C{bytes}
    @param url: URL to combine with C{base}.

    @return: An absolute URL resulting from the combination of C{base} and
        C{url}.

    @see: L{urlparse.urljoin}

    @see: U{https://tools.ietf.org/html/draft-ietf-httpbis-p2-semantics-22#section-7.1.2}
    """
    base, baseFrag = urldefrag(base)
    url, urlFrag = urldefrag(urljoin(base, url))
    return urljoin(url, b'#' + (urlFrag or baseFrag)) 
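As a quick usage sketch of the fragment-preservation rule implemented above (hypothetical values, assuming the same urldefrag/urljoin imports the module uses): a fragment on the relative URL wins, otherwise the base URL's fragment is carried over.

_urljoin(b'http://example.com/a#top', b'/b')       # -> http://example.com/b#top
_urljoin(b'http://example.com/a#top', b'/b#mid')   # -> http://example.com/b#mid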
Example 16
Project: PyCrawler   Author: princeedward   File: urlhandler.py    BSD 2-Clause "Simplified" License
def SingleUrlFilter(url, param):
    result = urlparse(url)
    domain = result.netloc
    defraged_url = urldefrag(url)
    return defraged_url[0], domain 
Example 17
Project: xuemc   Author: skycucumber   File: discover.py    GNU General Public License v2.0
def getDisplayIdentifier(self):
        """Return the display_identifier if set, else return the claimed_id.
        """
        if self.display_identifier is not None:
            return self.display_identifier
        if self.claimed_id is None:
            return None
        else:
            return urlparse.urldefrag(self.claimed_id)[0] 
Example 18
Project: oss-ftp   Author: aliyun   File: test_urlparse.py    MIT License
def test_urldefrag(self):
        for url, defrag, frag in [
            ('http://python.org#frag', 'http://python.org', 'frag'),
            ('http://python.org', 'http://python.org', ''),
            ('http://python.org/#frag', 'http://python.org/', 'frag'),
            ('http://python.org/', 'http://python.org/', ''),
            ('http://python.org/?q#frag', 'http://python.org/?q', 'frag'),
            ('http://python.org/?q', 'http://python.org/?q', ''),
            ('http://python.org/p#frag', 'http://python.org/p', 'frag'),
            ('http://python.org/p?q', 'http://python.org/p?q', ''),
            (RFC1808_BASE, 'http://a/b/c/d;p?q', 'f'),
            (RFC2396_BASE, 'http://a/b/c/d;p?q', ''),
            ]:
            self.assertEqual(urlparse.urldefrag(url), (defrag, frag)) 
Example 19
Project: Safejumper-for-Desktop   Author: proxysh   File: client.py    GNU General Public License v2.0
def _urljoin(base, url):
    """
    Construct a full ("absolute") URL by combining a "base URL" with another
    URL. Informally, this uses components of the base URL, in particular the
    addressing scheme, the network location and (part of) the path, to provide
    missing components in the relative URL.

    Additionally, the fragment identifier is preserved according to the HTTP
    1.1 bis draft.

    @type base: C{bytes}
    @param base: Base URL.

    @type url: C{bytes}
    @param url: URL to combine with C{base}.

    @return: An absolute URL resulting from the combination of C{base} and
        C{url}.

    @see: L{urlparse.urljoin}

    @see: U{https://tools.ietf.org/html/draft-ietf-httpbis-p2-semantics-22#section-7.1.2}
    """
    base, baseFrag = urldefrag(base)
    url, urlFrag = urldefrag(urljoin(base, url))
    return urljoin(url, b'#' + (urlFrag or baseFrag)) 
Example 20
Project: forum-xblock   Author: DANCEcollaborative   File: storage.py    MIT License
def url(self, name, force=False):
        """
        Returns the real URL in DEBUG mode.
        """
        if settings.DEBUG and not force:
            hashed_name, fragment = name, ''
        else:
            clean_name, fragment = urldefrag(name)
            if urlsplit(clean_name).path.endswith('/'):  # don't hash paths
                hashed_name = name
            else:
                cache_key = self.cache_key(name)
                hashed_name = self.cache.get(cache_key)
                if hashed_name is None:
                    hashed_name = self.hashed_name(clean_name).replace('\\', '/')
                    # set the cache if there was a miss
                    # (e.g. if cache server goes down)
                    self.cache.set(cache_key, hashed_name)

        final_url = super(CachedFilesMixin, self).url(hashed_name)

        # Special casing for a @font-face hack, like url(myfont.eot?#iefix)
        # http://www.fontspring.com/blog/the-new-bulletproof-font-face-syntax
        query_fragment = '?#' in name  # [sic!]
        if fragment or query_fragment:
            urlparts = list(urlsplit(final_url))
            if fragment and not urlparts[4]:
                urlparts[4] = fragment
            if query_fragment and not urlparts[3]:
                urlparts[2] += '?'
            final_url = urlunsplit(urlparts)

        return unquote(final_url) 
Example 21
Project: weibo_twitter_earnings_analysis   Author: lynings   File: client.py    Apache License 2.0
def _urljoin(base, url):
    """
    Construct a full ("absolute") URL by combining a "base URL" with another
    URL. Informally, this uses components of the base URL, in particular the
    addressing scheme, the network location and (part of) the path, to provide
    missing components in the relative URL.

    Additionally, the fragment identifier is preserved according to the HTTP
    1.1 bis draft.

    @type base: C{bytes}
    @param base: Base URL.

    @type url: C{bytes}
    @param url: URL to combine with C{base}.

    @return: An absolute URL resulting from the combination of C{base} and
        C{url}.

    @see: L{urlparse.urljoin}

    @see: U{https://tools.ietf.org/html/draft-ietf-httpbis-p2-semantics-22#section-7.1.2}
    """
    base, baseFrag = urldefrag(base)
    url, urlFrag = urldefrag(urljoin(base, url))
    return urljoin(url, b'#' + (urlFrag or baseFrag)) 
Example 22
Project: learn_python3_spider   Author: wistbean   File: client.py    MIT License
def _urljoin(base, url):
    """
    Construct a full ("absolute") URL by combining a "base URL" with another
    URL. Informally, this uses components of the base URL, in particular the
    addressing scheme, the network location and (part of) the path, to provide
    missing components in the relative URL.

    Additionally, the fragment identifier is preserved according to the HTTP
    1.1 bis draft.

    @type base: C{bytes}
    @param base: Base URL.

    @type url: C{bytes}
    @param url: URL to combine with C{base}.

    @return: An absolute URL resulting from the combination of C{base} and
        C{url}.

    @see: L{urlparse.urljoin}

    @see: U{https://tools.ietf.org/html/draft-ietf-httpbis-p2-semantics-22#section-7.1.2}
    """
    base, baseFrag = urldefrag(base)
    url, urlFrag = urldefrag(urljoin(base, url))
    return urljoin(url, b'#' + (urlFrag or baseFrag)) 
Example 23
Project: restoa-tools   Author: restful-open-annotation   File: import.py    MIT License
def _map_relative(target, target_map):
    # Helper for rewrite_relative_target_urls
    assert isinstance(target, basestring)
    if not is_relative(target):
        return target
    base, frag = urlparse.urldefrag(target)
    mapped = target_map.get(base)
    if not mapped:
        return target
    else:
        return mapped + '#' + frag 
Example 24
Project: restoa-tools   Author: restful-open-annotation   File: export.py    MIT License
def target_urls(annotations, target_key='target'):
    """Return list of unique target URLs for Open Annotation objects."""
    uniques = set()
    for annotation in annotations:
        targets = annotation[target_key]
        if isinstance(targets, basestring):
            targets = [targets]
        for target in targets:
            url = urlparse.urldefrag(target)[0]
            uniques.add(url)
    return list(uniques) 
Example 25
Project: Hot   Author: dsolimando   File: test_urlparse.py    GNU General Public License v3.0
def test_urldefrag(self):
        for url, defrag, frag in [
            ('http://python.org#frag', 'http://python.org', 'frag'),
            ('http://python.org', 'http://python.org', ''),
            ('http://python.org/#frag', 'http://python.org/', 'frag'),
            ('http://python.org/', 'http://python.org/', ''),
            ('http://python.org/?q#frag', 'http://python.org/?q', 'frag'),
            ('http://python.org/?q', 'http://python.org/?q', ''),
            ('http://python.org/p#frag', 'http://python.org/p', 'frag'),
            ('http://python.org/p?q', 'http://python.org/p?q', ''),
            (RFC1808_BASE, 'http://a/b/c/d;p?q', 'f'),
            (RFC2396_BASE, 'http://a/b/c/d;p?q', ''),
            ]:
            self.assertEqual(urlparse.urldefrag(url), (defrag, frag)) 
Example 26
Project: global-ssh-server   Author: nthiep   File: storage.py    GNU Affero General Public License v3.0
def url(self, name, force=False):
        """
        Returns the real URL in DEBUG mode.
        """
        if settings.DEBUG and not force:
            hashed_name, fragment = name, ''
        else:
            clean_name, fragment = urldefrag(name)
            if urlsplit(clean_name).path.endswith('/'):  # don't hash paths
                hashed_name = name
            else:
                cache_key = self.cache_key(name)
                hashed_name = self.cache.get(cache_key)
                if hashed_name is None:
                    hashed_name = self.hashed_name(clean_name).replace('\\', '/')
                    # set the cache if there was a miss
                    # (e.g. if cache server goes down)
                    self.cache.set(cache_key, hashed_name)

        final_url = super(CachedFilesMixin, self).url(hashed_name)

        # Special casing for a @font-face hack, like url(myfont.eot?#iefix)
        # http://www.fontspring.com/blog/the-new-bulletproof-font-face-syntax
        query_fragment = '?#' in name  # [sic!]
        if fragment or query_fragment:
            urlparts = list(urlsplit(final_url))
            if fragment and not urlparts[4]:
                urlparts[4] = fragment
            if query_fragment and not urlparts[3]:
                urlparts[2] += '?'
            final_url = urlunsplit(urlparts)

        return unquote(final_url) 
Example 27
Project: wikipedia-crawler   Author: bernardopires   File: tasks.py    MIT License
def build_link(link, parent):
    # urljoin is so magical that even if link is an absolute URL it will just use that
    return urldefrag(urljoin(parent, link))[0] 
Example 28
Project: PocCollect   Author: nanshihui   File: linktool.py    MIT License
def get_tag_a(self):
        # Handle <a> links
        for tag in self.soup.find_all('a'):
            if tag.attrs.has_key('href'):
                link = tag.attrs['href']
                # link = urlparse.urldefrag(tag.attrs['href'])[0] # strip the #fragment part
                complet_link = self.complet_url(link.strip())
                if complet_link:
                    self.url_links['a'].append(complet_link)
        return self.url_links 
Example 29
Project: server   Author: lowitty   File: client.py    MIT License
def _urljoin(base, url):
    """
    Construct a full ("absolute") URL by combining a "base URL" with another
    URL. Informally, this uses components of the base URL, in particular the
    addressing scheme, the network location and (part of) the path, to provide
    missing components in the relative URL.

    Additionally, the fragment identifier is preserved according to the HTTP
    1.1 bis draft.

    @type base: C{bytes}
    @param base: Base URL.

    @type url: C{bytes}
    @param url: URL to combine with C{base}.

    @return: An absolute URL resulting from the combination of C{base} and
        C{url}.

    @see: L{urlparse.urljoin}

    @see: U{https://tools.ietf.org/html/draft-ietf-httpbis-p2-semantics-22#section-7.1.2}
    """
    base, baseFrag = urldefrag(base)
    url, urlFrag = urldefrag(urljoin(base, url))
    return urljoin(url, b'#' + (urlFrag or baseFrag)) 
Example 30
Project: server   Author: lowitty   File: client.py    MIT License
def _urljoin(base, url):
    """
    Construct a full ("absolute") URL by combining a "base URL" with another
    URL. Informally, this uses components of the base URL, in particular the
    addressing scheme, the network location and (part of) the path, to provide
    missing components in the relative URL.

    Additionally, the fragment identifier is preserved according to the HTTP
    1.1 bis draft.

    @type base: C{bytes}
    @param base: Base URL.

    @type url: C{bytes}
    @param url: URL to combine with C{base}.

    @return: An absolute URL resulting from the combination of C{base} and
        C{url}.

    @see: L{urlparse.urljoin}

    @see: U{https://tools.ietf.org/html/draft-ietf-httpbis-p2-semantics-22#section-7.1.2}
    """
    base, baseFrag = urldefrag(base)
    url, urlFrag = urldefrag(urljoin(base, url))
    return urljoin(url, b'#' + (urlFrag or baseFrag)) 
Example 31
Project: WebScraping   Author: liinnux   File: link_crawler3.py    Apache License 2.0
def normalize(seed_url, link):
    """Normalize this URL by removing hash and adding domain
    """
    link, _ = urlparse.urldefrag(link) # remove hash to avoid duplicates
    return urlparse.urljoin(seed_url, link) 
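A usage sketch for normalize() (hypothetical values; assumes Python 2's urlparse module is imported): the fragment is stripped before the link is resolved against the seed URL, so anchors on the same page collapse to a single crawl target.

normalize('http://example.com/index.html', '/about.html#team')   # -> 'http://example.com/about.html'
normalize('http://example.com/index.html', '/about.html#jobs')   # -> 'http://example.com/about.html'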
Example 32
Project: WebScraping   Author: liinnux   File: process_crawler.py    Apache License 2.0
def normalize(seed_url, link):
    """Normalize this URL by removing hash and adding domain
    """
    link, _ = urlparse.urldefrag(link) # remove hash to avoid duplicates
    return urlparse.urljoin(seed_url, link) 
Example 33
Project: WebScraping   Author: liinnux   File: threaded_crawler.py    Apache License 2.0
def normalize(seed_url, link):
    """Normalize this URL by removing hash and adding domain
    """
    link, _ = urlparse.urldefrag(link) # remove hash to avoid duplicates
    return urlparse.urljoin(seed_url, link) 
Example 34
Project: WebScraping   Author: liinnux   File: link_crawler.py    Apache License 2.0
def normalize(seed_url, link):
    """Normalize this URL by removing hash and adding domain
    """
    link, _ = urlparse.urldefrag(link) # remove hash to avoid duplicates
    return urlparse.urljoin(seed_url, link) 
Example 35
Project: WebScraping   Author: liinnux   File: link_crawler.py    Apache License 2.0
def normalize(seed_url, link):
    """Normalize this URL by removing hash and adding domain
    """
    link, _ = urlparse.urldefrag(link) # remove hash to avoid duplicates
    return urlparse.urljoin(seed_url, link) 
Example 36
Project: krauler   Author: occrp-attic   File: url.py    MIT License
def normalize_url(url):
    # TODO: learn from https://github.com/hypothesis/h/blob/master/h/api/uri.py
    try:
        url = urlnorm.norm(url)
        url, _ = urldefrag(url)
        url = re.sub('[\n\r]', '', url)
        url = url.rstrip('/')
        return url
    except Exception:  # urlnorm rejects invalid URLs; treat any failure as no URL
        return None 
Example 37
Project: voteswap   Author: sbuss   File: discover.py    MIT License
def getDisplayIdentifier(self):
        """Return the display_identifier if set, else return the claimed_id.
        """
        if self.display_identifier is not None:
            return self.display_identifier
        if self.claimed_id is None:
            return None
        else:
            return urlparse.urldefrag(self.claimed_id)[0] 
Example 38
Project: webid-auth-nginx   Author: zenomt   File: auth.py    MIT License
def fetch_graph_cached_shared(uri):
	debug_log("fetch graph shared <%s>", uri)
	if isinstance(uri, unicode):
		uri = uri.encode('utf8')
	uri = urlparse.urldefrag(uri)[0]
	entry = fetch_graph_cache.get(uri)
	if entry and (entry['stale_at'] > time.time()):
		debug_log("<%s> fresh in cache", uri)
		returnValue(entry['graph'])
	request_queue = fetch_graph_requests.get(uri)
	if request_queue is None:
		request_queue = []
		fetch_graph_requests[uri] = request_queue
		try:
			headers = Headers()
			if entry and entry['etag']:
				headers.addRawHeader('If-None-Match', entry['etag'])
			request = agent.request(b'GET', uri, headers=headers)
			request.addTimeout(args.http_timeout, reactor)
			response = yield request
			body = yield readBody(response)

			if 304 == response.code:
				entry['stale_at'] = time.time() + args.stale_period
				graph = entry['graph']
				debug_log("<%s> 304 Not Modified", uri)
			elif 200 != response.code:
				raise ValueError("bad response from <%s>: %s" % (uri, response.code))
			else:
				graph = rdflib.Graph()
				format = response.headers.getRawHeaders("content-type", [None])[0]
				format = re.split(r' *; *', format)[0] if format else None
				etag = response.headers.getRawHeaders("ETag", [None])[0]
				graph.parse(data=body, format=format, publicID=uri)
				fetch_graph_cache[uri] = dict(etag=etag, graph=graph, stale_at=time.time() + args.stale_period)
				debug_log("<%s> newly loaded", uri)

			del fetch_graph_requests[uri]
			for each in request_queue:
				each.callback(graph)

			returnValue(graph)

		except Exception as e:
			del fetch_graph_requests[uri]
			for each in request_queue:
				each.errback(e)
			raise e
	else:
		d = Deferred()
		request_queue.append(d)
		graph = yield d
		debug_log("<%s> from shared download", uri)
		returnValue(graph) 
Example 39
Project: oa_qian   Author: sunqb   File: consumer.py    Apache License 2.0
def _verifyDiscoverySingle(self, endpoint, to_match):
        """Verify that the given endpoint matches the information
        extracted from the OpenID assertion, and raise an exception if
        there is a mismatch.

        @type endpoint: openid.consumer.discover.OpenIDServiceEndpoint
        @type to_match: openid.consumer.discover.OpenIDServiceEndpoint

        @rtype: NoneType

        @raises ProtocolError: when the endpoint does not match the
            discovered information.
        """
        # Every type URI that's in the to_match endpoint has to be
        # present in the discovered endpoint.
        for type_uri in to_match.type_uris:
            if not endpoint.usesExtension(type_uri):
                raise TypeURIMismatch(type_uri, endpoint)

        # Fragments do not influence discovery, so we can't compare a
        # claimed identifier with a fragment to discovered information.
        defragged_claimed_id, _ = urldefrag(to_match.claimed_id)
        if defragged_claimed_id != endpoint.claimed_id:
            raise ProtocolError(
                'Claimed ID does not match (different subjects!), '
                'Expected %s, got %s' %
                (defragged_claimed_id, endpoint.claimed_id))

        if to_match.getLocalID() != endpoint.getLocalID():
            raise ProtocolError('local_id mismatch. Expected %s, got %s' %
                                (to_match.getLocalID(), endpoint.getLocalID()))

        # If the server URL is None, this must be an OpenID 1
        # response, because op_endpoint is a required parameter in
        # OpenID 2. In that case, we don't actually care what the
        # discovered server_url is, because signature checking or
        # check_auth should take care of that check for us.
        if to_match.server_url is None:
            assert to_match.preferredNamespace() == OPENID1_NS, (
                """The code calling this must ensure that OpenID 2
                responses have a non-none `openid.op_endpoint' and
                that it is set as the `server_url' attribute of the
                `to_match' endpoint.""")

        elif to_match.server_url != endpoint.server_url:
            raise ProtocolError('OP Endpoint mismatch. Expected %s, got %s' %
                                (to_match.server_url, endpoint.server_url)) 
Example 40
Project: xuemc   Author: skycucumber   File: consumer.py    GNU General Public License v2.0
def _verifyDiscoverySingle(self, endpoint, to_match):
        """Verify that the given endpoint matches the information
        extracted from the OpenID assertion, and raise an exception if
        there is a mismatch.

        @type endpoint: openid.consumer.discover.OpenIDServiceEndpoint
        @type to_match: openid.consumer.discover.OpenIDServiceEndpoint

        @rtype: NoneType

        @raises ProtocolError: when the endpoint does not match the
            discovered information.
        """
        # Every type URI that's in the to_match endpoint has to be
        # present in the discovered endpoint.
        for type_uri in to_match.type_uris:
            if not endpoint.usesExtension(type_uri):
                raise TypeURIMismatch(type_uri, endpoint)

        # Fragments do not influence discovery, so we can't compare a
        # claimed identifier with a fragment to discovered information.
        defragged_claimed_id, _ = urldefrag(to_match.claimed_id)
        if defragged_claimed_id != endpoint.claimed_id:
            raise ProtocolError(
                'Claimed ID does not match (different subjects!), '
                'Expected %s, got %s' %
                (defragged_claimed_id, endpoint.claimed_id))

        if to_match.getLocalID() != endpoint.getLocalID():
            raise ProtocolError('local_id mismatch. Expected %s, got %s' %
                                (to_match.getLocalID(), endpoint.getLocalID()))

        # If the server URL is None, this must be an OpenID 1
        # response, because op_endpoint is a required parameter in
        # OpenID 2. In that case, we don't actually care what the
        # discovered server_url is, because signature checking or
        # check_auth should take care of that check for us.
        if to_match.server_url is None:
            assert to_match.preferredNamespace() == OPENID1_NS, (
                """The code calling this must ensure that OpenID 2
                responses have a non-none `openid.op_endpoint' and
                that it is set as the `server_url' attribute of the
                `to_match' endpoint.""")

        elif to_match.server_url != endpoint.server_url:
            raise ProtocolError('OP Endpoint mismatch. Expected %s, got %s' %
                                (to_match.server_url, endpoint.server_url)) 
Example 41
Project: voteswap   Author: sbuss   File: consumer.py    MIT License
def _verifyDiscoverySingle(self, endpoint, to_match):
        """Verify that the given endpoint matches the information
        extracted from the OpenID assertion, and raise an exception if
        there is a mismatch.

        @type endpoint: openid.consumer.discover.OpenIDServiceEndpoint
        @type to_match: openid.consumer.discover.OpenIDServiceEndpoint

        @rtype: NoneType

        @raises ProtocolError: when the endpoint does not match the
            discovered information.
        """
        # Every type URI that's in the to_match endpoint has to be
        # present in the discovered endpoint.
        for type_uri in to_match.type_uris:
            if not endpoint.usesExtension(type_uri):
                raise TypeURIMismatch(type_uri, endpoint)

        # Fragments do not influence discovery, so we can't compare a
        # claimed identifier with a fragment to discovered information.
        defragged_claimed_id, _ = urldefrag(to_match.claimed_id)
        if defragged_claimed_id != endpoint.claimed_id:
            raise ProtocolError(
                'Claimed ID does not match (different subjects!), '
                'Expected %s, got %s' %
                (defragged_claimed_id, endpoint.claimed_id))

        if to_match.getLocalID() != endpoint.getLocalID():
            raise ProtocolError('local_id mismatch. Expected %s, got %s' %
                                (to_match.getLocalID(), endpoint.getLocalID()))

        # If the server URL is None, this must be an OpenID 1
        # response, because op_endpoint is a required parameter in
        # OpenID 2. In that case, we don't actually care what the
        # discovered server_url is, because signature checking or
        # check_auth should take care of that check for us.
        if to_match.server_url is None:
            assert to_match.preferredNamespace() == OPENID1_NS, (
                """The code calling this must ensure that OpenID 2
                responses have a non-none `openid.op_endpoint' and
                that it is set as the `server_url' attribute of the
                `to_match' endpoint.""")

        elif to_match.server_url != endpoint.server_url:
            raise ProtocolError('OP Endpoint mismatch. Expected %s, got %s' %
                                (to_match.server_url, endpoint.server_url))