Python six.moves.urllib.parse.urljoin() Examples

The following are 27 code examples of six.moves.urllib.parse.urljoin(), extracted from open source projects. The project, author, file, and license are noted above each example. You may also want to check out all available functions/classes of the module six.moves.urllib.parse.
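Most of the examples below depend on two urljoin() behaviors that are easy to get wrong. A minimal illustration of the standard-library semantics (the URLs are made up):

from six.moves.urllib.parse import urljoin

# An absolute second argument replaces the base URL's entire path, which is
# why several examples pass '/oauth2/token' or '/robots.txt' against an API root.
urljoin('https://api.monzo.com/some/page', '/oauth2/token')
# -> 'https://api.monzo.com/oauth2/token'

# A relative second argument resolves against the base's last directory, so a
# base without a trailing slash silently drops its final path segment.
urljoin('https://host/api/v1', 'jobs')   # -> 'https://host/api/jobs'
urljoin('https://host/api/v1/', 'jobs')  # -> 'https://host/api/v1/jobs'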
Example #1
Source Project: pymonzo   Author: pawelad   File: monzo_api.py    License: MIT License
def _get_oauth_token(self):
        """
        Get Monzo access token via OAuth2 `authorization code` grant type.

        Official docs:
            https://monzo.com/docs/#acquire-an-access-token

        :returns: OAuth 2 access token
        :rtype: dict
        """
        url = urljoin(self.api_url, '/oauth2/token')

        oauth = OAuth2Session(
            client_id=self._client_id,
            redirect_uri=config.REDIRECT_URI,
        )

        token = oauth.fetch_token(
            token_url=url,
            code=self._auth_code,
            client_secret=self._client_secret,
        )

        return token 
Example #2
Source Project: scrapy-cluster   Author: istresearch   File: lxmlhtml.py    License: MIT License
def _extract_links(self, selector, response_url, response_encoding, base_url):
        '''
        Pretty much the same function, just added 'ignore' to to_native_str()
        '''
        links = []
        # hacky way to get the underlying lxml parsed document
        for el, attr, attr_val in self._iter_links(selector.root):
            # pseudo lxml.html.HtmlElement.make_links_absolute(base_url)
            try:
                attr_val = urljoin(base_url, attr_val)
            except ValueError:
                continue # skipping bogus links
            else:
                url = self.process_attr(attr_val)
                if url is None:
                    continue
            # added 'ignore' to encoding errors
            url = to_native_str(url, encoding=response_encoding,
                                errors='ignore')
            # to fix relative links after process_value
            url = urljoin(response_url, url)
            link = Link(url, _collect_string_content(el) or u'',
                        nofollow=rel_has_nofollow(el.get('rel')))
            links.append(link)
        return self._deduplicate_if_needed(links) 
Example #3
Source Project: pymonzo   Author: pawelad   File: test_monzo_api.py    License: MIT License
def test_class_get_oauth_token_method(self, mocker, mocked_monzo):
        """Test class `_get_oauth_token` method"""
        mocked_fetch_token = mocker.MagicMock()
        mocked_oauth2_session = mocker.patch('pymonzo.monzo_api.OAuth2Session')
        mocked_oauth2_session.return_value.fetch_token = mocked_fetch_token

        token = mocked_monzo._get_oauth_token()

        assert token == mocked_fetch_token.return_value

        mocked_oauth2_session.assert_called_once_with(
            client_id=mocked_monzo._client_id,
            redirect_uri=config.REDIRECT_URI,
        )
        mocked_fetch_token.assert_called_once_with(
            token_url=urljoin(mocked_monzo.api_url, '/oauth2/token'),
            code=mocked_monzo._auth_code,
            client_secret=mocked_monzo._client_secret,
        ) 
Example #4
Source Project: designate   Author: openstack   File: connector.py    License: Apache License 2.0
def _construct_url(self, relative_path, query_params=None, extattrs=None):
        if query_params is None:
            query_params = {}
        if extattrs is None:
            extattrs = {}

        if not relative_path or relative_path[0] == '/':
            raise ValueError('Path in request must be relative.')
        query = ''
        if query_params or extattrs:
            query = '?'

        if extattrs:
            attrs_queries = []
            for key, value in extattrs.items():
                LOG.debug("key: %s, value: %s", key, value)
                attrs_queries.append('*' + key + '=' + value['value'])
            query += '&'.join(attrs_queries)
        if query_params:
            if len(query) > 1:
                query += '&'
            query += parse.urlencode(query_params)

        baseurl = parse.urljoin(self.wapi_url, parse.quote(relative_path))
        return baseurl + query 
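For illustration, assuming a hypothetical connector whose wapi_url is 'https://infoblox.example/wapi/v2.1/', the method quotes the relative path, urlencodes the parameters, and joins the two:

# Hypothetical usage; conn.wapi_url == 'https://infoblox.example/wapi/v2.1/'
conn._construct_url('record:a', query_params={'name': 'host1.example.com'})
# -> 'https://infoblox.example/wapi/v2.1/record%3Aa?name=host1.example.com'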
Example #5
Source Project: stockfighter   Author: striglia   File: stockfighter.py    License: ISC License
def place_new_order(self, stock, price, qty, direction, order_type):
        """Place an order for a stock.

        https://starfighter.readme.io/docs/place-new-order
        """
        url_fragment = 'venues/{venue}/stocks/{stock}/orders'.format(
            venue=self.venue,
            stock=stock,
        )
        data = {
          "stock": stock,
          "price": price,
          "venue": self.venue,
          "account": self.account,
          "qty": qty,
          "direction": direction,
          "orderType": order_type,
        }
        url = urljoin(self.base_url, url_fragment)
        resp = self.session.post(url, json=data)
        return resp.json() 
Example #6
Source Project: scrape   Author: huntrar   File: utils.py    License: MIT License
def clean_url(url, base_url=None):
    """Add base netloc and path to internal URLs and remove www, fragments."""
    parsed_url = urlparse(url)

    fragment = "{url.fragment}".format(url=parsed_url)
    if fragment:
        url = url.split(fragment)[0]

    # Identify internal URLs and fix their format
    netloc = "{url.netloc}".format(url=parsed_url)
    if base_url is not None and not netloc:
        parsed_base = urlparse(base_url)
        split_base = "{url.scheme}://{url.netloc}{url.path}/".format(url=parsed_base)
        url = urljoin(split_base, url)
        netloc = "{url.netloc}".format(url=urlparse(url))

    if "www." in netloc:
        url = url.replace(netloc, netloc.replace("www.", ""))
    return url.rstrip(string.punctuation) 
Example #7
Source Project: sentinelsat   Author: sentinelsat   File: sentinel.py    License: GNU General Public License v3.0
def is_online(self, id):
        """Returns whether a product is online

        Parameters
        ----------
        id : string
            UUID of the product, e.g. 'a8dd0cfd-613e-45ce-868c-d79177b916ed'

        Returns
        -------
        bool
            True if online, False if in LTA

        """
        # Check https://scihub.copernicus.eu/userguide/ODataAPI#Products_entity for more information

        url = urljoin(self.api_url, "odata/v1/Products('{}')/Online/$value".format(id))
        r = self.session.get(url, auth=self.session.auth, timeout=self.timeout)
        _check_scihub_response(r)
        return r.json() 
Example #8
Source Project: pulsar   Author: galaxyproject   File: interface.py    License: Apache License 2.0
def __init__(self, destination_params, transport):
        self.transport = transport
        remote_host = destination_params.get("url")
        assert remote_host is not None, "Failed to determine url for Pulsar client."
        if not remote_host.startswith("http"):
            remote_host = "http://%s" % remote_host
        manager = destination_params.get("manager", None)
        if manager:
            if "/managers/" in remote_host:
                log.warning("Ignoring manager tag '%s', Pulsar client URL already contains a \"/managers/\" path." % manager)
            else:
                remote_host = urljoin(remote_host, "managers/%s" % manager)
        if not remote_host.endswith("/"):
            remote_host = "%s/" % remote_host
        self.remote_host = remote_host
        self.private_token = destination_params.get("private_token", None) 
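The trailing-slash normalization at the end matters for exactly the reason sketched below: joining a relative path against a base that lacks a trailing slash drops the base's final segment (the URL here is hypothetical):

from six.moves.urllib.parse import urljoin

remote_host = 'http://pulsar.example:8913/managers/production'  # hypothetical
urljoin(remote_host, 'jobs')        # -> 'http://pulsar.example:8913/managers/jobs'
urljoin(remote_host + '/', 'jobs')  # -> 'http://pulsar.example:8913/managers/production/jobs'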
Example #9
Source Project: hfut   Author: elonzh   File: session.py    License: MIT License
def prepare_request(self, request):
        parsed = parse.urlparse(request.url)
        # Check request arguments for illegal characters
        if ENV['REQUEST_ARGUMENTS_CHECK'] and (not parsed.netloc or parsed.netloc == parse.urlparse(self.host).netloc):
            for k, v in reduce(lambda x, y: x + list(y.items()), (request.params, request.data), []):
                pattern = ENV['ILLEGAL_CHARACTERS_PATTERN']
                result = pattern.search(str(k)) or pattern.search(str(v))
                if result:
                    msg = ''.join(['Illegal character found in parameters: ', result.group()])
                    raise ValidationError(msg)
        if not parsed.netloc:
            # requests parses the url while preparing the request, so the url
            # must be replaced with the full address beforehand; see
            # requests.models.PreparedRequest#prepare_url
            request.url = parse.urljoin(self.host, request.url)

        return super(BaseSession, self).prepare_request(request) 
Example #10
Source Project: conda-concourse-ci   Author: conda   File: uploads.py    License: BSD 3-Clause "New" or "Revised" License
def get_upload_channels(upload_config_dir, subdir, channels=None):
    """thought here was to provide whatever channel you have set as an output also to be an input

    Killed this in favor of setting channels in condarc in the docker image.
    """
    configurations = load_yaml_config_dir(upload_config_dir)
    channels = channels or []

    for config in configurations:
        if 'token' in config:
            channels.append(config['user'])
        elif 'server' in config:
            channels.append(parse.urljoin('http://' + config['server'],
                            config['destination_path'].format(subdir=subdir)))
        else:
            channels.append(config['channel'])
    return channels 
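The 'server' branch works even for a bare hostname because urljoin() treats an empty base path as the site root. A small check with hypothetical values:

from six.moves.urllib import parse

parse.urljoin('http://repo.example.com', 'channel/linux-64')
# -> 'http://repo.example.com/channel/linux-64'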
Example #11
Source Project: learn_python3_spider   Author: wistbean   File: redirect.py    License: MIT License
def process_response(self, request, response, spider):
        if (request.meta.get('dont_redirect', False) or
                response.status in getattr(spider, 'handle_httpstatus_list', []) or
                response.status in request.meta.get('handle_httpstatus_list', []) or
                request.meta.get('handle_httpstatus_all', False)):
            return response

        allowed_status = (301, 302, 303, 307, 308)
        if 'Location' not in response.headers or response.status not in allowed_status:
            return response

        location = safe_url_string(response.headers['location'])

        redirected_url = urljoin(request.url, location)

        if response.status in (301, 307, 308) or request.method == 'HEAD':
            redirected = request.replace(url=redirected_url)
            return self._redirect(redirected, request, spider, response.status)

        redirected = self._redirect_request_using_get(request, redirected_url)
        return self._redirect(redirected, request, spider, response.status) 
Example #12
Source Project: learn_python3_spider   Author: wistbean   File: regex.py    License: MIT License
def _extract_links(self, response_text, response_url, response_encoding, base_url=None):
        def clean_text(text):
            return replace_escape_chars(remove_tags(text.decode(response_encoding))).strip()

        def clean_url(url):
            clean_url = ''
            try:
                clean_url = urljoin(base_url, replace_entities(clean_link(url.decode(response_encoding))))
            except ValueError:
                pass
            return clean_url

        if base_url is None:
            base_url = get_base_url(response_text, response_url, response_encoding)

        links_text = linkre.findall(response_text)
        return [Link(clean_url(url).encode(response_encoding),
                     clean_text(text))
                for url, _, text in links_text] 
Example #13
Source Project: learn_python3_spider   Author: wistbean   File: lxmlhtml.py    License: MIT License
def _extract_links(self, selector, response_url, response_encoding, base_url):
        links = []
        # hacky way to get the underlying lxml parsed document
        for el, attr, attr_val in self._iter_links(selector.root):
            # pseudo lxml.html.HtmlElement.make_links_absolute(base_url)
            try:
                if self.strip:
                    attr_val = strip_html5_whitespace(attr_val)
                attr_val = urljoin(base_url, attr_val)
            except ValueError:
                continue  # skipping bogus links
            else:
                url = self.process_attr(attr_val)
                if url is None:
                    continue
            url = to_native_str(url, encoding=response_encoding)
            # to fix relative links after process_value
            url = urljoin(response_url, url)
            link = Link(url, _collect_string_content(el) or u'',
                        nofollow=rel_has_nofollow(el.get('rel')))
            links.append(link)
        return self._deduplicate_if_needed(links) 
Example #14
Source Project: learn_python3_spider   Author: wistbean   File: sgml.py    License: MIT License
def _extract_links(self, response_text, response_url, response_encoding, base_url=None):
        """ Do the real extraction work """
        self.reset()
        self.feed(response_text)
        self.close()

        ret = []
        if base_url is None:
            base_url = urljoin(response_url, self.base_url) if self.base_url else response_url
        for link in self.links:
            if isinstance(link.url, six.text_type):
                link.url = link.url.encode(response_encoding)
            try:
                link.url = urljoin(base_url, link.url)
            except ValueError:
                continue
            link.url = safe_url_string(link.url, response_encoding)
            link.text = to_unicode(link.text, response_encoding, errors='replace').strip()
            ret.append(link)

        return ret 
Example #15
Source Project: learn_python3_spider   Author: wistbean   File: htmlparser.py    License: MIT License
def _extract_links(self, response_text, response_url, response_encoding):
        self.reset()
        self.feed(response_text)
        self.close()

        links = unique_list(self.links, key=lambda link: link.url) if self.unique else self.links

        ret = []
        base_url = urljoin(response_url, self.base_url) if self.base_url else response_url
        for link in links:
            if isinstance(link.url, six.text_type):
                link.url = link.url.encode(response_encoding)
            try:
                link.url = urljoin(base_url, link.url)
            except ValueError:
                continue
            link.url = safe_url_string(link.url, response_encoding)
            link.text = link.text.decode(response_encoding)
            ret.append(link)

        return ret 
Example #16
Source Project: pyspider   Author: binux   File: tornado_fetcher.py    License: Apache License 2.0
def can_fetch(self, user_agent, url):
        parsed = urlsplit(url)
        domain = parsed.netloc
        if domain in self.robots_txt_cache:
            robot_txt = self.robots_txt_cache[domain]
            if time.time() - robot_txt.mtime() > self.robot_txt_age:
                robot_txt = None
        else:
            robot_txt = None

        if robot_txt is None:
            robot_txt = RobotFileParser()
            try:
                response = yield gen.maybe_future(self.http_client.fetch(
                    urljoin(url, '/robots.txt'), connect_timeout=10, request_timeout=30))
                content = response.body
            except tornado.httpclient.HTTPError as e:
                logger.error('load robots.txt from %s error: %r', domain, e)
                content = ''

            try:
                content = content.decode('utf8', 'ignore')
            except UnicodeDecodeError:
                content = ''

            robot_txt.parse(content.splitlines())
            self.robots_txt_cache[domain] = robot_txt

        raise gen.Return(robot_txt.can_fetch(user_agent, url)) 
Example #17
Source Project: pyspider   Author: binux   File: app.py    License: Apache License 2.0
def cdn_url_handler(error, endpoint, kwargs):
    if endpoint == 'cdn':
        path = kwargs.pop('path')
        # cdn = app.config.get('cdn', 'http://cdn.staticfile.org/')
        # cdn = app.config.get('cdn', '//cdnjs.cloudflare.com/ajax/libs/')
        cdn = app.config.get('cdn', '//cdnjscn.b0.upaiyun.com/libs/')
        return urljoin(cdn, path)
    else:
        exc_type, exc_value, tb = sys.exc_info()
        if exc_value is error:
            reraise(exc_type, exc_value, tb)
        else:
            raise error 
Example #18
Source Project: kubeshift   Author: cdrage   File: base.py    License: GNU Lesser General Public License v3.0
def _format_url(urlbase, urlpath):
    if not urlbase.endswith('/'):
        urlbase += '/'
    if urlpath:
        urlpath = urlpath.lstrip('/')
    return urlparse.urljoin(urlbase, urlpath) 
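Normalizing both sides makes the join purely additive, however the caller spells the slashes. With hypothetical arguments:

_format_url('https://k8s.example/api', 'v1/namespaces')
# -> 'https://k8s.example/api/v1/namespaces'
_format_url('https://k8s.example/api/', '/v1/namespaces')
# -> 'https://k8s.example/api/v1/namespaces'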
Example #19
Source Project: pythonista-tools-installer   Author: ywangd   File: ptinstaller.py    License: MIT License
def contents(owner, repo):
        r = requests.get(urljoin(GitHubAPI.API_URL, 'repos/{}/{}/contents'.format(owner, repo)))
        return r.json() 
Example #20
Source Project: pythonista-tools-installer   Author: ywangd   File: ptinstaller.py    License: MIT License
def download(self, url):
        user_name, repo_name = self.get_github_user_repo(url)
        zipfile_url = urljoin(url, '/%s/%s/archive/master.zip' % (user_name, repo_name))
        tmp_zipfile = os.path.join(os.environ['TMPDIR'], '%s-master.zip' % repo_name)

        r = requests.get(zipfile_url)
        with open(tmp_zipfile, 'wb') as outs:
            outs.write(r.content)

        return tmp_zipfile 
Example #21
Source Project: tox   Author: tox-dev   File: test_provision.py    License: MIT License
def space_path2url(path):
    at_path = str(path)
    if " " not in at_path:
        return at_path
    return urljoin("file:", pathname2url(os.path.abspath(at_path))) 
Example #22
Source Project: pagure   Author: Pagure   File: utils.py    License: GNU General Public License v2.0
def is_safe_url(target):  # pragma: no cover
    """ Checks that the target url is safe and sending to the current
    website not some other malicious one.
    """
    ref_url = urlparse(flask.request.host_url)
    test_url = urlparse(urljoin(flask.request.host_url, target))
    return (
        test_url.scheme in ("http", "https")
        and ref_url.netloc == test_url.netloc
    ) 
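This pattern defeats the common open-redirect tricks: an absolute target URL wins the join and changes the netloc, and a javascript: target fails the scheme test. A self-contained sketch of the same logic (the host URL is hypothetical; the real function reads it from flask.request):

from six.moves.urllib.parse import urljoin, urlparse

host_url = 'https://pagure.example/'  # stands in for flask.request.host_url

def _is_safe(target):
    ref = urlparse(host_url)
    test = urlparse(urljoin(host_url, target))
    return test.scheme in ('http', 'https') and ref.netloc == test.netloc

_is_safe('/user/settings')          # True  - relative path stays on this host
_is_safe('https://evil.example/x')  # False - absolute URL replaces the netloc
_is_safe('javascript:alert(1)')     # False - rejected by the scheme check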
Example #23
Source Project: pagure   Author: Pagure   File: login.py    License: GNU General Public License v2.0
def send_confirmation_email(user):
    """ Sends the confirmation email asking the user to confirm its email
    address.
    """
    if not user.emails:
        return

    # The URL of this instance
    instance_url = pagure.config.config.get("APP_URL", flask.request.url_root)

    # A link with a secret token to confirm the registration
    confirmation_url = urljoin(
        instance_url, flask.url_for("ui_ns.confirm_user", token=user.token),
    )

    message = """Dear %(username)s,

Thank you for registering on pagure at %(instance_url)s.

To finish your registration, please click on the following link or copy/paste
it in your browser:

    %(confirmation_url)s

Your account will not be activated until you finish this step.

Sincerely,
Your pagure admin.
""" % (
        {
            "username": user.username,
            "instance_url": instance_url,
            "confirmation_url": confirmation_url,
        }
    )

    pagure.lib.notify.send_email(
        text=message,
        subject="Confirm your user account",
        to_mail=user.emails[0].email,
    ) 
Example #24
Source Project: pagure   Author: Pagure   File: login.py    License: GNU General Public License v2.0
def send_lostpassword_email(user):
    """ Sends the email with the information on how to reset his/her password
    to the user.
    """
    if not user.emails:
        return

    url = pagure.config.config.get("APP_URL", flask.request.url_root)

    url = urljoin(
        url or flask.request.url_root,
        flask.url_for("ui_ns.reset_password", token=user.token),
    )

    message = """ Dear %(username)s,

The IP address %(ip)s has requested a password change for this account.

If you wish to change your password, please click on the following link or
copy/paste it in your browser:
  %(url)s

If you did not request this change, please inform an admin immediately!

Sincerely,
Your pagure admin.
""" % (
        {
            "username": user.username,
            "url": url,
            "ip": flask.request.remote_addr,
        }
    )

    pagure.lib.notify.send_email(
        text=message,
        subject="Confirm your password change",
        to_mail=user.emails[0].email,
    ) 
Example #25
Source Project: pymonzo   Author: pawelad   File: monzo_api.py    License: MIT License
def _refresh_oath_token(self):
        """
        Refresh Monzo OAuth 2 token.

        Official docs:
            https://monzo.com/docs/#refreshing-access

        :raises UnableToRefreshTokenException: when token couldn't be refreshed
        """
        url = urljoin(self.api_url, '/oauth2/token')
        data = {
            'grant_type': 'refresh_token',
            'client_id': self._client_id,
            'client_secret': self._client_secret,
            'refresh_token': self._token['refresh_token'],
        }

        token_response = requests.post(url, data=data)
        token = token_response.json()

        # Not ideal, but that's how Monzo API returns errors
        if 'error' in token:
            raise CantRefreshTokenError(
                "Unable to refresh the token: {}".format(token)
            )

        self._token = token
        self._save_token_on_disk() 
Example #26
Source Project: pymonzo   Author: pawelad   File: monzo_api.py    License: MIT License
def _get_response(self, method, endpoint, params=None):
        """
        Helper method to handle HTTP requests and catch API errors

        :param method: valid HTTP method
        :type method: str
        :param endpoint: API endpoint
        :type endpoint: str
        :param params: extra parameters passed with the request
        :type params: dict
        :returns: API response
        :rtype: Response
        """
        url = urljoin(self.api_url, endpoint)

        try:
            response = getattr(self._session, method)(url, params=params)

            # Check if Monzo API returned HTTP 401, which could mean that the
            # token is expired
            if response.status_code == 401:
                raise TokenExpiredError

        except TokenExpiredError:
            # For some reason 'requests-oauthlib' automatic token refreshing
            # doesn't work so we do it here semi-manually
            self._refresh_oath_token()

            self._session = OAuth2Session(
                client_id=self._client_id,
                token=self._token,
            )

            response = getattr(self._session, method)(url, params=params)

        if response.status_code != requests.codes.ok:
            raise MonzoAPIError(
                "Something went wrong: {}".format(response.json())
            )

        return response 
Example #27
Source Project: pymonzo   Author: pawelad   File: test_monzo_api.py    License: MIT License
def test_class_refresh_oath_token_method(self, mocker, mocked_monzo):
        """Test class `_refresh_oath_token` method"""
        mocked_requests_post_json = mocker.MagicMock()
        mocked_requests_post = mocker.patch('pymonzo.monzo_api.requests.post')
        mocked_requests_post.return_value.json = mocked_requests_post_json
        mocked_save_token_on_disk = mocker.patch(
            'pymonzo.monzo_api.MonzoAPI._save_token_on_disk'
        )

        expected_data = {
            'grant_type': 'refresh_token',
            'client_id': mocked_monzo._client_id,
            'client_secret': mocked_monzo._client_secret,
            'refresh_token': mocked_monzo._token['refresh_token'],
        }

        mocked_monzo._refresh_oath_token()

        assert mocked_monzo._token == mocked_requests_post_json.return_value

        mocked_requests_post.assert_called_once_with(
            urljoin(mocked_monzo.api_url, '/oauth2/token'),
            data=expected_data,
        )
        mocked_requests_post_json.assert_called_once_with()
        mocked_save_token_on_disk.assert_called_once_with()