Python six.moves.urllib.parse.urljoin() Examples
The following are 30 code examples of six.moves.urllib.parse.urljoin().
These examples are extracted from open source projects.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module six.moves.urllib.parse, or try the search function.

Example #1
Source Project: pymonzo Author: pawelad File: monzo_api.py License: MIT License | 7 votes |
def _get_oauth_token(self):
    """
    Obtain a Monzo access token via the OAuth 2 `authorization code` grant.

    Official docs:
        https://monzo.com/docs/#acquire-an-access-token

    :returns: OAuth 2 access token
    :rtype: dict
    """
    token_endpoint = urljoin(self.api_url, '/oauth2/token')

    # Build a fresh OAuth2 session pointing back at our registered redirect.
    session = OAuth2Session(
        client_id=self._client_id,
        redirect_uri=config.REDIRECT_URI,
    )

    # Exchange the one-time authorization code for an access token.
    return session.fetch_token(
        token_url=token_endpoint,
        code=self._auth_code,
        client_secret=self._client_secret,
    )
Example #2
Source Project: scrapy-cluster Author: istresearch File: lxmlhtml.py License: MIT License | 6 votes |
def _extract_links(self, selector, response_url, response_encoding, base_url):
    '''
    Pretty much the same function as scrapy's lxml extractor, just added
    'ignore' to to_native_str() so undecodable bytes in a URL are dropped
    instead of raising.
    '''
    links = []
    # hacky way to get the underlying lxml parsed document
    for el, attr, attr_val in self._iter_links(selector.root):
        # pseudo lxml.html.HtmlElement.make_links_absolute(base_url)
        try:
            attr_val = urljoin(base_url, attr_val)
        except ValueError:
            continue  # skipping bogus links
        else:
            url = self.process_attr(attr_val)
            if url is None:
                continue
        # added 'ignore' to encoding errors
        url = to_native_str(url, encoding=response_encoding,
                            errors='ignore')
        # to fix relative links after process_value
        url = urljoin(response_url, url)
        link = Link(url, _collect_string_content(el) or u'',
                    nofollow=rel_has_nofollow(el.get('rel')))
        links.append(link)

    return self._deduplicate_if_needed(links)
Example #3
Source Project: pymonzo Author: pawelad File: test_monzo_api.py License: MIT License | 6 votes |
def test_class_get_oauth_token_method(self, mocker, mocked_monzo):
    """Test class `_get_oauth_token` method"""
    # Patch OAuth2Session so no real HTTP token exchange happens.
    mocked_fetch_token = mocker.MagicMock()
    mocked_oauth2_session = mocker.patch('pymonzo.monzo_api.OAuth2Session')
    mocked_oauth2_session.return_value.fetch_token = mocked_fetch_token

    token = mocked_monzo._get_oauth_token()

    # The method must return whatever fetch_token() produced.
    assert token == mocked_fetch_token.return_value

    # The session must be created with the client id and registered redirect.
    mocked_oauth2_session.assert_called_once_with(
        client_id=mocked_monzo._client_id,
        redirect_uri=config.REDIRECT_URI,
    )
    # The token must be fetched from the '/oauth2/token' endpoint.
    mocked_fetch_token.assert_called_once_with(
        token_url=urljoin(mocked_monzo.api_url, '/oauth2/token'),
        code=mocked_monzo._auth_code,
        client_secret=mocked_monzo._client_secret,
    )
Example #4
Source Project: designate Author: openstack File: connector.py License: Apache License 2.0 | 6 votes |
def _construct_url(self, relative_path, query_params=None, extattrs=None):
    """Build a WAPI request URL from a relative path plus optional query
    parameters and extensible-attribute filters.

    :param relative_path: path relative to ``self.wapi_url``; must be
        non-empty and must not start with '/'
    :param query_params: plain query parameters, urlencoded as-is
    :param extattrs: mapping of attribute name -> {'value': ...}; each is
        rendered as ``*name=value`` per the Infoblox WAPI convention
    :raises ValueError: if ``relative_path`` is empty or absolute
    """
    query_params = query_params if query_params is not None else {}
    extattrs = extattrs if extattrs is not None else {}

    if not relative_path or relative_path.startswith('/'):
        raise ValueError('Path in request must be relative.')

    # Start the query string only when there is something to append.
    query = '?' if (query_params or extattrs) else ''

    if extattrs:
        attr_parts = []
        for key, value in extattrs.items():
            LOG.debug("key: %s, value: %s", key, value)
            attr_parts.append('*' + key + '=' + value['value'])
        query += '&'.join(attr_parts)

    if query_params:
        # '&' separator is needed only when extattrs already added content.
        if len(query) > 1:
            query += '&'
        query += parse.urlencode(query_params)

    base = parse.urljoin(self.wapi_url, parse.quote(relative_path))
    return base + query
Example #5
Source Project: stockfighter Author: striglia File: stockfighter.py License: ISC License | 6 votes |
def place_new_order(self, stock, price, qty, direction, order_type):
    """Place an order for a stock.

    https://starfighter.readme.io/docs/place-new-order
    """
    # Order payload per the Stockfighter API contract.
    payload = {
        "stock": stock,
        "price": price,
        "venue": self.venue,
        "account": self.account,
        "qty": qty,
        "direction": direction,
        "orderType": order_type,
    }
    endpoint = 'venues/{venue}/stocks/{stock}/orders'.format(
        venue=self.venue,
        stock=stock,
    )
    response = self.session.post(urljoin(self.base_url, endpoint), json=payload)
    return response.json()
Example #6
Source Project: scrape Author: huntrar File: utils.py License: MIT License | 6 votes |
def clean_url(url, base_url=None):
    """Add base netloc and path to internal URLs and remove www, fragments.

    :param url: URL to clean; may be relative/internal
    :param base_url: base used to absolutize internal (netloc-less) URLs
    :returns: cleaned URL with fragment, 'www.' prefix and trailing
        punctuation removed
    """
    parsed_url = urlparse(url)

    # Drop the fragment.  BUGFIX: the original did ``url.split(fragment)[0]``
    # (splitting on the fragment *text*), which truncated the URL at the
    # wrong place whenever the fragment text also occurred earlier in the
    # URL (e.g. "http://a/page#page"); splitting on '#' is always correct.
    if parsed_url.fragment:
        url = url.split("#")[0]

    # Identify internal URLs and fix their format
    netloc = "{url.netloc}".format(url=parsed_url)
    if base_url is not None and not netloc:
        parsed_base = urlparse(base_url)
        split_base = "{url.scheme}://{url.netloc}{url.path}/".format(url=parsed_base)
        url = urljoin(split_base, url)
        netloc = "{url.netloc}".format(url=urlparse(url))

    # Strip a leading 'www.' from the host part.
    if "www." in netloc:
        url = url.replace(netloc, netloc.replace("www.", ""))

    return url.rstrip(string.punctuation)
Example #7
Source Project: sentinelsat Author: sentinelsat File: sentinel.py License: GNU General Public License v3.0 | 6 votes |
def is_online(self, id):
    """Returns whether a product is online

    Parameters
    ----------
    id : string
        UUID of the product, e.g. 'a8dd0cfd-613e-45ce-868c-d79177b916ed'

    Returns
    -------
    bool
        True if online, False if in LTA
    """
    # Check https://scihub.copernicus.eu/userguide/ODataAPI#Products_entity
    # for more information
    product_url = urljoin(
        self.api_url,
        "odata/v1/Products('{}')/Online/$value".format(id),
    )
    response = self.session.get(
        product_url, auth=self.session.auth, timeout=self.timeout
    )
    _check_scihub_response(response)
    return response.json()
Example #8
Source Project: pulsar Author: galaxyproject File: interface.py License: Apache License 2.0 | 6 votes |
def __init__(self, destination_params, transport):
    """Normalize the Pulsar server URL (scheme, optional manager path,
    trailing slash) and record transport plus optional private token.
    """
    self.transport = transport

    url = destination_params.get("url")
    assert url is not None, "Failed to determine url for Pulsar client."

    # Default to plain HTTP when no scheme was supplied.
    if not url.startswith("http"):
        url = "http://%s" % url

    manager = destination_params.get("manager", None)
    if manager:
        if "/managers/" in url:
            log.warning("Ignoring manager tag '%s', Pulsar client URL already contains a \"/managers/\" path." % manager)
        else:
            url = urljoin(url, "managers/%s" % manager)

    # Downstream urljoin-based requests need the base to end with '/'.
    if not url.endswith("/"):
        url = "%s/" % url

    self.remote_host = url
    self.private_token = destination_params.get("private_token", None)
Example #9
Source Project: hfut Author: elonzh File: session.py License: MIT License | 6 votes |
def prepare_request(self, request): parsed = parse.urlparse(request.url) # 非法字符检查 if ENV['REQUEST_ARGUMENTS_CHECK'] and (not parsed.netloc or parsed.netloc == parse.urlparse(self.host).netloc): for k, v in reduce(lambda x, y: x + list(y.items()), (request.params, request.data), []): pattern = ENV['ILLEGAL_CHARACTERS_PATTERN'] result = pattern.search(str(k)) or pattern.search(str(v)) if result: msg = ''.join(['参数中出现非法字符: ', result.group()]) raise ValidationError(msg) if not parsed.netloc: # requests 在准备 url 进行解析, 因此只能在准备前将 url 换成完整的地址 # requests.models.PreparedRequest#prepare_url request.url = parse.urljoin(self.host, request.url) return super(BaseSession, self).prepare_request(request)
Example #10
Source Project: conda-concourse-ci Author: conda File: uploads.py License: BSD 3-Clause "New" or "Revised" License | 6 votes |
def get_upload_channels(upload_config_dir, subdir, channels=None):
    """thought here was to provide whatever channel you have set as an output also to be an input

    Killed this in favor of setting channels in condarc in the docker image.
    """
    # NOTE: keeps the original semantics of appending to (and returning)
    # the caller-supplied list when one is given.
    channels = channels or []
    for config in load_yaml_config_dir(upload_config_dir):
        if 'token' in config:
            # anaconda.org-style upload: channel is the user name.
            channel = config['user']
        elif 'server' in config:
            # Custom server upload: channel is the destination URL.
            channel = parse.urljoin(
                'http://' + config['server'],
                config['destination_path'].format(subdir=subdir))
        else:
            channel = config['channel']
        channels.append(channel)
    return channels
Example #11
Source Project: learn_python3_spider Author: wistbean File: redirect.py License: MIT License | 6 votes |
def process_response(self, request, response, spider):
    """Handle HTTP 3xx responses by issuing a redirected request, unless
    the spider/request opted out of redirect handling."""
    # Respect per-request and per-spider opt-outs from redirect handling.
    if (request.meta.get('dont_redirect', False) or
            response.status in getattr(spider, 'handle_httpstatus_list', []) or
            response.status in request.meta.get('handle_httpstatus_list', []) or
            request.meta.get('handle_httpstatus_all', False)):
        return response

    allowed_status = (301, 302, 303, 307, 308)
    if 'Location' not in response.headers or response.status not in allowed_status:
        return response

    location = safe_url_string(response.headers['location'])

    # Location may be relative; resolve against the request URL.
    redirected_url = urljoin(request.url, location)

    # 301/307/308 (and any HEAD request) preserve the original method/body.
    if response.status in (301, 307, 308) or request.method == 'HEAD':
        redirected = request.replace(url=redirected_url)
        return self._redirect(redirected, request, spider, response.status)

    # 302/303 on non-HEAD: re-issue as a GET per browser convention.
    redirected = self._redirect_request_using_get(request, redirected_url)
    return self._redirect(redirected, request, spider, response.status)
Example #12
Source Project: learn_python3_spider Author: wistbean File: regex.py License: MIT License | 6 votes |
def _extract_links(self, response_text, response_url, response_encoding, base_url=None):
    """Extract links from raw response text with a regex, returning Link
    objects with cleaned URLs and tag-stripped anchor text."""
    def clean_text(text):
        # Strip markup and escape characters from the anchor text.
        return replace_escape_chars(remove_tags(text.decode(response_encoding))).strip()

    def clean_url(url):
        clean_url = ''
        try:
            # Absolutize against base_url; bogus URLs yield '' instead of raising.
            clean_url = urljoin(base_url, replace_entities(clean_link(url.decode(response_encoding))))
        except ValueError:
            pass
        return clean_url

    if base_url is None:
        base_url = get_base_url(response_text, response_url, response_encoding)

    links_text = linkre.findall(response_text)
    return [Link(clean_url(url).encode(response_encoding),
                 clean_text(text))
            for url, _, text in links_text]
Example #13
Source Project: learn_python3_spider Author: wistbean File: lxmlhtml.py License: MIT License | 6 votes |
def _extract_links(self, selector, response_url, response_encoding, base_url): links = [] # hacky way to get the underlying lxml parsed document for el, attr, attr_val in self._iter_links(selector.root): # pseudo lxml.html.HtmlElement.make_links_absolute(base_url) try: if self.strip: attr_val = strip_html5_whitespace(attr_val) attr_val = urljoin(base_url, attr_val) except ValueError: continue # skipping bogus links else: url = self.process_attr(attr_val) if url is None: continue url = to_native_str(url, encoding=response_encoding) # to fix relative links after process_value url = urljoin(response_url, url) link = Link(url, _collect_string_content(el) or u'', nofollow=rel_has_nofollow(el.get('rel'))) links.append(link) return self._deduplicate_if_needed(links)
Example #14
Source Project: learn_python3_spider Author: wistbean File: sgml.py License: MIT License | 6 votes |
def _extract_links(self, response_text, response_url, response_encoding, base_url=None):
    """ Do the real extraction work """
    # Feed the SGML parser; it populates self.links / self.base_url.
    self.reset()
    self.feed(response_text)
    self.close()

    ret = []
    if base_url is None:
        # Prefer the document's <base> URL when present.
        base_url = urljoin(response_url, self.base_url) if self.base_url else response_url
    for link in self.links:
        if isinstance(link.url, six.text_type):
            link.url = link.url.encode(response_encoding)
        try:
            link.url = urljoin(base_url, link.url)
        except ValueError:
            continue  # skip unparseable URLs
        link.url = safe_url_string(link.url, response_encoding)
        link.text = to_unicode(link.text, response_encoding, errors='replace').strip()
        ret.append(link)

    return ret
Example #15
Source Project: learn_python3_spider Author: wistbean File: htmlparser.py License: MIT License | 6 votes |
def _extract_links(self, response_text, response_url, response_encoding):
    """Parse the response with HTMLParser and return Link objects,
    optionally de-duplicated by URL."""
    self.reset()
    self.feed(response_text)
    self.close()

    # De-duplicate by URL when the extractor was configured as unique.
    links = unique_list(self.links, key=lambda link: link.url) if self.unique else self.links

    ret = []
    # Prefer the document's <base> URL when present.
    base_url = urljoin(response_url, self.base_url) if self.base_url else response_url
    for link in links:
        if isinstance(link.url, six.text_type):
            link.url = link.url.encode(response_encoding)
        try:
            link.url = urljoin(base_url, link.url)
        except ValueError:
            continue  # skip unparseable URLs
        link.url = safe_url_string(link.url, response_encoding)
        link.text = link.text.decode(response_encoding)
        ret.append(link)

    return ret
Example #16
Source Project: learn_python3_spider Author: wistbean File: regex.py License: MIT License | 6 votes |
def _extract_links(self, response_text, response_url, response_encoding, base_url=None):
    """Extract links from raw response text with a regex, returning Link
    objects with cleaned URLs and tag-stripped anchor text."""
    def clean_text(text):
        # Strip markup and escape characters from the anchor text.
        return replace_escape_chars(remove_tags(text.decode(response_encoding))).strip()

    def clean_url(url):
        clean_url = ''
        try:
            # Absolutize against base_url; bogus URLs yield '' instead of raising.
            clean_url = urljoin(base_url, replace_entities(clean_link(url.decode(response_encoding))))
        except ValueError:
            pass
        return clean_url

    if base_url is None:
        base_url = get_base_url(response_text, response_url, response_encoding)

    links_text = linkre.findall(response_text)
    return [Link(clean_url(url).encode(response_encoding),
                 clean_text(text))
            for url, _, text in links_text]
Example #17
Source Project: learn_python3_spider Author: wistbean File: lxmlhtml.py License: MIT License | 6 votes |
def _extract_links(self, selector, response_url, response_encoding, base_url): links = [] # hacky way to get the underlying lxml parsed document for el, attr, attr_val in self._iter_links(selector.root): # pseudo lxml.html.HtmlElement.make_links_absolute(base_url) try: if self.strip: attr_val = strip_html5_whitespace(attr_val) attr_val = urljoin(base_url, attr_val) except ValueError: continue # skipping bogus links else: url = self.process_attr(attr_val) if url is None: continue url = to_native_str(url, encoding=response_encoding) # to fix relative links after process_value url = urljoin(response_url, url) link = Link(url, _collect_string_content(el) or u'', nofollow=rel_has_nofollow(el.get('rel'))) links.append(link) return self._deduplicate_if_needed(links)
Example #18
Source Project: learn_python3_spider Author: wistbean File: sgml.py License: MIT License | 6 votes |
def _extract_links(self, response_text, response_url, response_encoding, base_url=None):
    """ Do the real extraction work """
    # Feed the SGML parser; it populates self.links / self.base_url.
    self.reset()
    self.feed(response_text)
    self.close()

    ret = []
    if base_url is None:
        # Prefer the document's <base> URL when present.
        base_url = urljoin(response_url, self.base_url) if self.base_url else response_url
    for link in self.links:
        if isinstance(link.url, six.text_type):
            link.url = link.url.encode(response_encoding)
        try:
            link.url = urljoin(base_url, link.url)
        except ValueError:
            continue  # skip unparseable URLs
        link.url = safe_url_string(link.url, response_encoding)
        link.text = to_unicode(link.text, response_encoding, errors='replace').strip()
        ret.append(link)

    return ret
Example #19
Source Project: pyspider Author: binux File: tornado_fetcher.py License: Apache License 2.0 | 5 votes |
def can_fetch(self, user_agent, url):
    """Tornado coroutine: check robots.txt rules for *url*, caching parsed
    robots.txt per domain and refreshing entries older than robot_txt_age."""
    parsed = urlsplit(url)
    domain = parsed.netloc
    if domain in self.robots_txt_cache:
        robot_txt = self.robots_txt_cache[domain]
        # Expire cached entries older than the configured max age.
        if time.time() - robot_txt.mtime() > self.robot_txt_age:
            robot_txt = None
    else:
        robot_txt = None

    if robot_txt is None:
        robot_txt = RobotFileParser()
        try:
            response = yield gen.maybe_future(self.http_client.fetch(
                urljoin(url, '/robots.txt'), connect_timeout=10, request_timeout=30))
            content = response.body
        except tornado.httpclient.HTTPError as e:
            # Fetch failure -> treat as an empty (permissive) robots.txt.
            logger.error('load robots.txt from %s error: %r', domain, e)
            content = ''

        try:
            content = content.decode('utf8', 'ignore')
        except UnicodeDecodeError:
            content = ''

        robot_txt.parse(content.splitlines())
        self.robots_txt_cache[domain] = robot_txt

    # Tornado pre-Python-3.3 coroutine style: return via gen.Return.
    raise gen.Return(robot_txt.can_fetch(user_agent, url))
Example #20
Source Project: pyspider Author: binux File: app.py License: Apache License 2.0 | 5 votes |
def cdn_url_handler(error, endpoint, kwargs):
    """Flask url_build_error_handler: resolve 'cdn' endpoints against the
    configured CDN base URL; re-raise anything else unchanged."""
    if endpoint == 'cdn':
        path = kwargs.pop('path')
        # cdn = app.config.get('cdn', 'http://cdn.staticfile.org/')
        # cdn = app.config.get('cdn', '//cdnjs.cloudflare.com/ajax/libs/')
        cdn = app.config.get('cdn', '//cdnjscn.b0.upaiyun.com/libs/')
        return urljoin(cdn, path)
    else:
        # Not our endpoint: propagate the original build error, preserving
        # the active traceback when it is the exception being handled.
        exc_type, exc_value, tb = sys.exc_info()
        if exc_value is error:
            reraise(exc_type, exc_value, tb)
        else:
            raise error
Example #21
Source Project: kubeshift Author: cdrage File: base.py License: GNU Lesser General Public License v3.0 | 5 votes |
def _format_url(urlbase, urlpath):
    """Join *urlpath* onto *urlbase*, forcing exactly one '/' between them
    so urljoin keeps the base path instead of discarding it."""
    base = urlbase if urlbase.endswith('/') else urlbase + '/'
    path = urlpath.lstrip('/') if urlpath else urlpath
    return urlparse.urljoin(base, path)
Example #22
Source Project: pythonista-tools-installer Author: ywangd File: ptinstaller.py License: MIT License | 5 votes |
def contents(owner, repo):
    """Return the GitHub 'contents' listing of owner/repo as parsed JSON."""
    endpoint = 'repos/{}/{}/contents'.format(owner, repo)
    response = requests.get(urljoin(GitHubAPI.API_URL, endpoint))
    return response.json()
Example #23
Source Project: pythonista-tools-installer Author: ywangd File: ptinstaller.py License: MIT License | 5 votes |
def download(self, url):
    """Download the repo's master-branch zip archive into $TMPDIR and
    return the local file path."""
    owner, repo = self.get_github_user_repo(url)
    # Absolute path on the same host -> the GitHub archive endpoint.
    archive_url = urljoin(url, '/%s/%s/archive/master.zip' % (owner, repo))
    local_path = os.path.join(os.environ['TMPDIR'], '%s-master.zip' % repo)
    response = requests.get(archive_url)
    with open(local_path, 'wb') as fh:
        fh.write(response.content)
    return local_path
Example #24
Source Project: tox Author: tox-dev File: test_provision.py License: MIT License | 5 votes |
def space_path2url(path):
    """Return *path* unchanged unless it contains a space, in which case
    convert it to an absolute file: URL (percent-encoding the space)."""
    raw = str(path)
    if " " in raw:
        return urljoin("file:", pathname2url(os.path.abspath(raw)))
    return raw
Example #25
Source Project: pagure Author: Pagure File: utils.py License: GNU General Public License v2.0 | 5 votes |
def is_safe_url(target):  # pragma: no cover
    """ Checks that the target url is safe and sending to the current
    website not some other malicious one.
    """
    host_url = flask.request.host_url
    reference = urlparse(host_url)
    # Resolve the target against our own host so relative URLs are allowed.
    candidate = urlparse(urljoin(host_url, target))
    same_host = reference.netloc == candidate.netloc
    return candidate.scheme in ("http", "https") and same_host
Example #26
Source Project: pagure Author: Pagure File: login.py License: GNU General Public License v2.0 | 5 votes |
def send_confirmation_email(user):
    """ Sends the confirmation email asking the user to confirm its email
    address.

    No-op when the user has no email address on record.
    """
    if not user.emails:
        return

    # The URL of this instance
    instance_url = pagure.config.config.get("APP_URL", flask.request.url_root)

    # A link with a secret token to confirm the registration
    confirmation_url = urljoin(
        instance_url,
        flask.url_for("ui_ns.confirm_user", token=user.token),
    )

    message = """Dear %(username)s,

Thank you for registering on pagure at %(instance_url)s.

To finish your registration, please click on the following link or copy/paste
it in your browser:

%(confirmation_url)s

Your account will not be activated until you finish this step.

Sincerely,
Your pagure admin.
""" % (
        {
            "username": user.username,
            "instance_url": instance_url,
            "confirmation_url": confirmation_url,
        }
    )

    pagure.lib.notify.send_email(
        text=message,
        subject="Confirm your user account",
        to_mail=user.emails[0].email,
    )
Example #27
Source Project: pagure Author: Pagure File: login.py License: GNU General Public License v2.0 | 5 votes |
def send_lostpassword_email(user):
    """ Sends the email with the information on how to reset his/her
    password to the user.

    No-op when the user has no email address on record.
    """
    if not user.emails:
        return

    url = pagure.config.config.get("APP_URL", flask.request.url_root)

    # A link carrying the secret token needed to reset the password.
    url = urljoin(
        url or flask.request.url_root,
        flask.url_for("ui_ns.reset_password", token=user.token),
    )

    message = """ Dear %(username)s,

The IP address %(ip)s has requested a password change for this account.

If you wish to change your password, please click on the following link or
copy/paste it in your browser:

%(url)s

If you did not request this change, please inform an admin immediately!

Sincerely,
Your pagure admin.
""" % (
        {
            "username": user.username,
            "url": url,
            "ip": flask.request.remote_addr,
        }
    )

    pagure.lib.notify.send_email(
        text=message,
        subject="Confirm your password change",
        to_mail=user.emails[0].email,
    )
Example #28
Source Project: pymonzo Author: pawelad File: monzo_api.py License: MIT License | 5 votes |
def _refresh_oath_token(self):
    """
    Refresh Monzo OAuth 2 token.

    Official docs:
        https://monzo.com/docs/#refreshing-access

    :raises UnableToRefreshTokenException: when token couldn't be refreshed
    """
    url = urljoin(self.api_url, '/oauth2/token')
    data = {
        'grant_type': 'refresh_token',
        'client_id': self._client_id,
        'client_secret': self._client_secret,
        'refresh_token': self._token['refresh_token'],
    }

    token_response = requests.post(url, data=data)
    token = token_response.json()

    # Not ideal, but that's how Monzo API returns errors
    if 'error' in token:
        raise CantRefreshTokenError(
            "Unable to refresh the token: {}".format(token)
        )

    # Persist the fresh token for subsequent sessions.
    self._token = token
    self._save_token_on_disk()
Example #29
Source Project: pymonzo Author: pawelad File: monzo_api.py License: MIT License | 5 votes |
def _get_response(self, method, endpoint, params=None):
    """
    Helper method to handle HTTP requests and catch API errors

    :param method: valid HTTP method
    :type method: str
    :param endpoint: API endpoint
    :type endpoint: str
    :param params: extra parameters passed with the request
    :type params: dict
    :returns: API response
    :rtype: Response
    """
    url = urljoin(self.api_url, endpoint)

    try:
        response = getattr(self._session, method)(url, params=params)

        # Check if Monzo API returned HTTP 401, which could mean that the
        # token is expired
        if response.status_code == 401:
            raise TokenExpiredError
    except TokenExpiredError:
        # For some reason 'requests-oauthlib' automatic token refreshing
        # doesn't work so we do it here semi-manually
        self._refresh_oath_token()

        # Rebuild the session with the refreshed token and retry once.
        self._session = OAuth2Session(
            client_id=self._client_id,
            token=self._token,
        )

        response = getattr(self._session, method)(url, params=params)

    if response.status_code != requests.codes.ok:
        raise MonzoAPIError(
            "Something went wrong: {}".format(response.json())
        )

    return response
Example #30
Source Project: pymonzo Author: pawelad File: test_monzo_api.py License: MIT License | 5 votes |
def test_class_refresh_oath_token_method(self, mocker, mocked_monzo):
    """Test class `_refresh_oath_token` method"""
    # Patch requests.post and the on-disk persistence so no real HTTP or
    # filesystem access happens.
    mocked_requests_post_json = mocker.MagicMock()
    mocked_requests_post = mocker.patch('pymonzo.monzo_api.requests.post')
    mocked_requests_post.return_value.json = mocked_requests_post_json
    mocked_save_token_on_disk = mocker.patch(
        'pymonzo.monzo_api.MonzoAPI._save_token_on_disk'
    )

    # Payload the refresh call is expected to send.
    expected_data = {
        'grant_type': 'refresh_token',
        'client_id': mocked_monzo._client_id,
        'client_secret': mocked_monzo._client_secret,
        'refresh_token': mocked_monzo._token['refresh_token'],
    }

    mocked_monzo._refresh_oath_token()

    # The stored token must be replaced with the freshly fetched one.
    assert mocked_monzo._token == mocked_requests_post_json.return_value

    mocked_requests_post.assert_called_once_with(
        urljoin(mocked_monzo.api_url, '/oauth2/token'),
        data=expected_data,
    )
    mocked_requests_post_json.assert_called_once_with()
    mocked_save_token_on_disk.assert_called_once_with()