Python urllib.request.read() Examples
The following are 23 code examples of urllib.request.read().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module
urllib.request
, or try the search function
.
Example #1
Source File: download.py From deep-koalarization with MIT License | 6 votes |
def _download_img(self, image_url: str):
    """Download a single image into ``self.dest_dir``.

    Args:
        image_url (str): Image url.

    Returns:
        Union[str, None]: Image path if the image was successfully
        downloaded (or already cached). Otherwise, None.
    """
    image_name = self._encode_image_name(image_url)
    image_path = join(self.dest_dir, image_name)
    if isfile(image_path):
        # Already on disk: skip the network round-trip.
        return image_path
    try:
        # TODO use request.get with accept jpg?
        response = urllib.request.urlopen(image_url, timeout=5)
        payload = response.read()
        # Only persist payloads that really contain JPEG data.
        if imghdr.what("", payload) == "jpeg":
            with open(image_path, "wb") as f:
                f.write(payload)
    except Exception as e:
        print("Error downloading {}: {}".format(image_url, e), file=sys.stderr)
        return None
    return image_path
Example #2
Source File: views.py From StockSensation with Apache License 2.0 | 6 votes |
def get_segList(stocknumber):
    """Collect jieba-segmented word lists for guba post titles.

    Scrapes 20 listing pages for *stocknumber* on Eastmoney guba and, for
    every title whose month/day matches an entry in the module-level
    ``dateCount`` table, appends the full-mode jieba segmentation of the
    title to the returned list.
    """
    # Groups: (prefix html, title text, middle html, month, day)
    title_pattern = re.compile(
        '<span class="l3">(.*?)title="(.*?)"(.*?)<span class="l6">(\d\d)-(\d\d)</span>', re.S)
    seg_lists = []
    for page_num in range(1, 21):
        page_url = ('http://guba.eastmoney.com/list,'
                    + str(stocknumber) + '_' + str(page_num) + '.html')
        response = urllib.request.urlopen(page_url)
        page_html = str(response.read(), 'utf-8')
        for title in re.findall(title_pattern, page_html):
            month, day = int(title[3]), int(title[4])
            for entry in dateCount:
                if month == entry[0] and day == entry[1]:
                    seg_lists.append(list(jieba.cut(title[1], cut_all=True)))
    return seg_lists
# Classifier construction and data persistence
Example #3
Source File: binary_utils.py From RLs with Apache License 2.0 | 6 votes |
def load_remote_manifest(url: str) -> Dict[str, Any]:
    """
    Converts a remote yaml file into a Python dictionary
    """
    tmp_dir, _ = get_tmp_dir()
    try:
        response = urllib.request.urlopen(url, timeout=30)
    except urllib.error.HTTPError as e:  # type: ignore
        # Append the offending URL so the re-raised error is actionable.
        e.msg += " " + url
        raise
    manifest_path = os.path.join(tmp_dir, str(uuid.uuid4()) + ".yaml")
    # Stream the body to a uniquely-named temp file in BLOCK_SIZE chunks.
    with open(manifest_path, "wb") as manifest:
        for chunk in iter(lambda: response.read(BLOCK_SIZE), b""):
            manifest.write(chunk)
    # Parse, then always delete the temp file — even if parsing fails.
    try:
        return load_local_manifest(manifest_path)
    finally:
        os.remove(manifest_path)
Example #4
Source File: torrentcast.py From platypush with MIT License | 6 votes |
def search(self, query):
    """Query the torrent listing API for *query* and return the parsed JSON."""
    params = urllib.parse.urlencode({
        'sort': 'relevance',
        'quality': '720p,1080p,3d',
        'page': 1,
        'keywords': query,
    })
    # Spoof a desktop browser UA; the API rejects default Python clients.
    api_request = urllib.request.Request(
        'https://api.apidomain.info/list?' + params,
        headers={
            'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 ' +
                          '(KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.36'
        })
    response = urllib.request.urlopen(api_request)
    return json.loads(response.read())
Example #5
Source File: torrentcast.py From platypush with MIT License | 5 votes |
def pause(self):
    """POST a pause command to the TorrentCast server and return its reply."""
    pool = urllib3.PoolManager()
    endpoint = 'http://{}:{}/pause/'.format(self.server, self.port)
    response = pool.request('POST', endpoint)
    self.state = PlayerState.PAUSE.value
    return response.read()
Example #6
Source File: utils.py From BlendLuxCore with GNU General Public License v3.0 | 5 votes |
def calc_hash(filename):
    """Return the SHA-256 hex digest of *filename*, read in 64 KiB chunks."""
    chunk_size = 65536
    digest = hashlib.sha256()
    with open(filename, 'rb') as handle:
        # iter() with a b'' sentinel stops cleanly at end-of-file.
        for chunk in iter(lambda: handle.read(chunk_size), b''):
            digest.update(chunk)
    return digest.hexdigest()
Example #7
Source File: utils.py From BlendLuxCore with GNU General Public License v3.0 | 5 votes |
def download_table_of_contents(context):
    # Fetch the LuxCore Online Library asset catalogs (models + materials)
    # and store them on the Blender scene; returns True on success,
    # False on any connection/URL failure.
    scene = context.scene
    try:
        import urllib.request
        with urllib.request.urlopen(LOL_HOST_URL + "/assets_model.json", timeout=60) as request:
            import json
            scene.luxcoreOL.model['assets'] = json.loads(request.read())
            # Mark every asset as not-yet-downloaded (progress fraction 0.0).
            for asset in scene.luxcoreOL.model['assets']:
                asset['downloaded'] = 0.0
        # Scene-asset catalog is intentionally disabled for now.
        # with urllib.request.urlopen(LOL_HOST_URL + "/assets_scene.json", timeout=60) as request:
        #     import json
        #     scene.luxcoreOL.scene['assets'] = json.loads(request.read())
        #     for asset in scene.luxcoreOL.scene['assets']:
        #         asset['downloaded'] = 0.0
        with urllib.request.urlopen(LOL_HOST_URL + "/assets_material.json", timeout=60) as request:
            import json
            scene.luxcoreOL.material['assets'] = json.loads(request.read())
            for asset in scene.luxcoreOL.material['assets']:
                asset['downloaded'] = 0.0
        context.scene.luxcoreOL.ui.ToC_loaded = True
        init_categories(context)
        # Kick off cache validation in the background so the UI stays responsive.
        bg_task = Thread(target=check_cache, args=(context, ))
        bg_task.start()
        return True
    except ConnectionError as error:
        print("Connection error: Could not download table of contents")
        print(error)
        return False
    except urllib.error.URLError as error:
        print("URL error: Could not download table of contents")
        print(error)
        return False
Example #8
Source File: ffmpeg_bootstrap.py From maniwani with MIT License | 5 votes |
def write_ffmpeg(executable):
    """Write the downloaded ffmpeg binary to the extraction dir and mark it executable.

    :param executable: a readable binary file-like object (e.g. a BytesIO
        returned by ``download_ffmpeg_archive``).
    """
    os.makedirs(FFMPEG_EXTRACTION_DIR, exist_ok=True)
    output_path = os.path.join(FFMPEG_EXTRACTION_DIR, FFMPEG_EXECUTABLE)
    # Use a context manager so the handle is flushed and closed
    # (the original left the file object open).
    with open(output_path, "wb") as output_file:
        output_file.write(executable.read())
    # NOTE(review): S_IXUSR alone sets only the owner-execute bit (no read
    # permission); presumably intentional here — confirm before changing.
    os.chmod(output_path, stat.S_IXUSR)
Example #9
Source File: ffmpeg_bootstrap.py From maniwani with MIT License | 5 votes |
def download_ffmpeg_archive():
    """Download the archive at FFMPEG_URL and return it as an in-memory BytesIO.

    :returns: ``io.BytesIO`` positioned at the start of the archive bytes.
    """
    # Close the HTTP response once fully read (the original leaked the
    # connection object).
    with urllib.request.urlopen(FFMPEG_URL) as response:
        return io.BytesIO(response.read())
Example #10
Source File: views.py From StockSensation with Apache License 2.0 | 5 votes |
def nbopinionResult(request):
    # Django view: scrape 20 pages of Eastmoney guba post titles for the
    # requested stock, classify each title with the persisted Naive-Bayes
    # pipeline, accumulate per-date sentiment counts and render the result.
    Nb_stock_number = request.GET['Nb_stock_number']
    # dateCount rows look like [month, day, pos, neg, neutral, total] —
    # inferred from the index usage below; confirm against setDate().
    dateCount = setDate()
    stock_name = get_stock_name(Nb_stock_number)
    homedir = os.getcwd()
    # Load the classifier and the vectorizer/tf-idf transformers that were
    # fitted and persisted by NB_create_model.
    clf = joblib.load(homedir+'/StockVisualData/Clf.pkl')
    vectorizer = joblib.load(homedir+'/StockVisualData/Vect')
    transformer = joblib.load(homedir+'/StockVisualData/Tfidf')
    for pageNum in range(1, 21):
        urlPage = 'http://guba.eastmoney.com/list,' + \
            str(Nb_stock_number)+'_'+str(pageNum)+'.html'
        stockPageRequest = urllib.request.urlopen(urlPage)
        htmlTitleContent = str(stockPageRequest.read(), 'utf-8')
        # Capture groups: (prefix html, title text, middle html, month, day).
        titlePattern = re.compile(
            '<span class="l3">(.*?)title="(.*?)"(.*?)<span class="l6">(\d\d)-(\d\d)</span>', re.S)
        gotTitle = re.findall(titlePattern, htmlTitleContent)
        for i in range(len(gotTitle)):
            text_predict = []
            for j in range(len(dateCount)):
                # Only count posts whose month/day matches a tracked date.
                if int(gotTitle[i][3]) == dateCount[j][0] and int(gotTitle[i][4]) == dateCount[j][1]:
                    dateCount[j][5] += 1
                    # Full-mode jieba segmentation, joined into the
                    # space-separated form the vectorizer expects.
                    seg_list = list(jieba.cut(gotTitle[i][1], cut_all=True))
                    seg_text = " ".join(seg_list)
                    text_predict.append(seg_text)
                    text_predict = np.array(text_predict)
                    text_frequency = vectorizer.transform(text_predict)
                    new_tfidf = transformer.transform(text_frequency)
                    predicted = clf.predict(new_tfidf)
                    # Predicted labels are Chinese: 积极=positive,
                    # 消极=negative, 中立=neutral.
                    if predicted == '积极':
                        dateCount[j][2] += 1
                        continue
                    elif predicted == '消极':
                        dateCount[j][3] += 1
                        continue
                    elif predicted == '中立':
                        dateCount[j][4] += 1
    return render(request, 'nbopinionResult.html', {'stock_name': stock_name, 'dateCount': json.dumps(dateCount)})
# Set up the date array (label for the following helper)
Example #11
Source File: views.py From StockSensation with Apache License 2.0 | 5 votes |
def dicopinionResult(request):
    # Django view: dictionary-based sentiment counting. Scrapes guba post
    # titles for the requested stock and tallies per-date sentiment by
    # matching segmented words against positive/negative/neutral word lists.
    dicStockNum = request.GET['dicStockNum']
    # dateCount rows look like [month, day, pos, neg, neutral, total] —
    # inferred from the index usage below; confirm against setDate().
    dateCount = setDate()
    stock_name = get_stock_name(dicStockNum)
    for pageNum in range(1, 10):
        urlPage = 'http://guba.eastmoney.com/list,' + \
            str(dicStockNum)+',f_'+str(pageNum)+'.html'
        stockPageRequest = urllib.request.urlopen(urlPage)
        htmlTitleContent = str(stockPageRequest.read(), 'utf-8')
        # Capture groups: (prefix html, title text, middle html, month, day).
        titlePattern = re.compile(
            '<span class="l3">(.*?)title="(.*?)"(.*?)<span class="l6">(\d\d)-(\d\d)</span>', re.S)
        gotTitle = re.findall(titlePattern, htmlTitleContent)
        print(type(gotTitle))
        for i in range(len(gotTitle)):
            for j in range(len(dateCount)):
                # Only count posts whose month/day matches a tracked date.
                if int(gotTitle[i][3]) == dateCount[j][0] and int(gotTitle[i][4]) == dateCount[j][1]:
                    dateCount[j][5] += 1
                    # Full-mode segmentation of the post title.
                    segList = list(jieba.cut(gotTitle[i][1], cut_all=True))
                    # print(tx_npl(gotTitle[i][1]))
                    for eachItem in segList:
                        if eachItem != ' ':
                            # A word may bump at most one sentiment bucket;
                            # the continues skip the remaining checks.
                            if eachItem in positiveWord:
                                dateCount[j][2] += 1
                                continue
                            elif eachItem in negativeWord:
                                dateCount[j][3] += 1
                                continue
                            elif eachItem in neutralWord:
                                dateCount[j][4] += 1
    return render(request, 'dicopinionResult.html', {'stock_name': stock_name, 'dateCount': json.dumps(dateCount)})
Example #12
Source File: torrentcast.py From platypush with MIT License | 5 votes |
def stop(self):
    """POST a stop command to the TorrentCast server and return its reply."""
    pool = urllib3.PoolManager()
    endpoint = 'http://{}:{}/stop/'.format(self.server, self.port)
    response = pool.request('POST', endpoint)
    self.state = PlayerState.STOP.value
    return response.read()
Example #13
Source File: integration_tests.py From clusterfuzz with Apache License 2.0 | 5 votes |
def execute(_):
    """Run integration tests.

    Boots the dev appserver, runs the butler setup step, then fetches the
    server root to verify it responds. Always terminates the server process.
    """
    command = 'run_server'
    indicator = b'Booting worker'
    lines = []
    # Bind before the try so the finally clause cannot raise NameError when
    # execute_async itself fails (the original referenced an unbound name).
    server = None
    try:
        server = common.execute_async(
            'python -u butler.py {} --skip-install-deps'.format(command))
        test_utils.wait_for_emulator_ready(
            server,
            command,
            indicator,
            timeout=RUN_SERVER_TIMEOUT,
            output_lines=lines)
        # Sleep a small amount of time to ensure the server is definitely ready.
        time.sleep(1)
        # Call setup ourselves instead of passing --bootstrap since we have no idea
        # when that finishes.
        # TODO(ochang): Make bootstrap a separate butler command and just call that.
        common.execute(
            ('python butler.py run setup '
             '--non-dry-run --local --config-dir={config_dir}'
             ).format(config_dir=constants.TEST_CONFIG_DIR),
            exit_on_error=False)
        request = urllib.request.urlopen('http://' + constants.DEV_APPSERVER_HOST)
        request.read()  # Raises exception on error
    except Exception:
        print('Error occurred:')
        print(b''.join(lines))
        raise
    finally:
        if server:
            server.terminate()
    # TODO(ochang): Test that bot runs, and do a basic fuzzing session to ensure
    # things work end to end.
    print('All end-to-end integration tests passed.')
Example #14
Source File: torrentcast.py From platypush with MIT License | 5 votes |
def play(self, url):
    """POST a play command for *url* to the TorrentCast server; return its reply."""
    payload = urllib.parse.urlencode({'url': url}).encode()
    response = urllib.request.urlopen(
        'http://{}:{}/play/'.format(self.server, self.port),
        data=payload)
    self.state = PlayerState.PLAY.value
    return response.read()
Example #15
Source File: B13346_10_01-gpx-reporter.py From Learning-Geospatial-Analysis-with-Python-Third-Edition with MIT License | 5 votes |
def wms(minx, miny, maxx, maxy, service, lyr, epsg, style, img, w, h):
    """Retrieve a wms map image from the specified service and saves it as a JPEG."""
    # Assemble the GetMap query in one expression instead of incremental +=.
    url = (
        service
        + "?SERVICE=WMS&VERSION=1.1.1&REQUEST=GetMap&"
        + "LAYERS={}".format(lyr)
        + "&STYLES={}&".format(style)
        + "SRS=EPSG:{}&".format(epsg)
        + "BBOX={},{},{},{}&".format(minx, miny, maxx, maxy)
        + "WIDTH={}&".format(w)
        + "HEIGHT={}&".format(h)
        + "FORMAT=image/jpeg"
    )
    response = urllib.request.urlopen(url)
    with open(img + ".jpg", "wb") as out:
        out.write(response.read())
Example #16
Source File: wattpad2epub.py From Wattpad2Epub with GNU General Public License v2.0 | 5 votes |
def get_cover(cover_url):
    """Download *cover_url* to ``cover.jpg`` in the working directory.

    Retries up to 5 times. Returns 1 on success, 0 if every attempt failed.
    """
    print(cover_url)
    tries = 5
    while tries > 0:
        try:
            req = urllib.request.Request(cover_url)
            req.add_header('User-agent', 'Mozilla/5.0 (Linux x86_64)')
            request = urllib.request.urlopen(req)
            temp = request.read()
            with open('cover.jpg', 'wb') as f:
                f.write(temp)
            # The original had a no-op comparison ``tries == 0`` here (a typo
            # for an assignment/break); the immediate return makes any loop
            # bookkeeping unnecessary.
            return 1
        except Exception as error:
            tries -= 1
            print("Can't retrieve the cover")
            print(error)
    return 0


###############################################################################
# TODO: Remove this block when appropriate
# Workaround for bug in ebooklib 0.15.
# Something goes wrong when adding an image as a cover, and we need to work
# around it by replacing the get_template function with our own that takes care
# of properly encoding the template as utf8.
Example #17
Source File: wattpad2epub.py From Wattpad2Epub with GNU General Public License v2.0 | 5 votes |
def get_html(url):
    """Download *url* and return it parsed with BeautifulSoup (lxml).

    Retries up to 5 times on socket timeouts; aborts the program on URL/HTTP
    errors. When the module-level ``debug`` flag is set, errors are re-raised
    instead.
    """
    tries = 5
    request = None
    req = urllib.request.Request(url)
    req.add_header('User-agent', 'Mozilla/5.0 (Linux x86_64)')
    # Add DoNotTrack header, do the right thing even if nobody cares
    req.add_header('DNT', '1')
    while tries > 0:
        try:
            request = urllib.request.urlopen(req)
            tries = 0
        except socket.timeout:
            # (The original repeated this except branch twice; one suffices.)
            if debug:
                raise
            tries -= 1
        # HTTPError subclasses URLError, so it must be listed first — the
        # original listed URLError first, leaving this branch unreachable.
        except urllib.error.HTTPError as e:
            if debug:
                raise
            print("HTTP Error " + str(e.code) + ": " + e.reason)
            print("Aborting...")
            exit()
        except urllib.error.URLError as e:
            if debug:
                raise
            # Plain URLError has no .code attribute; report the reason only
            # (the original accessed e.code and would crash here).
            print("URL Error: " + str(e.reason))
            print("Aborting...")
            exit()
    if request is None:
        # Every retry timed out; surface it explicitly instead of the
        # NameError the original would have raised.
        raise socket.timeout('no response from {} after retries'.format(url))
    # html.parser generates problems, I could fix them, but switching to lxml
    # is easier and faster
    soup = BeautifulSoup(request.read(), "lxml")
    return soup
Example #18
Source File: schedule_scaling.py From kube-schedule-scaler with GNU General Public License v3.0 | 5 votes |
def fetch_schedule_actions_from_url(url):
    """Download the scaling-schedule document at *url*.

    Returns the response body decoded as UTF-8, or None if reading or
    decoding fails. The connection is always closed.
    """
    request = urllib.request.urlopen(url)
    try:
        content = request.read().decode('utf-8')
    except Exception:
        # Narrowed from a bare ``except:`` so SystemExit/KeyboardInterrupt
        # are no longer swallowed.
        content = None
    finally:
        request.close()
    return content
Example #19
Source File: schedule_scaling.py From kube-schedule-scaler with GNU General Public License v3.0 | 5 votes |
def fetch_schedule_actions_s3(url):
    """Fetch the scaling-schedule document from the S3 object at *url*.

    Returns the object body decoded as UTF-8, or the literal string '[]'
    (an empty schedule) when the object cannot be read.
    """
    source = parse_s3_url(url)
    print(source)
    s3 = boto3.client('s3')
    try:
        element = s3.get_object(**source)
    except Exception:
        # Narrowed from a bare ``except:`` so SystemExit/KeyboardInterrupt
        # are no longer swallowed.
        print('Couldn\'t read %s' % (url))
        return '[]'
    return element['Body'].read().decode('utf-8')
Example #20
Source File: binary_utils.py From RLs with Apache License 2.0 | 4 votes |
def download_and_extract_zip(url: str, name: str) -> None:
    """
    Downloads a zip file under a URL, extracts its contents into a folder with the name
    argument and gives chmod 755 to all the files it contains. Files are downloaded and
    extracted into special folders in the temp folder of the machine.
    :param url: The URL of the zip file
    :param name: The name that will be given to the folder containing the extracted data
    """
    zip_dir, bin_dir = get_tmp_dir()
    # Hash the URL into the folder name so different URLs for the same
    # binary name never collide.
    url_hash = "-" + hashlib.md5(url.encode()).hexdigest()
    binary_path = os.path.join(bin_dir, name + url_hash)
    # Start from a clean slate if a previous extraction exists.
    if os.path.exists(binary_path):
        shutil.rmtree(binary_path)
    # Download zip
    try:
        request = urllib.request.urlopen(url, timeout=30)
    except urllib.error.HTTPError as e:  # type: ignore
        # Append the offending URL before re-raising for a clearer message.
        e.msg += " " + url
        raise
    zip_size = int(request.headers["content-length"])
    zip_file_path = os.path.join(zip_dir, str(uuid.uuid4()) + ".zip")
    # Stream the archive to disk in BLOCK_SIZE chunks, reporting progress.
    with open(zip_file_path, "wb") as zip_file:
        downloaded = 0
        while True:
            buffer = request.read(BLOCK_SIZE)
            if not buffer:  # There is nothing more to read
                break
            downloaded += len(buffer)
            zip_file.write(buffer)
            downloaded_percent = downloaded / zip_size * 100
            print_progress(f" Downloading {name}", downloaded_percent)
    print("")
    # Extraction
    with ZipFileWithProgress(zip_file_path, "r") as zip_ref:
        zip_ref.extract_zip(f" Extracting {name}", binary_path)  # type: ignore
    print("")
    # Clean up zip
    print_progress(f" Cleaning up {name}", 0)
    os.remove(zip_file_path)
    # Give permission
    for f in glob.glob(binary_path + "/**/*", recursive=True):
        # 16877 is octal 40755, which denotes a directory with permissions 755
        os.chmod(f, 16877)
    print_progress(f" Cleaning up {name}", 100)
    print("")
Example #21
Source File: views.py From StockSensation with Apache License 2.0 | 4 votes |
def NB_create_model():
    # Build and persist a Naive-Bayes sentiment classifier from scraped
    # guba post titles. Labels are assigned heuristically from the
    # positive/negative/neutral word lists.
    # Fetch title texts
    text_list = []
    for page_num in range(0, 5):  # page count is adjustable
        url = 'http://guba.eastmoney.com/list,gssz,f_' + \
            str(page_num) + '.html'
        request = urllib.request.urlopen(url)
        content = str(request.read(), 'utf-8')
        # Capture groups: (prefix html, title text, middle html, month, day).
        pattern = re.compile(
            '<span class="l3">(.*?)title="(.*?)"(.*?)<span class="l6">(\d\d)-(\d\d)</span>', re.S)
        itemstemp = re.findall(pattern, content)
        for i in range(0, len(itemstemp)):
            # Precise-mode segmentation, joined with spaces for the vectorizer.
            seg_list = list(jieba.cut(itemstemp[i][1], cut_all=False))
            seg_str = " ".join(seg_list)
            text_list.append(seg_str)
    text_list = np.array(text_list)
    # Label each text: 积极=positive, 消极=negative, 中立=neutral,
    # 无立场=no stance. Later word-list hits overwrite earlier ones.
    class_vec = [' ']*len(text_list)
    for i in range(0, len(text_list)):
        for pos in positiveWord:
            if pos in text_list[i]:
                class_vec[i] = '积极'
        for neg in negativeWord:
            if neg in text_list[i]:
                class_vec[i] = '消极'
        for neu in neutralWord:
            if neu in text_list[i]:
                class_vec[i] = '中立'
        if class_vec[i] == ' ':
            class_vec[i] = '无立场'
    # Convert words to a term-frequency matrix; a[i][j] is the frequency of
    # word j in text i.
    vectorizer = CountVectorizer()
    # Computes the tf-idf weight of each word.
    transformer = TfidfTransformer()
    # First fit_transform builds the frequency matrix, second computes tf-idf.
    tfidf = transformer.fit_transform(vectorizer.fit_transform(text_list))
    # Build the classifier.
    clf = MultinomialNB()
    clf.fit(tfidf, class_vec)
    # Persist model and transformers for the prediction views.
    joblib.dump(clf, 'Clf.pkl')
    joblib.dump(vectorizer, 'Vect')
    joblib.dump(transformer, 'Tf-Idf')
Example #22
Source File: schedule_scaling.py From kube-schedule-scaler with GNU General Public License v3.0 | 4 votes |
def stack_job_creator():
    """ Create CronJobs for configured Stacks """
    stacks__to_scale = stacks_to_scale()
    print("Stacks collected for scaling: ")
    # Keys look like "namespace/stack-name"; values are lists of schedules.
    for stacks, schedules in stacks__to_scale.items():
        stack = stacks.split("/")[1]
        namespace = stacks.split("/")[0]
        for n in range(len(schedules)):
            schedules_n = schedules[n]
            replicas = schedules_n.get('replicas', None)
            minReplicas = schedules_n.get('minReplicas', None)
            maxReplicas = schedules_n.get('maxReplicas', None)
            schedule = schedules_n.get('schedule', None)
            print("Stack: %s, Namespace: %s, Replicas: %s, MinReplicas: %s, MaxReplicas: %s, minSchedule: %s" % (stack, namespace, replicas, minReplicas, maxReplicas, schedule))
            # Render the scaling script template via %-interpolation.
            # NOTE: ``script`` rebinds from file handle to its contents.
            with open("/root/schedule_scaling/templates/stack-script.py", 'r') as script:
                script = script.read()
            stack_script = script % {
                'namespace': namespace,
                'name': stack,
                'replicas': replicas,
                'minReplicas': minReplicas,
                'maxReplicas': maxReplicas,
                'time': EXECUTION_TIME,
            }
            # Find the first unused numbered filename for this stack.
            i = 0
            while os.path.exists("/tmp/scaling_jobs/%s-%d.py" % (stack, i)):
                i += 1
            script_creator = open("/tmp/scaling_jobs/%s-%d.py" % (stack, i), "w")
            script_creator.write(stack_script)
            script_creator.close()
            # Shell command run by cron: delay, load profile, run the script,
            # and append all output to the activity log.
            cmd = ['sleep 50 ; . /root/.profile ; /usr/bin/python', script_creator.name, '2>&1 | tee -a /tmp/scale_activities.log']
            cmd = ' '.join(map(str, cmd))
            scaling_cron = CronTab(user='root')
            job = scaling_cron.new(command=cmd)
            try:
                job.setall(schedule)
                job.set_comment("Scheduling_Jobs")
                scaling_cron.write()
            except Exception:
                # An invalid cron expression: report and keep processing
                # the remaining schedules.
                print('Stack: %s has syntax error in the schedule' % (stack))
                pass
Example #23
Source File: schedule_scaling.py From kube-schedule-scaler with GNU General Public License v3.0 | 4 votes |
def deploy_job_creator():
    """ Create CronJobs for configured Deployments """
    deployments__to_scale = deployments_to_scale()
    print("Deployments collected for scaling: ")
    # Keys look like "namespace/deployment-name"; values are schedule lists.
    for deploy, schedules in deployments__to_scale.items():
        deployment = deploy.split("/")[1]
        namespace = deploy.split("/")[0]
        for n in range(len(schedules)):
            schedules_n = schedules[n]
            replicas = schedules_n.get('replicas', None)
            minReplicas = schedules_n.get('minReplicas', None)
            maxReplicas = schedules_n.get('maxReplicas', None)
            schedule = schedules_n.get('schedule', None)
            print("Deployment: %s, Namespace: %s, Replicas: %s, MinReplicas: %s, MaxReplicas: %s, Schedule: %s" % (deployment, namespace, replicas, minReplicas, maxReplicas, schedule))
            # Render the scaling script template via %-interpolation.
            # NOTE: ``script`` rebinds from file handle to its contents.
            with open("/root/schedule_scaling/templates/deployment-script.py", 'r') as script:
                script = script.read()
            deployment_script = script % {
                'namespace': namespace,
                'name': deployment,
                'replicas': replicas,
                'minReplicas': minReplicas,
                'maxReplicas': maxReplicas,
                'time': EXECUTION_TIME,
            }
            # Find the first unused numbered filename for this deployment.
            # NOTE(review): uses %s-%s where the stack variant uses %s-%d;
            # output is identical since i is an int.
            i = 0
            while os.path.exists("/tmp/scaling_jobs/%s-%s.py" % (deployment, i)):
                i += 1
            script_creator = open("/tmp/scaling_jobs/%s-%s.py" % (deployment, i), "w")
            script_creator.write(deployment_script)
            script_creator.close()
            # Shell command run by cron: delay, load profile, run the script,
            # and append all output to the activity log.
            cmd = ['sleep 50 ; . /root/.profile ; /usr/bin/python', script_creator.name, '2>&1 | tee -a /tmp/scale_activities.log']
            cmd = ' '.join(map(str, cmd))
            scaling_cron = CronTab(user='root')
            job = scaling_cron.new(command=cmd)
            try:
                job.setall(schedule)
                job.set_comment("Scheduling_Jobs")
                scaling_cron.write()
            except Exception:
                # An invalid cron expression: report and keep processing
                # the remaining schedules.
                print('Deployment: %s has syntax error in the schedule' % (deployment))
                pass