Python urllib.request.read() Examples

The following are 23 code examples of urllib.request.read(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module urllib.request, or try the search function.
Example #1
Source File: download.py    From deep-koalarization with MIT License 6 votes vote down vote up
def _download_img(self, image_url: str):
        """Fetch a single image over HTTP and store it next to its siblings.

        Args:
            image_url (str): URL of the image to fetch.

        Returns:
            Union[str, None]: Path of the image on disk if it was successfully
            downloaded (or was already present), otherwise None.
        """
        image_path = join(self.dest_dir, self._encode_image_name(image_url))
        if isfile(image_path):
            # Already fetched on a previous run - skip the network round-trip.
            return image_path
        try:
            # TODO use request.get with accept jpg?
            response = urllib.request.urlopen(image_url, timeout=5)
            payload = response.read()
            # Only persist payloads that really are JPEG data.
            if imghdr.what("", payload) == "jpeg":
                with open(image_path, "wb") as out_file:
                    out_file.write(payload)
        except Exception as e:
            print("Error downloading {}: {}".format(image_url, e), file=sys.stderr)
            return None
        return image_path
Example #2
Source File: views.py    From StockSensation with Apache License 2.0 6 votes vote down vote up
def get_segList(stocknumber):
    """Scrape guba (eastmoney) post titles for *stocknumber* and segment them.

    Walks list pages 1-20, extracts every post title together with its
    month/day, and returns jieba full-mode segmentations for titles whose
    date matches an entry in ``dateCount``.

    Args:
        stocknumber: Stock code used to build the guba list URL.

    Returns:
        list[list[str]]: One segmentation (list of tokens) per matched title.
    """
    segList = []
    # Hoisted out of the page loop (loop-invariant); raw string avoids the
    # invalid-escape-sequence warning that "\d" triggers on modern Python.
    # Groups: (prefix, title, middle, month, day).
    titlePattern = re.compile(
        r'<span class="l3">(.*?)title="(.*?)"(.*?)<span class="l6">(\d\d)-(\d\d)</span>', re.S)
    for pageNum in range(1, 21):
        urlPage = 'http://guba.eastmoney.com/list,' + \
            str(stocknumber) + '_' + str(pageNum) + '.html'
        stockPageRequest = urllib.request.urlopen(urlPage)
        htmlTitleContent = str(stockPageRequest.read(), 'utf-8')
        gotTitle = re.findall(titlePattern, htmlTitleContent)
        for title in gotTitle:
            # NOTE(review): dateCount appears to be a module-level table of
            # [month, day, ...] rows (siblings build it via setDate()) - confirm.
            for day in dateCount:
                if int(title[3]) == day[0] and int(title[4]) == day[1]:
                    segList.append(list(jieba.cut(title[1], cut_all=True)))
    return segList

# 分类器构建和数据持久化 
Example #3
Source File: binary_utils.py    From RLs with Apache License 2.0 6 votes vote down vote up
def load_remote_manifest(url: str) -> Dict[str, Any]:
    """
    Converts a remote yaml file into a Python dictionary.

    :param url: URL of the yaml manifest.
    :return: Parsed manifest contents.
    :raises urllib.error.HTTPError: re-raised with the URL appended to the
        message for easier debugging.
    """
    tmp_dir, _ = get_tmp_dir()
    try:
        request = urllib.request.urlopen(url, timeout=30)
    except urllib.error.HTTPError as e:  # type: ignore
        e.msg += " " + url
        raise
    manifest_path = os.path.join(tmp_dir, str(uuid.uuid4()) + ".yaml")
    # Close the HTTP response deterministically once the body is consumed
    # (the previous version leaked the response object).
    with request:
        with open(manifest_path, "wb") as manifest:
            while True:
                buffer = request.read(BLOCK_SIZE)
                if not buffer:
                    # There is nothing more to read
                    break
                manifest.write(buffer)
    # Parse the downloaded file, then always remove the temporary copy.
    try:
        result = load_local_manifest(manifest_path)
    finally:
        os.remove(manifest_path)
    return result
Example #4
Source File: torrentcast.py    From platypush with MIT License 6 votes vote down vote up
def search(self, query):
        """Query the torrent search API and return the parsed JSON results."""
        params = urllib.parse.urlencode({
            'sort': 'relevance',
            'quality': '720p,1080p,3d',
            'page': 1,
            'keywords': query,
        })
        req = urllib.request.Request(
            'https://api.apidomain.info/list?' + params,
            headers={
                'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 ' +
                    '(KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.36'
            })
        response = urllib.request.urlopen(req)
        return json.loads(response.read())
Example #5
Source File: torrentcast.py    From platypush with MIT License 5 votes vote down vote up
def pause(self):
        """POST a pause command to the TorrentCast server and return its reply."""
        endpoint = 'http://{}:{}/pause/'.format(self.server, self.port)
        response = urllib3.PoolManager().request('POST', endpoint)
        self.state = PlayerState.PAUSE.value
        return response.read()
Example #6
Source File: utils.py    From BlendLuxCore with GNU General Public License v3.0 5 votes vote down vote up
def calc_hash(filename):
    """Return the SHA-256 hex digest of the file at *filename*.

    The file is consumed in fixed-size chunks so arbitrarily large files
    can be hashed without loading them fully into memory.
    """
    CHUNK_SIZE = 65536
    digest = hashlib.sha256()
    with open(filename, 'rb') as stream:
        # iter() with a sentinel yields chunks until read() returns b''.
        for chunk in iter(lambda: stream.read(CHUNK_SIZE), b''):
            digest.update(chunk)
    return digest.hexdigest()
Example #7
Source File: utils.py    From BlendLuxCore with GNU General Public License v3.0 5 votes vote down vote up
def download_table_of_contents(context):
    """Download the asset tables of contents (models and materials) from
    LOL_HOST_URL, store them on the scene's luxcoreOL properties, and start
    a background thread that checks the local asset cache.

    Args:
        context: Blender context; its scene's luxcoreOL collections are mutated.

    Returns:
        bool: True when the ToC was loaded, False on connection/URL errors.
    """
    scene = context.scene

    try:
        import urllib.request
        with urllib.request.urlopen(LOL_HOST_URL + "/assets_model.json", timeout=60) as request:
            import json
            scene.luxcoreOL.model['assets'] = json.loads(request.read())
            # 'downloaded' tracks per-asset download progress (percent).
            for asset in scene.luxcoreOL.model['assets']:
                asset['downloaded'] = 0.0

            # with urllib.request.urlopen(LOL_HOST_URL + "/assets_scene.json", timeout=60) as request:
            #     import json
            #     scene.luxcoreOL.scene['assets'] = json.loads(request.read())
            #     for asset in scene.luxcoreOL.scene['assets']:
            #         asset['downloaded'] = 0.0

            # NOTE(review): this second request is nested inside the first
            # "with" block and rebinds the same name "request"; it works, but
            # was probably intended as a sibling block - confirm before refactoring.
            with urllib.request.urlopen(LOL_HOST_URL + "/assets_material.json", timeout=60) as request:
                import json
                scene.luxcoreOL.material['assets'] = json.loads(request.read())
                for asset in scene.luxcoreOL.material['assets']:
                    asset['downloaded'] = 0.0

        context.scene.luxcoreOL.ui.ToC_loaded = True
        init_categories(context)
        # Scan the on-disk cache without blocking the UI thread.
        bg_task = Thread(target=check_cache, args=(context, ))
        bg_task.start()
        return True
    except ConnectionError as error:
        print("Connection error: Could not download table of contents")
        print(error)
        return False
    except urllib.error.URLError as error:
        print("URL error: Could not download table of contents")
        print(error)
        return False
Example #8
Source File: ffmpeg_bootstrap.py    From maniwani with MIT License 5 votes vote down vote up
def write_ffmpeg(executable):
    """Write the ffmpeg binary to FFMPEG_EXTRACTION_DIR and mark it executable.

    :param executable: file-like object whose contents are the ffmpeg binary.
    """
    os.makedirs(FFMPEG_EXTRACTION_DIR, exist_ok=True)
    output_path = os.path.join(FFMPEG_EXTRACTION_DIR, FFMPEG_EXECUTABLE)
    # Context manager closes the handle even if write() fails
    # (the previous version leaked the open file object).
    with open(output_path, "wb") as output_file:
        output_file.write(executable.read())
    # NOTE(review): S_IXUSR alone yields mode 0o100 (execute-only, no read
    # permission); confirm whether stat.S_IRWXU / 0o755 was intended.
    os.chmod(output_path, stat.S_IXUSR)
Example #9
Source File: ffmpeg_bootstrap.py    From maniwani with MIT License 5 votes vote down vote up
def download_ffmpeg_archive():
    """Download the archive at FFMPEG_URL and return it as an in-memory stream.

    :return: io.BytesIO positioned at the start of the downloaded bytes.
    """
    # "with" closes the HTTP response after the body is read
    # (the previous version leaked the response object).
    with urllib.request.urlopen(FFMPEG_URL) as response:
        return io.BytesIO(response.read())
Example #10
Source File: views.py    From StockSensation with Apache License 2.0 5 votes vote down vote up
def nbopinionResult(request):
    """Django view: classify guba post titles for a stock with the persisted
    Naive Bayes model and render per-day sentiment counts.

    Query params:
        Nb_stock_number: stock code whose guba list pages (1-20) are scraped.

    Renders nbopinionResult.html with the stock name and the dateCount table
    serialized as JSON.
    """
    Nb_stock_number = request.GET['Nb_stock_number']
    dateCount = setDate()
    stock_name = get_stock_name(Nb_stock_number)
    homedir = os.getcwd()

    # Load the persisted classifier and its matching vectorizer / tf-idf
    # transformer (trained and dumped by NB_create_model).
    clf = joblib.load(homedir+'/StockVisualData/Clf.pkl')
    vectorizer = joblib.load(homedir+'/StockVisualData/Vect')
    transformer = joblib.load(homedir+'/StockVisualData/Tfidf')

    for pageNum in range(1, 21):
        urlPage = 'http://guba.eastmoney.com/list,' + \
            str(Nb_stock_number)+'_'+str(pageNum)+'.html'
        stockPageRequest = urllib.request.urlopen(urlPage)
        htmlTitleContent = str(stockPageRequest.read(), 'utf-8')
        # Groups: (prefix, title, middle, month, day) of each post row.
        titlePattern = re.compile(
            '<span class="l3">(.*?)title="(.*?)"(.*?)<span class="l6">(\d\d)-(\d\d)</span>', re.S)
        gotTitle = re.findall(titlePattern, htmlTitleContent)
        for i in range(len(gotTitle)):
            text_predict = []
            for j in range(len(dateCount)):
                # Match the post's month/day against the tracked dates.
                if int(gotTitle[i][3]) == dateCount[j][0] and int(gotTitle[i][4]) == dateCount[j][1]:
                    dateCount[j][5] += 1  # total posts seen for that day
                    seg_list = list(jieba.cut(gotTitle[i][1], cut_all=True))
                    seg_text = " ".join(seg_list)
                    text_predict.append(seg_text)
                    text_predict = np.array(text_predict)
                    text_frequency = vectorizer.transform(text_predict)
                    new_tfidf = transformer.transform(text_frequency)
                    predicted = clf.predict(new_tfidf)
                    # Bucket the prediction: [2]=positive, [3]=negative, [4]=neutral.
                    if predicted == '积极':
                        dateCount[j][2] += 1
                        continue
                    elif predicted == '消极':
                        dateCount[j][3] += 1
                        continue
                    elif predicted == '中立':
                        dateCount[j][4] += 1
    return render(request, 'nbopinionResult.html', {'stock_name': stock_name, 'dateCount': json.dumps(dateCount)})

# 设置时间数组 
Example #11
Source File: views.py    From StockSensation with Apache License 2.0 5 votes vote down vote up
def dicopinionResult(request):
    """Django view: dictionary-based sentiment for a stock's guba post titles.

    Query params:
        dicStockNum: stock code whose guba list pages (1-9) are scraped.

    Counts hits from the positiveWord/negativeWord/neutralWord lexicons per
    day and renders dicopinionResult.html with the dateCount table as JSON.
    """
    dicStockNum = request.GET['dicStockNum']
    dateCount = setDate()
    stock_name = get_stock_name(dicStockNum)

    for pageNum in range(1, 10):
        urlPage = 'http://guba.eastmoney.com/list,' + \
            str(dicStockNum)+',f_'+str(pageNum)+'.html'
        stockPageRequest = urllib.request.urlopen(urlPage)
        htmlTitleContent = str(stockPageRequest.read(), 'utf-8')
        # Groups: (prefix, title, middle, month, day) of each post row.
        titlePattern = re.compile(
            '<span class="l3">(.*?)title="(.*?)"(.*?)<span class="l6">(\d\d)-(\d\d)</span>', re.S)
        gotTitle = re.findall(titlePattern, htmlTitleContent)
        print(type(gotTitle))
        for i in range(len(gotTitle)):
            for j in range(len(dateCount)):
                # Match the post's month/day against the tracked dates.
                if int(gotTitle[i][3]) == dateCount[j][0] and int(gotTitle[i][4]) == dateCount[j][1]:
                    dateCount[j][5] += 1  # total posts seen for that day
                    segList = list(jieba.cut(gotTitle[i][1], cut_all=True))
                    # print(tx_npl(gotTitle[i][1]))
                    for eachItem in segList:
                        if eachItem != ' ':
                            # Tally lexicon hits: [2]=positive, [3]=negative, [4]=neutral.
                            if eachItem in positiveWord:
                                dateCount[j][2] += 1
                                continue
                            elif eachItem in negativeWord:
                                dateCount[j][3] += 1
                                continue
                            elif eachItem in neutralWord:
                                dateCount[j][4] += 1
    return render(request, 'dicopinionResult.html', {'stock_name': stock_name, 'dateCount': json.dumps(dateCount)})
Example #12
Source File: torrentcast.py    From platypush with MIT License 5 votes vote down vote up
def stop(self):
        """POST a stop command to the TorrentCast server and return its reply."""
        endpoint = 'http://{}:{}/stop/'.format(self.server, self.port)
        response = urllib3.PoolManager().request('POST', endpoint)
        self.state = PlayerState.STOP.value
        return response.read()
Example #13
Source File: integration_tests.py    From clusterfuzz with Apache License 2.0 5 votes vote down vote up
def execute(_):
  """Run integration tests."""
  command = 'run_server'
  # Line emitted by the dev server once a worker is up.
  indicator = b'Booting worker'

  try:
    lines = []
    server = common.execute_async(
        'python -u butler.py {} --skip-install-deps'.format(command))
    test_utils.wait_for_emulator_ready(
        server,
        command,
        indicator,
        timeout=RUN_SERVER_TIMEOUT,
        output_lines=lines)

    # Sleep a small amount of time to ensure the server is definitely ready.
    time.sleep(1)

    # Call setup ourselves instead of passing --bootstrap since we have no idea
    # when that finishes.
    # TODO(ochang): Make bootstrap a separate butler command and just call that.
    common.execute(
        ('python butler.py run setup '
         '--non-dry-run --local --config-dir={config_dir}'
        ).format(config_dir=constants.TEST_CONFIG_DIR),
        exit_on_error=False)

    # Smoke-test that the dev appserver responds at all.
    request = urllib.request.urlopen('http://' + constants.DEV_APPSERVER_HOST)
    request.read()  # Raises exception on error
  except Exception:
    # Dump captured server output before propagating the failure.
    print('Error occurred:')
    print(b''.join(lines))
    raise
  finally:
    # Always shut the dev server down, even when tests failed.
    server.terminate()

  # TODO(ochang): Test that bot runs, and do a basic fuzzing session to ensure
  # things work end to end.
  print('All end-to-end integration tests passed.')
Example #14
Source File: torrentcast.py    From platypush with MIT License 5 votes vote down vote up
def play(self, url):
        """Ask the TorrentCast server to start playing the given torrent URL."""
        payload = urllib.parse.urlencode({'url': url}).encode()
        response = urllib.request.urlopen(
            'http://{}:{}/play/'.format(self.server, self.port),
            data=payload
        )
        self.state = PlayerState.PLAY.value
        return response.read()
Example #15
Source File: B13346_10_01-gpx-reporter.py    From Learning-Geospatial-Analysis-with-Python-Third-Edition with MIT License 5 votes vote down vote up
def wms(minx, miny, maxx, maxy, service, lyr, epsg, style, img, w, h):
    """Retrieve a WMS map image from the specified service and save it
    as a JPEG named ``img`` + ".jpg"."""
    # Assemble the GetMap query string parameter by parameter.
    query = "&".join([
        "SERVICE=WMS",
        "VERSION=1.1.1",
        "REQUEST=GetMap",
        "LAYERS={}".format(lyr),
        "STYLES={}".format(style),
        "SRS=EPSG:{}".format(epsg),
        "BBOX={},{},{},{}".format(minx, miny, maxx, maxy),
        "WIDTH={}".format(w),
        "HEIGHT={}".format(h),
        "FORMAT=image/jpeg",
    ])
    response = urllib.request.urlopen(service + "?" + query)
    with open(img + ".jpg", "wb") as f:
        f.write(response.read())
Example #16
Source File: wattpad2epub.py    From Wattpad2Epub with GNU General Public License v2.0 5 votes vote down vote up
def get_cover(cover_url):
    """Download the cover image at *cover_url* into ./cover.jpg.

    Retries up to 5 times on any failure.

    Returns:
        int: 1 on success, 0 if the cover could not be retrieved.
    """
    print(cover_url)
    tries = 5
    while tries > 0:
        try:
            req = urllib.request.Request(cover_url)
            req.add_header('User-agent', 'Mozilla/5.0 (Linux x86_64)')
            request = urllib.request.urlopen(req)
            temp = request.read()
            with open('cover.jpg', 'wb') as f:
                f.write(temp)
            return 1
        except Exception as error:
            # Fixed: the old code returned 0 on the FIRST failure, making the
            # retry counter dead code ("tries == 0" was also a no-op
            # comparison, not an assignment). Now we retry until exhausted.
            tries -= 1
            print("Can't retrieve the cover")
            print(error)
    return 0


###############################################################################
# TODO: Remove this block when appropriate
# Workaround for bug in ebooklib 0.15.
# Something goes wrong when adding an image as a cover, and we need to work
# around it by replacing the get_template function with our own that takes care
# of properly encoding the template as utf8. 
Example #17
Source File: wattpad2epub.py    From Wattpad2Epub with GNU General Public License v2.0 5 votes vote down vote up
def get_html(url):
    """Fetch *url* and return it parsed as a BeautifulSoup (lxml) tree.

    Retries up to 5 times on socket timeouts; exits the process on HTTP/URL
    errors. When the module-level ``debug`` flag is set, exceptions propagate
    instead.
    """
    tries = 5
    req = urllib.request.Request(url)
    req.add_header('User-agent', 'Mozilla/5.0 (Linux x86_64)')
    # Add DoNotTrack header, do the right thing even if nobody cares
    req.add_header('DNT', '1')
    request = None
    while tries > 0:
        try:
            request = urllib.request.urlopen(req)
            break
        except socket.timeout:
            # Fixed: this handler was duplicated (second copy unreachable).
            if debug:
                raise
            tries -= 1
        # Fixed: HTTPError must be caught BEFORE URLError (it is a subclass),
        # otherwise the URLError branch handled it.
        except urllib.error.HTTPError as e:
            if debug:
                raise
            print("HTTP Error " + str(e.code) + ": " + str(e.reason))
            print("Aborting...")
            exit()
        except urllib.error.URLError as e:
            if debug:
                raise
            # Fixed: plain URLError has no .code attribute; printing it here
            # used to raise AttributeError.
            print("URL Error: " + str(e.reason))
            print("Aborting...")
            exit()
    if request is None:
        # Fixed: previously "request" was unbound here after exhausting all
        # retries, crashing with NameError below.
        print("Aborting...")
        exit()
    # html.parser generates problems, I could fix them, but switching to lxml
    # is easier and faster
    soup = BeautifulSoup(request.read(), "lxml")
    return soup
Example #18
Source File: schedule_scaling.py    From kube-schedule-scaler with GNU General Public License v3.0 5 votes vote down vote up
def fetch_schedule_actions_from_url(url):
    """Return the UTF-8 decoded body of *url*, or None if it cannot be read.

    :param url: Any URL understood by urllib (http://, https://, file://, ...).
    :return: Response body as str, or None on read/decode failure.
    """
    request = urllib.request.urlopen(url)
    try:
        content = request.read().decode('utf-8')
    except (OSError, UnicodeDecodeError):
        # Narrowed from a bare "except:", which also swallowed
        # KeyboardInterrupt and SystemExit.
        content = None
    finally:
        request.close()

    return content
Example #19
Source File: schedule_scaling.py    From kube-schedule-scaler with GNU General Public License v3.0 5 votes vote down vote up
def fetch_schedule_actions_s3(url):
    """Fetch the schedule document stored at an s3:// *url*.

    :param url: s3:// URL; parse_s3_url turns it into get_object kwargs.
    :return: Object body decoded as UTF-8, or the string '[]' (an empty
        JSON list) when the object cannot be fetched.
    """
    source = parse_s3_url(url)

    print(source)

    s3 = boto3.client('s3')
    try:
        element = s3.get_object(**source)
    except Exception:
        # Narrowed from a bare "except:"; any boto3/botocore failure falls
        # back to an empty schedule list instead of crashing the job.
        print('Couldn\'t read %s' % (url))
        return '[]'

    return element['Body'].read().decode('utf-8')
Example #20
Source File: binary_utils.py    From RLs with Apache License 2.0 4 votes vote down vote up
def download_and_extract_zip(url: str, name: str) -> None:
    """
    Downloads a zip file under a URL, extracts its contents into a folder with the name
    argument and gives chmod 755 to all the files it contains. Files are downloaded and
    extracted into special folders in the temp folder of the machine.
    :param url: The URL of the zip file
    :param name: The name that will be given to the folder containing the extracted data
    """
    zip_dir, bin_dir = get_tmp_dir()
    # Hash the URL so different URLs with the same "name" get distinct folders.
    url_hash = "-" + hashlib.md5(url.encode()).hexdigest()
    binary_path = os.path.join(bin_dir, name + url_hash)
    if os.path.exists(binary_path):
        shutil.rmtree(binary_path)

    # Download zip
    try:
        request = urllib.request.urlopen(url, timeout=30)
    except urllib.error.HTTPError as e:  # type: ignore
        # Append the URL to the error message for easier debugging.
        e.msg += " " + url
        raise
    zip_size = int(request.headers["content-length"])
    zip_file_path = os.path.join(zip_dir, str(uuid.uuid4()) + ".zip")
    with open(zip_file_path, "wb") as zip_file:
        downloaded = 0
        while True:
            buffer = request.read(BLOCK_SIZE)
            if not buffer:
                # There is nothing more to read
                break
            downloaded += len(buffer)
            zip_file.write(buffer)
            downloaded_percent = downloaded / zip_size * 100
            print_progress(f"  Downloading {name}", downloaded_percent)
        print("")

    # Extraction
    with ZipFileWithProgress(zip_file_path, "r") as zip_ref:
        zip_ref.extract_zip(f"  Extracting  {name}", binary_path)  # type: ignore
    print("")

    # Clean up zip
    print_progress(f"  Cleaning up {name}", 0)
    os.remove(zip_file_path)

    # Give permission
    for f in glob.glob(binary_path + "/**/*", recursive=True):
        # 16877 is octal 40755, which denotes a directory with permissions 755
        # NOTE(review): chmod generally ignores the file-type bits, so this
        # effectively applies mode 755 to every entry - confirm 0o755 wasn't
        # the intended literal.
        os.chmod(f, 16877)
    print_progress(f"  Cleaning up {name}", 100)
    print("")
Example #21
Source File: views.py    From StockSensation with Apache License 2.0 4 votes vote down vote up
def NB_create_model():
    """Train and persist a Multinomial Naive Bayes sentiment model from
    guba post titles.

    Scrapes title text from eastmoney guba list pages, weak-labels each
    title via the positive/negative/neutral word lexicons, fits a
    CountVectorizer + TfidfTransformer + MultinomialNB pipeline and dumps
    the three artifacts with joblib.
    """
    # -- Collect title text ------------------------------------------------
    text_list = []
    # Hoisted out of the page loop (loop-invariant); raw string avoids the
    # invalid-escape-sequence warning that "\d" triggers on modern Python.
    pattern = re.compile(
        r'<span class="l3">(.*?)title="(.*?)"(.*?)<span class="l6">(\d\d)-(\d\d)</span>', re.S)
    for page_num in range(0, 5):
        # Number of pages can be tuned.
        url = 'http://guba.eastmoney.com/list,gssz,f_' + \
            str(page_num) + '.html'
        request = urllib.request.urlopen(url)
        content = str(request.read(), 'utf-8')
        itemstemp = re.findall(pattern, content)
        for i in range(0, len(itemstemp)):
            seg_list = list(jieba.cut(itemstemp[i][1], cut_all=False))
            seg_str = " ".join(seg_list)
            text_list.append(seg_str)
    text_list = np.array(text_list)

    # -- Weak-label each title via the sentiment lexicons ------------------
    class_vec = [' ']*len(text_list)
    for i in range(0, len(text_list)):
        for pos in positiveWord:
            if pos in text_list[i]:
                class_vec[i] = '积极'
        for neg in negativeWord:
            if neg in text_list[i]:
                class_vec[i] = '消极'
        for neu in neutralWord:
            if neu in text_list[i]:
                class_vec[i] = '中立'
        if class_vec[i] == ' ':
            class_vec[i] = '无立场'

    # Convert the text to a term-frequency matrix (a[i][j] = frequency of
    # term j in document i), then weight with tf-idf.
    vectorizer = CountVectorizer()
    transformer = TfidfTransformer()
    tfidf = transformer.fit_transform(vectorizer.fit_transform(text_list))

    # -- Fit the classifier ------------------------------------------------
    clf = MultinomialNB()
    clf.fit(tfidf, class_vec)

    # -- Persist artifacts -------------------------------------------------
    joblib.dump(clf, 'Clf.pkl')
    joblib.dump(vectorizer, 'Vect')
    # Fixed: the transformer was dumped as 'Tf-Idf' but the views load it
    # as 'Tfidf' (see nbopinionResult), so it could never be found.
    joblib.dump(transformer, 'Tfidf')
Example #22
Source File: schedule_scaling.py    From kube-schedule-scaler with GNU General Public License v3.0 4 votes vote down vote up
def stack_job_creator():
    """ Create CronJobs for configured Stacks """

    stacks__to_scale = stacks_to_scale()
    print("Stacks collected for scaling: ")
    for stacks, schedules in stacks__to_scale.items():
        # Keys look like "<namespace>/<stack-name>".
        stack = stacks.split("/")[1]
        namespace = stacks.split("/")[0]
        for n in range(len(schedules)):
            schedules_n = schedules[n]
            replicas = schedules_n.get('replicas', None)
            minReplicas = schedules_n.get('minReplicas', None)
            maxReplicas = schedules_n.get('maxReplicas', None)
            schedule = schedules_n.get('schedule', None)

            print("Stack: %s, Namespace: %s, Replicas: %s, MinReplicas: %s, MaxReplicas: %s, minSchedule: %s" %
                  (stack, namespace, replicas, minReplicas, maxReplicas, schedule))

            # Render the scaling-script template with this schedule's values
            # (the template uses %-style placeholders).
            with open("/root/schedule_scaling/templates/stack-script.py", 'r') as script:
                script = script.read()
            stack_script = script % {
                'namespace': namespace,
                'name': stack,
                'replicas': replicas,
                'minReplicas': minReplicas,
                'maxReplicas': maxReplicas,
                'time': EXECUTION_TIME,
            }
            # Find the first free /tmp/scaling_jobs/<stack>-<i>.py slot so
            # multiple schedules for one stack don't overwrite each other.
            i = 0
            while os.path.exists("/tmp/scaling_jobs/%s-%d.py" % (stack, i)):
                i += 1
            script_creator = open("/tmp/scaling_jobs/%s-%d.py" % (stack, i), "w")
            script_creator.write(stack_script)
            script_creator.close()
            # Cron command: wait, source env, run the generated script, log output.
            cmd = ['sleep 50 ; . /root/.profile ; /usr/bin/python', script_creator.name,
                   '2>&1 | tee -a /tmp/scale_activities.log']
            cmd = ' '.join(map(str, cmd))
            scaling_cron = CronTab(user='root')
            job = scaling_cron.new(command=cmd)
            try:
                job.setall(schedule)
                job.set_comment("Scheduling_Jobs")
                scaling_cron.write()
            except Exception:
                # Invalid cron expression in the schedule - skip this entry.
                print('Stack: %s has syntax error in the schedule' % (stack))
                pass
Example #23
Source File: schedule_scaling.py    From kube-schedule-scaler with GNU General Public License v3.0 4 votes vote down vote up
def deploy_job_creator():
    """ Create CronJobs for configured Deployments """

    deployments__to_scale = deployments_to_scale()
    print("Deployments collected for scaling: ")
    for deploy, schedules in deployments__to_scale.items():
        # Keys look like "<namespace>/<deployment-name>".
        deployment = deploy.split("/")[1]
        namespace = deploy.split("/")[0]
        for n in range(len(schedules)):
            schedules_n = schedules[n]
            replicas = schedules_n.get('replicas', None)
            minReplicas = schedules_n.get('minReplicas', None)
            maxReplicas = schedules_n.get('maxReplicas', None)
            schedule = schedules_n.get('schedule', None)
            print("Deployment: %s, Namespace: %s, Replicas: %s, MinReplicas: %s, MaxReplicas: %s, Schedule: %s"
                  % (deployment, namespace, replicas, minReplicas, maxReplicas, schedule))

            # Render the scaling-script template with this schedule's values
            # (the template uses %-style placeholders).
            with open("/root/schedule_scaling/templates/deployment-script.py", 'r') as script:
                script = script.read()
            deployment_script = script % {
                'namespace': namespace,
                'name': deployment,
                'replicas': replicas,
                'minReplicas': minReplicas,
                'maxReplicas': maxReplicas,
                'time': EXECUTION_TIME,
            }
            # Find the first free /tmp/scaling_jobs/<deployment>-<i>.py slot
            # so multiple schedules for one deployment don't overwrite each other.
            i = 0
            while os.path.exists("/tmp/scaling_jobs/%s-%s.py" % (deployment, i)):
                i += 1
            script_creator = open("/tmp/scaling_jobs/%s-%s.py" % (deployment, i), "w")
            script_creator.write(deployment_script)
            script_creator.close()
            # Cron command: wait, source env, run the generated script, log output.
            cmd = ['sleep 50 ; . /root/.profile ; /usr/bin/python', script_creator.name,
                   '2>&1 | tee -a /tmp/scale_activities.log']
            cmd = ' '.join(map(str, cmd))
            scaling_cron = CronTab(user='root')
            job = scaling_cron.new(command=cmd)
            try:
                job.setall(schedule)
                job.set_comment("Scheduling_Jobs")
                scaling_cron.write()
            except Exception:
                # Invalid cron expression in the schedule - skip this entry.
                print('Deployment: %s has syntax error in the schedule' % (deployment))
                pass