Python urllib.urlretrieve() Examples

The following code examples show how to use urllib.urlretrieve(). They are extracted from open source Python projects. You can vote up the examples you like or vote down the ones you don't like. Note that urllib.urlretrieve() exists only in Python 2; in Python 3 the equivalent function is urllib.request.urlretrieve().

Example 1
Project: dynamic-training-with-apache-mxnet-on-aws   Author: awslabs   File: get_data.py    Apache License 2.0 7 votes vote down vote up
def get_cifar10(data_dir):
    """Download and unpack the CIFAR-10 .rec files into ``data_dir``.

    Skips the download when train.rec/test.rec are already present.  The
    archive's ``cifar/`` subdirectory is flattened into ``data_dir`` and the
    zip is deleted afterwards.  The caller's working directory is restored
    on return.

    :param data_dir: target directory (created if missing).
    """
    if not os.path.isdir(data_dir):
        # os.makedirs handles nested paths and names containing spaces,
        # unlike the original shell call ``os.system("mkdir " + data_dir)``.
        os.makedirs(data_dir)
    cwd = os.path.abspath(os.getcwd())
    os.chdir(data_dir)
    if (not os.path.exists('train.rec')) or \
       (not os.path.exists('test.rec')) :
        import urllib, zipfile, glob
        dirname = os.getcwd()
        zippath = os.path.join(dirname, "cifar10.zip")
        urllib.urlretrieve("http://data.mxnet.io/mxnet/data/cifar10.zip", zippath)
        zf = zipfile.ZipFile(zippath, "r")
        zf.extractall()
        zf.close()
        os.remove(zippath)
        # Flatten the extracted cifar/ directory into data_dir.
        for f in glob.glob(os.path.join(dirname, "cifar", "*")):
            # os.path.basename is the portable way to take the last path
            # component (split(os.path.sep) mishandles mixed separators).
            name = os.path.basename(f)
            os.rename(f, os.path.join(dirname, name))
        os.rmdir(os.path.join(dirname, "cifar"))
    os.chdir(cwd)

# data 
Example 2
Project: hugo-lambda-function   Author: jolexa   File: main.py    MIT License 7 votes vote down vote up
def lambda_handler(event, context):
    """Build a Hugo site from the GitHub repo named in an SNS event and
    sync the generated site to the S3 bucket of the same name.

    :param event: Lambda event; Records[0].Sns.Message is a JSON payload
                  carrying repository.url and repository.name.
    :param context: Lambda context object (unused).
    """
    message = json.loads(event['Records'][0]['Sns']['Message'])

    repourl = message['repository']['url']
    reponame = message['repository']['name']

    logger.info("This is the URL: " + str(repourl))

    urllib.urlretrieve(repourl + "/archive/master.zip", "/tmp/master.zip")
    zfile = zipfile.ZipFile('/tmp/master.zip')
    zfile.extractall("/tmp/unzipped")
    builddir = "/tmp/unzipped/" + reponame + "-master/"

    # Security fix: values from the SNS message (reponame, and paths built
    # from it) used to be interpolated into shell=True command strings,
    # allowing shell injection.  List-form argv runs without a shell.
    subprocess.call(["/var/task/hugo.go"], cwd=builddir)
    pushdir = builddir + "public/"
    bucketuri = "s3://" + reponame + "/"
    subprocess.call(["python", "/var/task/awscli.py", "s3", "sync",
                     "--size-only", "--delete", "--sse", "AES256",
                     pushdir, bucketuri])
Example 3
Project: Deformable-ConvNets   Author: guanfuchen   File: coco.py    MIT License 7 votes vote down vote up
def download( self, tarDir = None, imgIds = [] ):
        '''
        Download COCO images from mscoco.org server.

        NOTE(review): the mutable default ``imgIds=[]`` is shared across
        calls; it is safe here only because it is never mutated.

        :param tarDir (str): COCO results directory name
               imgIds (list): images to be downloaded (all images if empty)
        :return: -1 when no target directory is given, otherwise None
        '''
        if tarDir is None:
            print 'Please specify target directory'
            return -1
        if len(imgIds) == 0:
            # No explicit ids: download every image known to this instance.
            imgs = self.imgs.values()
        else:
            imgs = self.loadImgs(imgIds)
        N = len(imgs)
        if not os.path.exists(tarDir):
            os.makedirs(tarDir)
        for i, img in enumerate(imgs):
            tic = time.time()
            fname = os.path.join(tarDir, img['file_name'])
            # Resume-friendly: skip files that already exist on disk.
            if not os.path.exists(fname):
                urllib.urlretrieve(img['coco_url'], fname)
            print 'downloaded %d/%d images (t=%.1fs)'%(i, N, time.time()- tic)
Example 4
Project: pyblish-win   Author: pyblish   File: test_urllib.py    GNU Lesser General Public License v3.0 6 votes vote down vote up
def test_copy(self):
        # urlretrieve() given an explicit destination must write the data
        # to exactly that path and report it as the first return value.
        target = "%s.2" % test_support.TESTFN
        self.registerFileForCleanUp(target)
        source_url = self.constructLocalFileUrl(test_support.TESTFN)
        result = urllib.urlretrieve(source_url, target)
        self.assertEqual(target, result[0])
        self.assertTrue(os.path.exists(target), "copy of the file was not "
                                                  "made")
        FILE = file(target, 'rb')
        try:
            text = FILE.read()
            FILE.close()
        finally:
            try:
                FILE.close()
            except:
                pass
        self.assertEqual(self.text, text)
Example 5
Project: pyblish-win   Author: pyblish   File: test_urllib.py    GNU Lesser General Public License v3.0 6 votes vote down vote up
def test_short_content_raises_ContentTooShortError(self):
        # The fake server advertises Content-Length: 100 but serves only
        # two bytes; urlretrieve() must raise ContentTooShortError even
        # when a reporthook is supplied.
        self.fakehttp('''HTTP/1.1 200 OK
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Connection: close
Content-Length: 100
Content-Type: text/html; charset=iso-8859-1

FF
''')

        def _noop_hook(count, block_size, total_size):
            # The hook itself does nothing; it only has to be accepted.
            pass

        try:
            self.assertRaises(urllib.ContentTooShortError, urllib.urlretrieve,
                    'http://example.com', reporthook=_noop_hook)
        finally:
            self.unfakehttp()
Example 6
Project: dynamic-training-with-apache-mxnet-on-aws   Author: awslabs   File: get_data.py    Apache License 2.0 6 votes vote down vote up
def get_mnist(data_dir):
    """Download and unpack the MNIST idx files into ``data_dir``.

    The four idx files are fetched as one zip from data.mxnet.io and
    extracted in place; already-extracted data is left untouched.  The
    caller's working directory is restored on return.

    :param data_dir: target directory (created if missing).
    """
    if not os.path.isdir(data_dir):
        # os.makedirs handles nested paths and names containing spaces,
        # unlike the original shell call ``os.system("mkdir " + data_dir)``.
        os.makedirs(data_dir)
    # Save the caller's cwd: the original ``os.chdir("..")`` only undoes a
    # single path component and leaves us in the wrong place when data_dir
    # is a nested path (the sibling get_cifar10 already restores cwd).
    cwd = os.path.abspath(os.getcwd())
    os.chdir(data_dir)
    if (not os.path.exists('train-images-idx3-ubyte')) or \
       (not os.path.exists('train-labels-idx1-ubyte')) or \
       (not os.path.exists('t10k-images-idx3-ubyte')) or \
       (not os.path.exists('t10k-labels-idx1-ubyte')):
        import urllib, zipfile
        zippath = os.path.join(os.getcwd(), "mnist.zip")
        urllib.urlretrieve("http://data.mxnet.io/mxnet/data/mnist.zip", zippath)
        zf = zipfile.ZipFile(zippath, "r")
        zf.extractall()
        zf.close()
        os.remove(zippath)
    os.chdir(cwd)
Example 7
Project: garden.facelock   Author: kivy-garden   File: download_images.py    MIT License 6 votes vote down vote up
def store_raw_images():
    '''To download images from image-net
        (Change the url for different needs of cascades)

        Fetches the synset URL list, downloads each image into
        neg/<n>.jpg, re-reads it as grayscale, resizes it to 100x100 and
        writes it back.  Per-image failures are skipped so one bad URL
        does not abort the run.  Assumes the ``neg`` directory already
        exists — TODO confirm.
    '''
    neg_images_link = 'http://image-net.org/api/text/imagenet.synset.geturls?wnid=n07942152'
    neg_image_urls = urllib2.urlopen(neg_images_link).read().decode()

    pic_num = 1

    for i in neg_image_urls.split('\n'):
        try:

            print i
            urllib.urlretrieve(i, "neg/" + str(pic_num) + '.jpg')
            img = cv2.imread("neg/" + str(pic_num) +'.jpg',
                                cv2.IMREAD_GRAYSCALE)
            resized_image = cv2.resize(img, (100, 100))
            cv2.imwrite("neg/" + str(pic_num) + '.jpg', resized_image)
            pic_num = pic_num + 1

        except:
            # NOTE(review): deliberate best-effort skip of bad URLs, but a
            # bare except also hides KeyboardInterrupt/SystemExit.
            print "error"
Example 8
Project: pytorch-lstd   Author: JiasiWang   File: coco.py    MIT License 6 votes vote down vote up
def download(self, tarDir=None, imgIds=None):
        '''
        Download COCO images from mscoco.org server.

        Fix: the mutable default argument ``imgIds=[]`` was replaced by
        ``None`` so the default list cannot be shared between calls.

        :param tarDir (str): COCO results directory name
               imgIds (list): images to be downloaded (all images if empty)
        :return: -1 when no target directory is given, otherwise None
        '''
        if tarDir is None:
            print('Please specify target directory')
            return -1
        if not imgIds:
            # No explicit ids: download every image known to this instance.
            imgs = self.imgs.values()
        else:
            imgs = self.loadImgs(imgIds)
        N = len(imgs)
        if not os.path.exists(tarDir):
            os.makedirs(tarDir)
        for i, img in enumerate(imgs):
            tic = time.time()
            fname = os.path.join(tarDir, img['file_name'])
            # Resume-friendly: skip files that already exist on disk.
            if not os.path.exists(fname):
                urllib.urlretrieve(img['coco_url'], fname)
            print('downloaded %d/%d images (t=%.1fs)'%(i, N, time.time()- tic))
Example 9
Project: VQA   Author: anujanegi   File: coco.py    MIT License 6 votes vote down vote up
def download(self, tarDir=None, imgIds=None):
        '''
        Download COCO images from mscoco.org server.

        Fix: the mutable default ``imgIds=[]`` is replaced with ``None``
        (a shared default list is a classic Python pitfall).

        :param tarDir (str): COCO results directory name
               imgIds (list): images to be downloaded (all images if empty)
        :return: -1 when no target directory is given, otherwise None
        '''
        if tarDir is None:
            print('Please specify target directory')
            return -1
        if not imgIds:
            # Empty/missing ids means "download everything".
            imgs = self.imgs.values()
        else:
            imgs = self.loadImgs(imgIds)
        N = len(imgs)
        if not os.path.exists(tarDir):
            os.makedirs(tarDir)
        for i, img in enumerate(imgs):
            tic = time.time()
            fname = os.path.join(tarDir, img['file_name'])
            # Only fetch files not already present (resumable downloads).
            if not os.path.exists(fname):
                urllib.urlretrieve(img['coco_url'], fname)
            print('downloaded %d/%d images (t=%.1fs)'%(i, N, time.time()- tic))
Example 10
Project: SGAN   Author: YuhangSong   File: mnist.py    MIT License 6 votes vote down vote up
def load(batch_size, test_batch_size, n_labelled=None):
    """Return (train, dev, test) mnist_generator iterators over the pickled
    MNIST dataset, downloading it to /tmp/mnist.pkl.gz first if missing.

    :param batch_size: batch size for the training generator.
    :param test_batch_size: batch size for the dev and test generators.
    :param n_labelled: optional labelled-example count, passed through to
        mnist_generator.
    """
    filepath = '/tmp/mnist.pkl.gz'
    url = 'http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz'

    if not os.path.isfile(filepath):
        print "Couldn't find MNIST dataset in /tmp, downloading..."
        urllib.urlretrieve(url, filepath)

    # The pickle holds the three pre-split (data, label) tuples.
    with gzip.open('/tmp/mnist.pkl.gz', 'rb') as f:
        train_data, dev_data, test_data = pickle.load(f)

    return (
        mnist_generator(train_data, batch_size, n_labelled),
        mnist_generator(dev_data, test_batch_size, n_labelled),
        mnist_generator(test_data, test_batch_size, n_labelled)
    )
Example 11
Project: LearningToSpotArtifacts   Author: sjenni   File: download_and_convert_stl10.py    MIT License 6 votes vote down vote up
def download_and_extract():
    """
    Downloads the stl-10 dataset into STL10_DATADIR and extracts it.

    Skips the download when the archive already exists.  Fix: the tar
    handle is now closed deterministically (the original called
    ``tarfile.open(...).extractall(...)`` and leaked the open file).
    """
    dest_directory = STL10_DATADIR
    if not os.path.exists(dest_directory):
        os.makedirs(dest_directory)
    filename = DATA_URL.split('/')[-1]
    filepath = os.path.join(dest_directory, filename)
    if not os.path.exists(filepath):
        def _progress(count, block_size, total_size):
            # urlretrieve reporthook: draw an in-place percentage line.
            sys.stdout.write(
                '\rDownloading %s %.2f%%' % (filename, float(count * block_size) / float(total_size) * 100.0))
            sys.stdout.flush()

        filepath, _ = urllib.urlretrieve(DATA_URL, filepath, reporthook=_progress)
        print('Downloaded', filename)
        with tarfile.open(filepath, 'r:gz') as tar:
            tar.extractall(dest_directory)
Example 12
Project: faster-rcnn.pytorch_resnet50   Author: kentaroy47   File: coco.py    MIT License 6 votes vote down vote up
def download(self, tarDir=None, imgIds=None):
        '''
        Download COCO images from mscoco.org server.

        Fix: ``imgIds`` previously defaulted to a mutable ``[]``; the
        default is now ``None`` with identical semantics.

        :param tarDir (str): COCO results directory name
               imgIds (list): images to be downloaded (all images if empty)
        :return: -1 when no target directory is given, otherwise None
        '''
        if tarDir is None:
            print('Please specify target directory')
            return -1
        if not imgIds:
            # No ids supplied: fall back to every image in the index.
            imgs = self.imgs.values()
        else:
            imgs = self.loadImgs(imgIds)
        N = len(imgs)
        if not os.path.exists(tarDir):
            os.makedirs(tarDir)
        for i, img in enumerate(imgs):
            tic = time.time()
            fname = os.path.join(tarDir, img['file_name'])
            # Skip already-downloaded files so interrupted runs can resume.
            if not os.path.exists(fname):
                urllib.urlretrieve(img['coco_url'], fname)
            print('downloaded %d/%d images (t=%.1fs)'%(i, N, time.time()- tic))
Example 13
Project: RMDL   Author: eric-erki   File: Download_WOS.py    GNU General Public License v3.0 6 votes vote down vote up
def download_and_extract():
    """
    Download and extract the WOS datasets.

    Fetches DATA_URL into DATA_DIR (created if missing), extracts the tar
    archive on first use and returns the absolute dataset path.  Fix: the
    tar handle is now closed deterministically (the original leaked it).

    :return: absolute path of DATA_DIR
    """
    dest_directory = DATA_DIR
    if not os.path.exists(dest_directory):
        os.makedirs(dest_directory)
    filename = DATA_URL.split('/')[-1]
    filepath = os.path.join(dest_directory, filename)

    path = os.path.abspath(dest_directory)
    if not os.path.exists(filepath):
        def _progress(count, block_size, total_size):
            # urlretrieve reporthook: draw an in-place percentage line.
            sys.stdout.write('\rDownloading %s %.2f%%' % (filename,
                                                          float(count * block_size) / float(total_size) * 100.0))
            sys.stdout.flush()

        filepath, _ = urllib.urlretrieve(DATA_URL, filepath, reporthook=_progress)

        print('Downloaded', filename)

        with tarfile.open(filepath, 'r') as tar:
            tar.extractall(dest_directory)
    return path
Example 14
Project: Deep-Feature-Flow-Segmentation   Author: tonysy   File: coco.py    MIT License 6 votes vote down vote up
def download( self, tarDir = None, imgIds = [] ):
        '''
        Download COCO images from mscoco.org server.

        NOTE(review): ``imgIds=[]`` is a shared mutable default; harmless
        here because the list is never mutated, but worth fixing upstream.

        :param tarDir (str): COCO results directory name
               imgIds (list): images to be downloaded (all images if empty)
        :return: -1 when no target directory is given, otherwise None
        '''
        if tarDir is None:
            print 'Please specify target directory'
            return -1
        if len(imgIds) == 0:
            # No ids supplied: download every image in the index.
            imgs = self.imgs.values()
        else:
            imgs = self.loadImgs(imgIds)
        N = len(imgs)
        if not os.path.exists(tarDir):
            os.makedirs(tarDir)
        for i, img in enumerate(imgs):
            tic = time.time()
            fname = os.path.join(tarDir, img['file_name'])
            # Skip already-present files so interrupted runs can resume.
            if not os.path.exists(fname):
                urllib.urlretrieve(img['coco_url'], fname)
            print 'downloaded %d/%d images (t=%.1fs)'%(i, N, time.time()- tic)
Example 15
Project: LifelongVAE   Author: jramapuram   File: fashion_number.py    MIT License 6 votes vote down vote up
def download(self, path):
        '''Fetch the FashionMNIST archives into *path* (the base dir) unless
        they are already present.'''
        if self._exists(path):
            print("FashionMNIST files already downloaded...")
            return

        if not os.path.isdir(path):
            os.makedirs(path)

        # Destination files and the matching source urls, index-aligned.
        zip_files = Fashion.get_paths(path)
        urls = [TRAIN_IMGS_URL, TRAIN_LABLES_URL,
                TEST_IMAGES_URL, TEST_LABELS_URL]

        import urllib
        for filename, url in zip(zip_files, urls):
            print("downloading ", filename)
            urllib.urlretrieve(url=url, filename=filename)

        print("FashionMNIST downloaded successfully...")
Example 16
Project: weibo_album_spider   Author: airbasic   File: weibospider.py    MIT License 6 votes vote down vote up
def run(self):
		"""Crawler entry point: log in to Weibo, collect fans of the seed
		account(s), scrape each fan's photo URLs, and download the photos
		under rices/."""
		logger.debug('开工...')
		# Seed account(s) the crawl starts from.
		uidList = [{'uid':'3344758714','nick':'root'}]
		fanList = []
		imgList = []

		driver = self._loginWeibo()
		#fanList = self._getFans(driver)
		for item in uidList:
			fanList += self._getFans(driver,item['uid'])
		# Collect the photo URL list for every fan.
		for item in fanList:
			imgListTmp = []
			imgListTmp = self._getPhotos(driver,item['uid'])
			imgList.append({'uid':item['uid'],'imgList':imgListTmp})
		for item in imgList:
			# A pre-existing per-uid directory means this user is done.
			if os.path.exists('rices/'+item['uid']):
				continue
			else:
				os.mkdir('rices/'+item['uid'])
				for itemB in item['imgList']:
					listTmp = itemB.split('/')
					filename = listTmp[len(listTmp)-1]
					# NOTE(review): files are written to rices/<filename>,
					# not rices/<uid>/<filename>, so the per-uid directory
					# created above stays empty — confirm intent.
					urllib.urlretrieve(itemB, 'rices/%s' % filename)

		driver.quit()
Example 17
Project: weibo_album_spider   Author: airbasic   File: weibospider.py    MIT License 6 votes vote down vote up
def _unifyImgUrl(self,imgurl,uid='null'):
		"""Rewrite a thumbnail image URL to its 'large' variant, download it
		into rices/<uid>/ with browser-like headers, and return the rewritten
		URL.  URLs that do not have the expected 5-component shape are left
		unchanged (only the thumbnail is kept)."""
		listTmp = imgurl.split('/')
		human_headers = {'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
						 'User-Agent': 'Mozilla/5.0 (Linux; U; Android 5.1.1; en-us; KIW-AL10 Build/HONORKIW-AL10) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 UCBrowser/1.0.0.100 U3/0.8.0 Mobile Safari/534.30 AlipayDefined(nt:WIFI,ws:360|592|3.0) AliApp(AP/9.5.3.030408) AlipayClient/9.5.3.030408 Language/zh-Hans'
						}
		if len(listTmp) == 5:
			# Path component 3 selects the size bucket; swap it for 'large'.
			listTmp[3] = 'large'
			imgurl = '/'.join(listTmp)
			if os.path.exists('rices/'+uid)==False:
				os.mkdir('rices/'+uid)
			#urllib.urlretrieve(imgurl, 'rices/%s/%s' % (uid,listTmp[4]))
			# Stream the image with requests (urlretrieve was abandoned,
			# presumably because it sends no browser headers — confirm).
			resp = requests.get(imgurl,headers=human_headers,stream=True)
			imgFileName = 'rices/%s/%s' % (uid,listTmp[4])
			with open(imgFileName,'wb') as fd:
				for chunk in resp.iter_content():
					fd.write(chunk)
				fd.close()

		else:
			logger.debug('无法解析小图url,保留缩略图:%s' % imgurl)
		return '/'.join(listTmp)
Example 18
Project: Python   Author: Guzi219   File: englishDownloadWithBrowserUI.py    MIT License 6 votes vote down vote up
def saveFile(self, url, page, idx):
        """Download the file at *url* into self.store_dir.

        The local name combines today's date, the page number and the
        zero-padded index plus the original extension, e.g.
        ``<date>_p_<page>_<idx>_<ext>``.  Network errors are reported but
        not raised.
        """
        user_define_name = self.now_date() + '_p_' + str(page) + '_' + string.zfill(idx, 2)  # pad index to 2 digits
        file_ext = self.file_extension(url)  # file extension
        save_file_name = user_define_name + "_" + file_ext

        # urlretrieve could not save the file; use requests + open() instead
        # urllib.urlretrieve(item[0], self.save_path + save_file_name)
        # save the image
        url = self.CheckUrlValidate(url)
        try:
            pic = requests.get(url, timeout=10)
            f = open(self.store_dir + os.sep + save_file_name, 'wb')
            f.write(pic.content)
            f.close()
            print '\ndone save file ' + save_file_name
        except ReadTimeout:
            print 'save file %s failed. cause by timeout(10)' % (save_file_name)
        except MissingSchema:
            print 'invalid url %s' % (url)
        except Exception, e:
            print e

    # save the document
Example 19
Project: Python   Author: Guzi219   File: englishDownload.py    MIT License 6 votes vote down vote up
def saveFile(self, url, page, idx):
        """Download the file at *url* into self.store_dir.

        The local name combines today's date, the page number and the
        zero-padded index plus the original extension.  Timeouts and bad
        URLs are reported to stdout rather than raised.
        """
        user_define_name = self.now_date() + '_p_' + str(page) + '_' + string.zfill(idx, 2)  # pad index to 2 digits
        file_ext = self.file_extension(url)  # file extension
        save_file_name = user_define_name + "_" + file_ext

        # urlretrieve could not save the file; use requests + open() instead
        # urllib.urlretrieve(item[0], self.save_path + save_file_name)
        # save the image
        url = self.CheckUrlValidate(url)
        try:
            pic = requests.get(url, timeout=10)
            f = open(self.store_dir + os.sep + save_file_name, 'wb')
            f.write(pic.content)
            f.close()
            print '\ndone save file ' + save_file_name
        except ReadTimeout:
            print 'save file %s failed. cause by timeout(10)' % (save_file_name)
        except MissingSchema:
            print 'invalid url %s' % (url)
        except Exception, e:
            print e

    # save the document
Example 20
Project: NAFAE   Author: jshi31   File: coco.py    MIT License 6 votes vote down vote up
def download(self, tarDir=None, imgIds=None):
        '''
        Download COCO images from mscoco.org server.

        Fix: replaced the mutable default ``imgIds=[]`` with ``None``;
        behavior is unchanged (empty/missing ids downloads everything).

        :param tarDir (str): COCO results directory name
               imgIds (list): images to be downloaded (all images if empty)
        :return: -1 when no target directory is given, otherwise None
        '''
        if tarDir is None:
            print('Please specify target directory')
            return -1
        if not imgIds:
            imgs = self.imgs.values()
        else:
            imgs = self.loadImgs(imgIds)
        N = len(imgs)
        if not os.path.exists(tarDir):
            os.makedirs(tarDir)
        for i, img in enumerate(imgs):
            tic = time.time()
            fname = os.path.join(tarDir, img['file_name'])
            # Skip files already on disk so interrupted runs can resume.
            if not os.path.exists(fname):
                urllib.urlretrieve(img['coco_url'], fname)
            print('downloaded %d/%d images (t=%.1fs)'%(i, N, time.time()- tic))
Example 21
Project: malcode   Author: moonsea   File: zeustracker.py    GNU General Public License v3.0 5 votes vote down vote up
def GetFile(url, file):
    """Download *url* into resource/zeustracker/<file>, creating the
    directory on first use.

    NOTE(review): the parameter name ``file`` shadows the Python 2 builtin.
    """
    # url = ''.join(['http://vxheaven.org/dl/', file])

    if not os.path.exists('resource/zeustracker'):
        os.makedirs('resource/zeustracker')

    file_path = os.path.join('resource/zeustracker', file)

    print '[+] start downloading ', file
    urllib.urlretrieve(url, file_path)
Example 22
Project: malcode   Author: moonsea   File: vxheaven.py    GNU General Public License v3.0 5 votes vote down vote up
def GetFile(url, file):
    """Download *url* into the local malcode tree.

    NOTE(review): the directory created is 'malcode/src' but the file is
    written to os.path.join('malcode', file) — one of the two paths looks
    wrong; confirm against the caller (the sibling zeustracker version
    uses the same directory for both).
    """
    # url = ''.join(['http://vxheaven.org/dl/', file])

    if not os.path.exists('malcode/src'):
        os.makedirs('malcode/src')

    file_path = os.path.join('malcode', file)

    print '[+] start downloading ', file
    urllib.urlretrieve(url, file_path)
Example 23
Project: pyblish-win   Author: pyblish   File: test_urllibnet.py    GNU Lesser General Public License v3.0 5 votes vote down vote up
def urlretrieve(self, *args):
        """Call urllib.urlretrieve through _open_with_retry so transient
        network failures are retried before failing the test."""
        return _open_with_retry(urllib.urlretrieve, *args)
Example 24
Project: pyblish-win   Author: pyblish   File: test_urllibnet.py    GNU Lesser General Public License v3.0 5 votes vote down vote up
def test_basic(self):
        # urlretrieve() must hand back an existing, readable file path.
        file_location, info = self.urlretrieve("http://www.example.com/")
        self.assertTrue(os.path.exists(file_location), "file location returned by"
                        " urlretrieve is not a valid path")
        FILE = file(file_location)
        try:
            data = FILE.read()
            self.assertTrue(data, "reading from the file location returned"
                         " by urlretrieve failed")
        finally:
            FILE.close()
            os.unlink(file_location)
Example 25
Project: pyblish-win   Author: pyblish   File: test_urllibnet.py    GNU Lesser General Public License v3.0 5 votes vote down vote up
def test_specified_path(self):
        # An explicitly requested destination file name must be honoured.
        destination = test_support.TESTFN
        file_location, info = self.urlretrieve("http://www.example.com/",
                                               destination)
        self.assertEqual(file_location, destination)
        self.assertTrue(os.path.exists(file_location))
        FILE = file(file_location)
        try:
            self.assertTrue(FILE.read(), "reading from temporary file failed")
        finally:
            FILE.close()
            os.unlink(file_location)
Example 26
Project: pyblish-win   Author: pyblish   File: test_urllibnet.py    GNU Lesser General Public License v3.0 5 votes vote down vote up
def test_header(self):
        # The second value returned by urlretrieve must be a usable
        # mimetools.Message header object.
        retrieved = self.urlretrieve("http://www.example.com/")
        file_location, header = retrieved
        os.unlink(file_location)
        self.assertIsInstance(header, mimetools.Message,
                              "header is not an instance of mimetools.Message")
Example 27
Project: pyblish-win   Author: pyblish   File: test_urllibnet.py    GNU Lesser General Public License v3.0 5 votes vote down vote up
def test_data_header(self):
        # The Date header returned by urlretrieve should parse with the
        # standard RFC 1123 date format.
        logo = "http://www.example.com/"
        file_location, fileheaders = self.urlretrieve(logo)
        os.unlink(file_location)
        datevalue = fileheaders.getheader('Date')
        dateformat = '%a, %d %b %Y %H:%M:%S GMT'
        try:
            time.strptime(datevalue, dateformat)
        except ValueError:
            # Bug fix: TestCase.fail() takes a single message argument; the
            # original passed the format string and its value as two
            # arguments, which itself raises a TypeError.
            self.fail('Date value not in %r format' % dateformat)
Example 28
Project: pyblish-win   Author: pyblish   File: test_urllib.py    GNU Lesser General Public License v3.0 5 votes vote down vote up
def test_basic(self):
        # A local file URL must resolve to its own path, with a
        # mimetools.Message headers object as the second return value.
        result = urllib.urlretrieve("file:%s" % test_support.TESTFN)
        location, headers = result
        self.assertEqual(location, test_support.TESTFN)
        self.assertIsInstance(headers, mimetools.Message,
                              "did not get a mimetools.Message instance as "
                              "second returned value")
Example 29
Project: pyblish-win   Author: pyblish   File: test_urllib.py    GNU Lesser General Public License v3.0 5 votes vote down vote up
def test_reporthook(self):
        # The reporthook must receive integer arguments and be called with
        # a count that increases by one each time.
        def hooktester(count, block_size, total_size, count_holder=[0]):
            self.assertIsInstance(count, int)
            self.assertIsInstance(block_size, int)
            self.assertIsInstance(total_size, int)
            self.assertEqual(count, count_holder[0])
            count_holder[0] += 1
        second_temp = "%s.2" % test_support.TESTFN
        self.registerFileForCleanUp(second_temp)
        source = self.constructLocalFileUrl(test_support.TESTFN)
        urllib.urlretrieve(source, second_temp, hooktester)
Example 30
Project: pyblish-win   Author: pyblish   File: test_urllib.py    GNU Lesser General Public License v3.0 5 votes vote down vote up
def test_reporthook_0_bytes(self):
        # A zero-length source triggers exactly one reporthook call with a
        # reported total size of 0.
        calls = []
        def hooktester(count, block_size, total_size, _report=calls):
            _report.append((count, block_size, total_size))
        srcFileName = self.createNewTempFile()
        urllib.urlretrieve(self.constructLocalFileUrl(srcFileName),
                           test_support.TESTFN, hooktester)
        self.assertEqual(len(calls), 1)
        self.assertEqual(calls[0][2], 0)
Example 31
Project: pyblish-win   Author: pyblish   File: test_urllib.py    GNU Lesser General Public License v3.0 5 votes vote down vote up
def test_reporthook_5_bytes(self):
        # A 5-byte file fits in a single 8192-byte block, so the hook fires
        # twice: once when the "network connection" is established and once
        # for the single block read.
        calls = []
        def hooktester(count, block_size, total_size, _report=calls):
            _report.append((count, block_size, total_size))
        srcFileName = self.createNewTempFile("x" * 5)
        urllib.urlretrieve(self.constructLocalFileUrl(srcFileName),
                           test_support.TESTFN, hooktester)
        self.assertEqual(len(calls), 2)
        self.assertEqual(calls[0][1], 8192)
        self.assertEqual(calls[0][2], 5)
Example 32
Project: pyblish-win   Author: pyblish   File: test_urllib.py    GNU Lesser General Public License v3.0 5 votes vote down vote up
def test_short_content_raises_ContentTooShortError_without_reporthook(self):
        # The fake server advertises Content-Length: 100 but serves only two
        # bytes; urlretrieve() must raise ContentTooShortError even when no
        # reporthook is supplied.
        self.fakehttp('''HTTP/1.1 200 OK
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Connection: close
Content-Length: 100
Content-Type: text/html; charset=iso-8859-1

FF
''')
        try:
            self.assertRaises(urllib.ContentTooShortError, urllib.urlretrieve, 'http://example.com/')
        finally:
            self.unfakehttp()
Example 33
Project: twitter-export-image-fill   Author: mwichary   File: twitter-export-image-fill.py    The Unlicense 5 votes vote down vote up
def download_image(url, local_filename):
  """Download *url* to *local_filename*; return True on success.

  Honors the module-level ``download_images`` switch (treated as success
  when downloads are disabled).  Failures are deliberately swallowed —
  this is best-effort — but we now catch Exception instead of a bare
  ``except`` so KeyboardInterrupt/SystemExit still propagate.
  """
  if not download_images:
    return True

  try:
    urlretrieve(url, local_filename)
    return True
  except Exception:
    return False


# Download a given video via youtube-dl 
Example 34
Project: twitter-export-image-fill   Author: mwichary   File: twitter-export-image-fill.py    The Unlicense 5 votes vote down vote up
def download_or_copy_avatar(user, total_image_count, total_video_count, total_media_precount, year_str, month_str):
  """Ensure *user*'s avatar exists locally, then point the user dict at it.

  Returns False when the user was already processed or the avatar could
  not be fetched; True once the avatar is in place and the dict rewritten.
  The count/date parameters are only used for the progress line.
  """
  # _orig existing means we already processed this user
  if 'profile_image_url_https_orig' in user:
    return False

  avatar_url = user['profile_image_url_https']
  extension = os.path.splitext(avatar_url)[1]
  local_filename = "img/avatars/%s%s" % (user['screen_name'], extension)

  if not os.path.isfile(local_filename):
    # Prefer copying from an earlier archive (if one was supplied) over
    # re-downloading.
    can_be_copied = args.EARLIER_ARCHIVE_PATH and os.path.isfile(earlier_archive_path + local_filename)

    output_line("[%0.1f%%] %s/%s: %s avatar..." %
      ((total_image_count + total_video_count) / total_media_precount * 100, \
      year_str, month_str, \
      "Copying" if can_be_copied else "Downloading"))

    # If using an earlier archive as a starting point, try to see if the
    # avatar image is there and can be copied
    if can_be_copied:
      copyfile(earlier_archive_path + local_filename, local_filename)
    # Otherwise download it
    else:
      try:
        urlretrieve(avatar_url, local_filename)
      except:
        # Okay to quietly fail, this is just an avatar
        # (And, apparently, some avatars return 404.)
        return False

  # Keep the original URL under *_orig and swap in the local path.
  user['profile_image_url_https_orig'] = user['profile_image_url_https']
  user['profile_image_url_https'] = local_filename
  return True
Example 35
Project: petuk.corp   Author: fnugrahendi   File: wget.py    GNU General Public License v2.0 5 votes vote down vote up
def callback_progress(blocks, block_size, total_size, bar_function):
    """callback function for urlretrieve that is called when connection is
    created and when once for each block

    draws adaptive progress bar in terminal/console

    use sys.stdout.write() instead of "print,", because it allows one more
    symbol at the line end without linefeed on Windows

    :param blocks: number of blocks transferred so far
    :param block_size: in bytes
    :param total_size: in bytes, can be -1 if server doesn't return it
    :param bar_function: another callback function to visualize progress
    """
    global __current_size

    width = min(100, get_console_width())

    if sys.version_info[:3] == (3, 3, 0):  # regression workaround
        # Python 3.3.0 mis-reports the block count, so keep our own running
        # byte total across calls in the module-level __current_size.
        if blocks == 0:  # first call
            __current_size = 0
        else:
            __current_size += block_size
        current_size = __current_size
    else:
        # blocks*block_size overshoots on the final partial block, so clamp
        # to total_size.
        current_size = min(blocks*block_size, total_size)
    progress = bar_function(current_size, total_size, width)
    if progress:
        sys.stdout.write("\r" + progress)
Example 36
Project: petuk.corp   Author: fnugrahendi   File: wget.py    GNU General Public License v2.0 5 votes vote down vote up
def download(url, out=None, bar=bar_adaptive):
    """High level function, which downloads URL into tmp file in current
    directory and then renames it to filename autodetected from either URL
    or HTTP headers.

    :param bar: function to track download progress (visualize etc.)
    :param out: output filename or directory
    :return:    filename where URL is downloaded to
    """
    names = dict()
    names["out"] = out or ''
    names["url"] = filename_from_url(url)
    # get filename for temp file in current directory
    prefix = (names["url"] or names["out"] or ".") + "."
    # mkstemp reserves a unique name; close and unlink it right away so
    # urlretrieve can recreate the file itself while keeping the name.
    (fd, tmpfile) = tempfile.mkstemp(".tmp", prefix=prefix, dir=".")
    os.close(fd)
    os.unlink(tmpfile)

    # set progress monitoring callback
    def callback_charged(blocks, block_size, total_size):
        # 'closure' to set bar drawing function in callback
        callback_progress(blocks, block_size, total_size, bar_function=bar)
    if bar:
        callback = callback_charged
    else:
        callback = None

    (tmpfile, headers) = urllib.urlretrieve(url, tmpfile, callback)
    # Filename suggested by the server headers (presumably from
    # Content-Disposition — confirm in filename_from_headers).
    names["header"] = filename_from_headers(headers)
    if os.path.isdir(names["out"]):
        # out is a directory: put the autodetected name inside it.
        filename = names["header"] or names["url"]
        filename = names["out"] + "/" + filename
    else:
        # Precedence: explicit out > header-derived > URL-derived.
        filename = names["out"] or names["header"] or names["url"]
    # add numeric ' (x)' suffix if filename already exists
    if os.path.exists(filename):
        filename = filename_fix_existing(filename)
    shutil.move(tmpfile, filename)

    #print headers
    return filename
Example 37
Project: flasky   Author: RoseOu   File: _phpbuiltins.py    MIT License 5 votes vote down vote up
def get_php_references():
        """Download the PHP manual tarball, extract it into the current
        directory and yield each reference file; the downloaded temp file
        is removed once iteration completes."""
        download = urllib.urlretrieve(PHP_MANUAL_URL)
        archive = tarfile.open(download[0])
        archive.extractall()
        archive.close()
        pattern = "%s%s" % (PHP_MANUAL_DIR, PHP_REFERENCE_GLOB)
        for reference_file in glob.glob(pattern):
            yield reference_file
        os.remove(download[0])
Example 38
Project: dynamic-training-with-apache-mxnet-on-aws   Author: awslabs   File: data.py    Apache License 2.0 5 votes vote down vote up
def get_avazu_data(data_dir, data_name, url):
    """Ensure the Avazu dataset file *data_name* exists under *data_dir*.

    Downloads ``<url><data_name>.bz2`` and decompresses it in place when
    the uncompressed file is missing.

    :param data_dir: directory holding the dataset (created if missing)
    :param data_name: base file name of the dataset
    :param url: base URL the ``.bz2`` archive is fetched from
    """
    if not os.path.isdir(data_dir):
        os.mkdir(data_dir)
    os.chdir(data_dir)
    if not os.path.exists(data_name):
        print("Dataset " + data_name + " not present. Downloading now ...")
        import urllib
        # Fix: we are already inside data_dir, so the original
        # os.path.join(data_dir, ...) pointed at data_dir/data_dir for
        # relative paths; the bare archive name is correct here.
        zippath = data_name + ".bz2"
        urllib.urlretrieve(url + data_name + ".bz2", zippath)
        # Fix: the original formatted only data_name with %r and appended
        # ".bz2" outside the quotes; quote the complete archive name.
        os.system("bzip2 -d %r" % zippath)
        print("Dataset " + data_name + " is now present.")
    os.chdir("..")
Example 39
Project: dynamic-training-with-apache-mxnet-on-aws   Author: awslabs   File: capsulenet.py    Apache License 2.0 5 votes vote down vote up
def download_data(url, force_download=False):
    """Return the local filename for *url*, downloading it only when the
    file is absent or *force_download* is true."""
    fname = url.rsplit("/", 1)[-1]
    needs_fetch = not os.path.exists(fname) or force_download
    if needs_fetch:
        urllib.urlretrieve(url, fname)
    return fname
Example 40
Project: dynamic-training-with-apache-mxnet-on-aws   Author: awslabs   File: compare_layers.py    Apache License 2.0 5 votes vote down vote up
def read_image(img_path, image_dims=None, mean=None):
    """
    Reads an image from file path or URL, optionally resizing to given image dimensions and
    subtracting mean.
    :param img_path: path to file, or url to download
    :param image_dims: image dimensions to resize to, or None
    :param mean: mean file to subtract, or None
    :return: loaded image, in RGB format
    """

    import urllib

    if img_path.startswith('http'):
        # fetch remote images to a local file named after the URL tail
        local_name = img_path.split("/")[-1]
        urllib.urlretrieve(img_path, local_name)
        loaded = cv2.imread(local_name)
    else:
        loaded = cv2.imread(img_path)

    # OpenCV loads BGR; convert to the RGB layout the caller expects
    rgb = cv2.cvtColor(loaded, cv2.COLOR_BGR2RGB)

    if image_dims is not None:
        rgb = cv2.resize(rgb, image_dims)  # resize to fit the model input

    # (h, w, c) -> (c, h, w), then prepend a batch axis: (n, c, h, w)
    chw = np.rollaxis(rgb, 2)
    batched = chw[np.newaxis, :]

    if mean is not None:
        mean_arr = np.array(mean)
        if mean_arr.shape == (3,):
            # broadcast a per-channel mean over (n, c, 1, 1)
            mean_arr = mean_arr[np.newaxis, :, np.newaxis, np.newaxis]
        batched = batched.astype(np.float32) - mean_arr

    return batched
Example 41
Project: ANN   Author: waynezv   File: baidu_spy.py    MIT License 5 votes vote down vote up
def getImg(html):
    """Download every Tieba image referenced in *html*.

    Finds all ``src="....jpg" pic_ext`` URLs in the page source and saves
    each to the current directory as 0.jpg, 1.jpg, ...

    :param html: page source to scan
    :return: number of images downloaded
    """
    reg = r'src="(.+?\.jpg)" pic_ext'
    imgre = re.compile(reg)
    imglist = imgre.findall(html)
    x = 0
    for imgurl in imglist:
        urllib.urlretrieve(imgurl, '%s.jpg' % x)
        x = x + 1
    # BUG FIX: the original re-fetched the page and called getImg()
    # recursively from inside this loop (mis-indented module-level driver
    # code), causing unbounded recursion and repeated downloads.
    return x
Example 42
Project: view-finding-network   Author: yiling-chen   File: download_images.py    GNU General Public License v3.0 5 votes vote down vote up
def fetch_image(url):
    """Download the image at *url* into the module-level ``image_folder``,
    skipping URLs whose file already exists locally.

    Python 2 code (print statement, urllib.urlretrieve).
    """
    filename = os.path.split(url)[-1]
    full_path = os.path.join(image_folder, filename)
    if os.path.exists(full_path):
        # already fetched on a previous run - nothing to do
        return

    print '\tDownloading', filename
    # urlretrieve with no destination saves to a temp file and returns
    # (temp_path, mime_headers); the image is then re-saved to full_path
    file, mime = urllib.urlretrieve(url)
    photo = Image.open(file)
    photo.save(full_path)
Example 43
Project: Paradrop   Author: ParadropLabs   File: snappy.py    Apache License 2.0 5 votes vote down vote up
def getFile(self):
    """Fetch the file when ``self.source`` is a URL, then report whether
    the (possibly downloaded) file exists on disk.

    For http(s) sources the file is stored under /tmp and ``self.source``
    is rebound to the local copy.
    """
    is_remote = self.source.startswith("http")
    if is_remote:
        self.path = "/tmp"

        print("Downloading {}...".format(self.source))
        local_copy = os.path.join(self.path, self.filename)
        urllib.urlretrieve(self.source, local_copy)
        self.source = local_copy

    return os.path.isfile(self.source)
Example 44
Project: ArtGAN   Author: cs-chan   File: inception_score.py    BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def _init_inception():
    """Download (if needed) the Inception model tarball from DATA_URL,
    load its frozen graph, clear the batch dimension on every op feeding
    pool_3 so arbitrary minibatch sizes work, and build the module-level
    ``softmax`` output tensor.

    Side effects: sets the global ``softmax`` and writes files under
    MODEL_DIR.
    """
    global softmax
    if not os.path.exists(MODEL_DIR):
        os.makedirs(MODEL_DIR)
    filename = DATA_URL.split('/')[-1]
    filepath = os.path.join(MODEL_DIR, filename)
    if not os.path.exists(filepath):
        # progress callback for urlretrieve: rewrites a single status line
        def _progress(count, block_size, total_size):
            sys.stdout.write('\r>> Downloading %s %.1f%%' % (
                filename, float(count * block_size) / float(total_size) * 100.0))
            sys.stdout.flush()

        filepath, _ = urllib.urlretrieve(DATA_URL, filepath, _progress)
        print()
        statinfo = os.stat(filepath)
        print('Succesfully downloaded', filename, statinfo.st_size, 'bytes.')
    tarfile.open(filepath, 'r:gz').extractall(MODEL_DIR)
    with tf.gfile.FastGFile(os.path.join(
            MODEL_DIR, 'classify_image_graph_def.pb'), 'rb') as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())
        _ = tf.import_graph_def(graph_def, name='')
    # Works with an arbitrary minibatch size.
    with tf.Session() as sess:
        pool3 = sess.graph.get_tensor_by_name('pool_3:0')
        ops = pool3.graph.get_operations()
        for op_idx, op in enumerate(ops):
            for o in op.outputs:
                shape = o.get_shape()
                shape = [s.value for s in shape]
                new_shape = []
                for j, s in enumerate(shape):
                    # replace a size-1 leading (batch) dim with None
                    if s == 1 and j == 0:
                        new_shape.append(None)
                    else:
                        new_shape.append(s)
                # NOTE(review): writes a private TensorShape attribute -
                # relies on TF internals; confirm against the TF version in use
                o._shape = tf.TensorShape(new_shape)
        w = sess.graph.get_operation_by_name("softmax/logits/MatMul").inputs[1]
        logits = tf.matmul(tf.squeeze(pool3), w)
        softmax = tf.nn.softmax(logits)
Example 45
Project: fine-lm   Author: akzaidi   File: utils.py    MIT License 5 votes vote down vote up
def download(url, download_dir):
  """Fetch *url* into *download_dir*, skipping the fetch when the target
  already exists.

  The download goes to a '.incomplete' temp name first and is renamed
  into place only on success.  Returns the final path.
  """
  outname = os.path.join(download_dir, os.path.basename(url))
  if tf.gfile.Exists(outname):
    print('Found %s, skipping download' % outname)
    return outname
  partial = outname + '.incomplete'
  print('Downloading %s' % url)
  partial, _ = urllib.urlretrieve(url, partial)
  tf.gfile.Rename(partial, outname)
  return outname
Example 46
Project: Recipes   Author: Lasagne   File: massachusetts_road_dataset_utils.py    MIT License 5 votes vote down vote up
def download_dataset(all_tasks, num_workers=4):
    """Fetch every (url, filename) task in *all_tasks* in parallel using a
    pool of *num_workers* worker processes."""
    def _fetch_one(task_args):
        # Pool.map delivers one tuple per task; splat it into urlretrieve
        return urlretrieve(*task_args)

    workers = Pool(num_workers)
    workers.map(_fetch_one, all_tasks)
    workers.close()
    workers.join()
Example 47
Project: reimplement-paper   Author: jsqihui   File: utils.py    Apache License 2.0 5 votes vote down vote up
def download_file(url, filename):
    """Download *url* to *filename*, rendering a tqdm progress bar."""
    progress_bar = TqdmUpTo(unit='B', unit_scale=True, miniters=1,
                            desc=filename)
    with progress_bar as t:
        urllib.urlretrieve(url, filename=filename,
                           reporthook=t.update_to, data=None)
Example 48
Project: epschedule   Author: guberti   File: run_python_tests.py    MIT License 5 votes vote down vote up
def download(url, filename):
    """Print a status line and fetch *url* to make_path(filename).

    Python 2 code (print statement, urllib.urlretrieve).
    """
    print 'Downloading ' + filename
    urllib.urlretrieve(url, make_path(filename))
Example 49
Project: Lofter-image-Crawler   Author: sparrow629   File: LofterCrawler.py    MIT License 5 votes vote down vote up
def getImg(html,url,posturl):
	"""Download every unique .jpg/.jpeg image referenced in *html* into
	Lofterimgdownload/<blogname>/, naming files <postname>_<i>.jpg.

	Python 2 code (print statements, urllib.urlretrieve).

	:param html: page source to scan for image src attributes
	:param url: blog URL of the form http://<blogname>.lofter.com/...
	:param posturl: post URL, used to derive the file name prefix
	"""
	global Number

	reg = r'src="(.*?\.jpe*?g\?.*?)"'
	imgre = re.compile(reg)
	imglist_none = re.findall(imgre, html)
	# de-duplicate; note set() discards the original order
	imglist = list(set(imglist_none))
	# return imglist

	postfix = '.lofter.com'
	# strip the leading 'http://' (7 chars) and the lofter.com suffix
	blogname = url[7:url.index(postfix)]
	path = 'Lofterimgdownload/%s/' % (blogname)
	if not os.path.exists(path):
		os.makedirs(path)

	if imglist:
		Postname = getPostname(posturl)
		print len(imglist)
		# print imglist
		i = 0
		for imgurl in imglist:
			Name = Postname + '_' + str(i)

			target = path + '%s.jpg' % Name
			i += 1
			print "Downloading %s " % target
			urllib.urlretrieve(imgurl, target)

	else:
		print 'There is no image!'
Example 50
Project: Lofter-image-Crawler   Author: sparrow629   File: crawler.py    MIT License 5 votes vote down vote up
def getImg(html,url,posturl):
	"""Collect the unique .jpg URLs referenced in *html* and print what
	would be downloaded; the urlretrieve call itself is commented out in
	this variant.

	Python 2 code.  NOTE(review): ``path`` has no trailing '/', so
	``target`` becomes 'Lofterimgdownload/<blogname><name>.jpg' - looks
	like a missing separator; confirm before re-enabling the download.
	"""
	global Number

	reg = r'src="(.*?\.jpg\?.*?)"'
	imgre = re.compile(reg)
	imglist_none = re.findall(imgre, html)
	# de-duplicate; set() discards the original order
	imglist = list(set(imglist_none))
	# return imglist 
	postfix = '.lofter.com'
	# strip the leading 'http://' (7 chars) and the lofter.com suffix
	blogname = url[7:url.index(postfix)]
	path = 'Lofterimgdownload/%s' % (blogname)
	if not os.path.exists(path):
		os.makedirs(path)
	if imglist:
		Name = getPostname(posturl)

		print len(imglist)
		print imglist
		i = 0
		for imgurl in imglist:
			Number += 1
			print "Downloading %s image" % (Name+str(i))
			target = path + '%s.jpg' % (Name+str(i))
			i += 1
			# urllib.urlretrieve(imgurl, target)

	else:
		print 'There is no image!'
Example 51
Project: Lofter-image-Crawler   Author: sparrow629   File: crawler_1.py    MIT License 5 votes vote down vote up
def getImg(html):
    """Save every stripmeta-tagged .jpg linked from *html* into
    Lofterimgdownload/ as 0.jpg, 1.jpg, ... (duplicate URLs removed)."""
    pattern = re.compile(r'src="(.*?\.jpg.*?stripmeta\=0)"')
    unique_urls = list(set(pattern.findall(html)))
    save_dir = 'Lofterimgdownload/'
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    for index, img_url in enumerate(unique_urls):
        urllib.urlretrieve(img_url, save_dir + '%s.jpg' % index)
Example 52
Project: Lofter-image-Crawler   Author: sparrow629   File: Lofter_image_crawler.py    MIT License 5 votes vote down vote up
def getImg(html,url,posturl):
	"""Download every unique .jpg referenced in *html* into
	Lofterimgdownload/<blogname>/ as <postname>_<i>.jpg, incrementing the
	module-level ``Number`` counter once per image.

	Python 2 code (print statements, urllib.urlretrieve).
	"""
	global Number

	reg = r'src="(.*?\.jpg\?.*?)"'
	imgre = re.compile(reg)
	imglist_none = re.findall(imgre, html)
	# de-duplicate; set() discards the original order
	imglist = list(set(imglist_none))
	# return imglist

	postfix = '.lofter.com'
	# strip the leading 'http://' (7 chars) and the lofter.com suffix
	blogname = url[7:url.index(postfix)]
	path = 'Lofterimgdownload/%s/' % (blogname)
	if not os.path.exists(path):
		os.makedirs(path)

	if imglist:
		Postname = getPostname(posturl)
		print len(imglist)
		# print imglist
		i = 0
		for imgurl in imglist:
			Number += 1
			Name = Postname + '_' + str(i)
			print "Downloading %s image" % Name
			target = path + '%s.jpg' % Name
			i += 1
			print target
			urllib.urlretrieve(imgurl, target)
	else:
		print 'There is no image!'
Example 53
Project: Lofter-image-Crawler   Author: sparrow629   File: Lofter_image_crawler_multiprocess.py    MIT License 5 votes vote down vote up
def getImg(html,url,posturl):
	"""Download every unique .jpg/.jpeg referenced in *html* into
	Lofterimgdownload/<blogname>/ as <postname>_<i>.jpg, then push the
	per-post image count onto ``countQueue``.

	Python 2 multiprocessing worker (print statements,
	urllib.urlretrieve); ``countQueue`` is presumably consumed by the
	parent process - confirm against the caller.
	"""
	global Number

	reg = r'src="(.*?\.jpe*?g\?.*?)"'
	imgre = re.compile(reg)
	imglist_none = re.findall(imgre, html)
	# de-duplicate; set() discards the original order
	imglist = list(set(imglist_none))
	# return imglist

	postfix = '.lofter.com'
	# strip the leading 'http://' (7 chars) and the lofter.com suffix
	blogname = url[7:url.index(postfix)]
	path = 'Lofterimgdownload/%s/' % (blogname)
	if not os.path.exists(path):
		os.makedirs(path)

	if imglist:
		Postname = getPostname(posturl)
		print len(imglist)
		# print imglist
		i = 0
		for imgurl in imglist:
			Name = Postname + '_' + str(i)

			target = path + '%s.jpg' % Name
			i += 1
			print "Downloading %s " % target
			urllib.urlretrieve(imgurl, target)
		# report how many images this post yielded
		countQueue.put(i)
	else:
		print 'There is no image!'

	print('''
	-------------------------------------
	      Process ID: %s finished
	--------------------------------------
	''' % os.getpid())
Example 54
Project: openhatch   Author: campbe13   File: _phpbuiltins.py    GNU Affero General Public License v3.0 5 votes vote down vote up
def get_php_references():
        """Generator: fetch the PHP manual archive from PHP_MANUAL_URL,
        unpack it into the current directory, yield every reference file
        matching PHP_MANUAL_DIR + PHP_REFERENCE_GLOB, then remove the
        downloaded archive."""
        # urlretrieve() -> (local_path, headers); only the path is needed
        download = urllib.urlretrieve(PHP_MANUAL_URL)
        tar = tarfile.open(download[0])
        tar.extractall()
        tar.close()
        for file in glob.glob("%s%s" % (PHP_MANUAL_DIR, PHP_REFERENCE_GLOB)):
            yield file
        os.remove(download[0])
Example 55
Project: service-juniper-vpn   Author: docksal   File: juniper-vpn-wrap.py    GNU General Public License v2.0 5 votes vote down vote up
def tncc_init(self):
        """Locate (downloading if necessary) Juniper's tncc.jar and work
        out which HttpNAR class name it ships; also verify that the
        tncc_preload.so shim exists.

        Python 2 code (print statement, urllib.urlretrieve).
        Raises when no known class name is found in the jar or when the
        preload shim is missing.
        """
        class_names = ('net.juniper.tnc.NARPlatform.linux.LinuxHttpNAR',
                       'net.juniper.tnc.HttpNAR.HttpNAR')
        self.class_name = None

        self.tncc_jar = os.path.expanduser('~/.juniper_networks/tncc.jar')
        try:
            # testzip() returns the first bad member, or None when intact;
            # any problem (missing or corrupt jar) triggers a re-download
            if zipfile.ZipFile(self.tncc_jar, 'r').testzip() is not None:
                raise Exception()
        except:
            print 'Downloading tncc.jar...'
            mkdir_p(os.path.expanduser('~/.juniper_networks'))
            urllib.urlretrieve('https://' + self.vpn_host
                               + '/dana-cached/hc/tncc.jar', self.tncc_jar)

        with zipfile.ZipFile(self.tncc_jar, 'r') as jar:
            for name in class_names:
                try:
                    # presence of the .class entry tells which API variant to use
                    jar.getinfo(name.replace('.', '/') + '.class')
                    self.class_name = name
                    break
                except:
                    pass

        if self.class_name is None:
            raise Exception('Could not find class name for', self.tncc_jar)

        self.tncc_preload = \
            os.path.expanduser('~/.juniper_networks/tncc_preload.so')
        if not os.path.isfile(self.tncc_preload):
            raise Exception('Missing', self.tncc_preload)
Example 56
Project: Repobot   Author: Desgard   File: _php_builtins.py    MIT License 5 votes vote down vote up
def get_php_references():
    """Yield each PHP reference file from a freshly-downloaded copy of the
    PHP manual; the temporary archive is deleted after iteration."""
    archive_path = urlretrieve(PHP_MANUAL_URL)[0]
    manual_tar = tarfile.open(archive_path)
    manual_tar.extractall()
    manual_tar.close()
    for ref_path in glob.glob("%s%s" % (PHP_MANUAL_DIR, PHP_REFERENCE_GLOB)):
        yield ref_path
    os.remove(archive_path)
Example 57
Project: CNN-chest-x-ray-abnormalities-localization   Author: TomaszRewak   File: scraper.py    MIT License 5 votes vote down vote up
def download_image(basePath, info):
    """Save the image described by *info* (a dict with 'url' and 'name'
    keys) into *basePath*/<name>."""
    destination = os.path.join(basePath, info['name'])
    urllib.urlretrieve(info['url'], destination)
Example 58
Project: CNN-chest-x-ray-abnormalities-localization   Author: TomaszRewak   File: download_model.py    MIT License 5 votes vote down vote up
def main(path, url):
    """Download the model file at *url* and store it at *path*."""
    urllib.urlretrieve(url, path)
Example 59
Project: Pancas   Author: Sup3Roque   File: plugintools.py    GNU General Public License v2.0 5 votes vote down vote up
def show_picture(url):
    """Download *url* to <data>/images/temp.jpg and start an XBMC
    slideshow of that folder so the picture is displayed."""
    images_dir = os.path.join(get_data_path(), "images")
    if not os.path.exists(images_dir):
        # best-effort: ignore races / permission issues, as the original did
        try:
            os.mkdir(images_dir)
        except:
            pass
    target = os.path.join(images_dir, "temp.jpg")
    urllib.urlretrieve(url, target)  # Download picture
    xbmc.executebuiltin("SlideShow(" + images_dir + ")")  # Show picture
Example 60
Project: Pancas   Author: Sup3Roque   File: downloader.py    GNU General Public License v2.0 5 votes vote down vote up
def download(url, dest, dp = None):
    """Download *url* to *dest*, driving an XBMC progress dialog.

    When *dp* is not supplied, a new DialogProgress titled for the Pancas
    wizard is created.
    """
    if not dp:
        dp = xbmcgui.DialogProgress()
        dp.create("[COLOR white]Pancas Wizard[/COLOR]","Downloading & Copying Files",' ', ' ')
    dp.update(0)

    def _hook(nb, bs, fs, url=url):
        # forward urlretrieve's (blocks, block_size, file_size) to the dialog
        _pbhook(nb, bs, fs, url, dp)

    urllib.urlretrieve(url, dest, _hook)
Example 61
Project: Pancas   Author: Sup3Roque   File: downloader.py    GNU General Public License v2.0 5 votes vote down vote up
def download(url, dest, dp = None):
    """Download *url* to *dest* while updating an XBMC progress dialog;
    creates a default "Checking Installation" dialog when *dp* is omitted."""
    if not dp:
        dp = xbmcgui.DialogProgress()
        dp.create("Status...","Checking Installation",' ', ' ')
    dp.update(0)
    # _pbhook receives (num_blocks, block_size, file_size) from urlretrieve
    urllib.urlretrieve(url,dest,lambda nb, bs, fs, url=url: _pbhook(nb,bs,fs,url,dp))
Example 62
Project: python-drumbo   Author: accraze   File: appveyor-bootstrap.py    BSD 2-Clause "Simplified" License 5 votes vote down vote up
def download_file(url, path):
    """Download *url* to *path*, printing cumulative progress roughly once
    per megabyte.  Returns the destination path from urlretrieve."""
    print("Downloading: {} (into {})".format(url, path))
    # state[0]: bytes downloaded so far; state[1]: bytes at last report
    state = [0, 0]

    def report(count, size, total):
        state[0] = count * size
        if state[0] - state[1] > 1000000:
            state[1] = state[0]
            print("Downloaded {:,}/{:,} ...".format(state[1], total))

    dest, _ = urlretrieve(url, path, reporthook=report)
    return dest
Example 63
Project: RMDL   Author: eric-erki   File: Download_Glove.py    GNU General Public License v3.0 5 votes vote down vote up
def download_and_extract(data='Wikipedia'):
    """
    Download and extract the GloVe
    :return: None
    """

    # map the supported corpus names to their download URLs
    glove_urls = {
        'Wikipedia': 'http://nlp.stanford.edu/data/glove.6B.zip',
        'Common_Crawl_840B': 'http://nlp.stanford.edu/data/wordvecs/glove.840B.300d.zip',
        'Common_Crawl_42B': 'http://nlp.stanford.edu/data/wordvecs/glove.42B.300d.zip',
        'Twitter': 'http://nlp.stanford.edu/data/wordvecs/glove.twitter.27B.zip',
    }
    if data not in glove_urls:
        print("prameter should be Twitter, Common_Crawl_42B, Common_Crawl_840B, or Wikipedia")
        exit(0)
    DATA_URL = glove_urls[data]

    dest_directory = DATA_DIR
    if not os.path.exists(dest_directory):
        os.makedirs(dest_directory)
    filename = DATA_URL.split('/')[-1]
    filepath = os.path.join(dest_directory, filename)
    print(filepath)

    path = os.path.abspath(dest_directory)
    if not os.path.exists(filepath):
        # no reporthook is passed, matching the original call
        filepath, _ = urllib.urlretrieve(DATA_URL, filepath)

        archive = zipfile.ZipFile(filepath, 'r')
        archive.extractall(DATA_DIR)
        archive.close()
    return path
Example 64
Project: TransferLearning   Author: ZhangJUJU   File: dataset_mnist.py    MIT License 5 votes vote down vote up
def download(self):
        """Fetch ``self.url`` into ``self.root``/``self.filename``,
        creating the parent directory when missing and returning early if
        the file is already present."""
        target = os.path.join(self.root, self.filename)
        parent = os.path.dirname(target)
        if not os.path.isdir(parent):
            os.mkdir(parent)
        if os.path.isfile(target):
            # already downloaded - nothing to do
            return
        print("Download %s to %s" % (self.url, target))
        urllib.urlretrieve(self.url, target)
        print("[DONE]")
        return
Example 65
Project: TransferLearning   Author: ZhangJUJU   File: dataset_usps.py    MIT License 5 votes vote down vote up
def download(self):
        """Fetch ``self.url`` into ``self.root``/``self.filename``.

        Creates the parent directory when missing and returns early if the
        file is already on disk.
        """
        filename = os.path.join(self.root, self.filename)
        dirname = os.path.dirname(filename)
        if not os.path.isdir(dirname):
            os.mkdir(dirname)
        if os.path.isfile(filename):
            # already downloaded - nothing to do
            return
        print("Download %s to %s" % (self.url, filename))
        urllib.urlretrieve(self.url, filename)
        print("[DONE]")
        return
Example 66
Project: inkscape-download-palette   Author: olibia   File: download_palette.py    GNU General Public License v3.0 5 votes vote down vote up
def download_palettes(self):
        """Download each user-selected palette file to its local path,
        reporting a per-palette error message on failure."""
        urls = self.palettes_urls()
        for palette in self.get_selected_palettes():
            url = urls.get(palette)

            if url is not None:
                try:
                    urllib.urlretrieve(url, self.file_path(palette))
                except:
                    # best-effort: report the failure to the user and keep
                    # downloading the remaining palettes
                    inkex.errormsg(_("File %s.gpl could not be downloaded! Please try again." % palette))
Example 67
Project: DNoiSe   Author: jankais3r   File: DNoiSe.py    MIT License 5 votes vote down vote up
def download_domains():
	"""Download the Cisco Umbrella top-1M domain list and import it into a
	local SQLite database (Domains table), logging progress to
	``log_file``.  Exits the process on download or import failure.

	Python 2 code (``print >>`` statements).
	"""
	
	start_time = time.time()
	
	# Download the Cisco Umbrella list. More info: https://s3-us-west-1.amazonaws.com/umbrella-static/index.html
	try:
		print >> log_file, time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(time.mktime(datetime.datetime.now().timetuple())))+" Downloading the domain list…"
		urllib.urlretrieve("http://s3-us-west-1.amazonaws.com/umbrella-static/top-1m.csv.zip", filename=working_directory+"domains.zip")
	except:
		print >> log_file, time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(time.mktime(datetime.datetime.now().timetuple())))+" Can't download the domain list. Quitting."
		exit()
	
	# Create a SQLite database and import the domain list
	try:
		db = sqlite3.connect(working_directory + "domains.sqlite")
		db.execute("CREATE TABLE Domains (ID INT PRIMARY KEY, Domain TEXT)")
		
		# Load the CSV into our database
		print >> log_file, time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(time.mktime(datetime.datetime.now().timetuple())))+" Importing to sqlite…"
		# pandas can read the CSV straight out of the zip archive
		df = pandas.read_csv(working_directory + "domains.zip", compression = 'zip', names = ["ID", "Domain"])
		df.to_sql("Domains", db, if_exists = "append", index = False)
	
		db.close()
	
		os.remove(working_directory + "domains.zip")
	except:
		print >> log_file, time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(time.mktime(datetime.datetime.now().timetuple())))+" Import failed. Quitting."
		exit()
	
	# Running this on 1st gen Raspberry Pi can take up to 10 minutes. Be patient.
	print >> log_file, time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(time.mktime(datetime.datetime.now().timetuple())))+" Done. It took "+str(round((time.time()-start_time),0))[0:-2]+"s to download and process the list."
# A simple loop that makes sure we have an Internet connection - it can take a while for pi-hole to get up and running after a reboot. 
Example 68
Project: rnaseq-pipeline   Author: PavlidisLab   File: geo.py    The Unlicense 5 votes vote down vote up
def run(self):
        """Download the GEO series' compressed MINiML metadata archive over
        FTP (reporting progress to the scheduler) and extract the family
        XML next to this task's output target."""
        destdir = os.path.dirname(self.output().path)
        metadata_xml_tgz = join(destdir, '{}_family.xml.tgz'.format(self.gse))

        # download compressed metadata
        # FIXME: use Entrez Web API
        # series are grouped on the FTP server under their GSE prefix with
        # the last three digits replaced by 'nnn'
        urllib.urlretrieve('ftp://ftp.ncbi.nlm.nih.gov/geo/series/{0}/{1}/miniml/{1}_family.xml.tgz'.format(self.gse[:-3] + 'nnn', self.gse),
                           reporthook=lambda numblocks, blocksize, totalsize: self.set_progress_percentage(100.0 * numblocks * blocksize / totalsize),
                           filename=metadata_xml_tgz)

        # extract metadata
        # FIXME: this is not atomic
        with tarfile.open(metadata_xml_tgz, 'r:gz') as tf:
            tf.extract(os.path.basename(self.output().path), destdir)
Example 69
Project: rnaseq-pipeline   Author: PavlidisLab   File: arrayexpress.py    The Unlicense 5 votes vote down vote up
def run(self):
        """Stream ``self.fastq_url`` into this task's output target
        atomically, reporting percentage progress to the scheduler."""
        with self.output().temporary_path() as tmp_path:
            def _report(numblocks, blocksize, totalsize):
                # fraction of the (known) total fetched so far, as a percent
                self.set_progress_percentage(100.0 * numblocks * blocksize / totalsize)

            urllib.urlretrieve(self.fastq_url,
                               reporthook=_report,
                               filename=tmp_path)
Example 70
Project: cs4065   Author: mmc-tudelft   File: wraprec.py    MIT License 5 votes vote down vote up
def _deploy_wraprec(cls):
    """Install WrapRec into WRAPREC_PATH: install mono on non-Windows
    platforms, recreate the destination folder, download and extract the
    WrapRec tarball, and record the deployed version tag.  On any failure
    the partial install is removed and the exception re-raised.

    Python 2 code (print statements, urllib.urlretrieve).
    """
    print '[notice] deploying WrapRec'

    try:
      # Platform specific steps.
      if PLATFORM != 'Windows':
        print '[notice] installing mono for Unix'
        cls._install_mono_unix()

      # Prepare destination folder.
      if os.path.exists(WRAPREC_PATH):
        shutil.rmtree(WRAPREC_PATH)
      os.makedirs(WRAPREC_PATH)

      # Download and extract the WrapRec package.
      print '[debug] retrieving %s' % WRAPREC_URL
      (temp_file_path, headers) = urllib.urlretrieve(WRAPREC_URL)
      tar_file = tarfile.open(temp_file_path, 'r:gz')
      tar_file.extractall(WRAPREC_PATH)

      # Save depoyed version tag.
      with open(WRAPREC_VERSION_TAG_PATH, 'w') as f:
        f.write('%s\n' % WRAPREC_VERSION)
    except Exception as e:
      # Remove.
      if os.path.exists(WRAPREC_PATH):
        shutil.rmtree(WRAPREC_PATH)

      # Re-raise exception.
      raise e
Example 71
Project: cs4065   Author: mmc-tudelft   File: datasets.py    MIT License 5 votes vote down vote up
def _fetch_dataset(cls, url, dataset_path):
    """Download the gzipped tarball at *url* and extract it into
    *dataset_path*.

    *dataset_path* must not already exist (os.makedirs raises otherwise).
    """
    os.makedirs(dataset_path)
    (temp_file_path, headers) = urllib.urlretrieve(url)
    # BUG FIX: close the archive when done instead of leaking the handle
    with tarfile.open(temp_file_path, 'r:gz') as tar_file:
        tar_file.extractall(dataset_path)
Example 72
Project: VTuber_Unity   Author: kwea123   File: dlib_detector.py    MIT License 5 votes vote down vote up
def __init__(self, device, path_to_detector=None, verbose=False):
        """Initialise a dlib-based face detector.

        On CUDA devices a CNN detector is used, downloading the
        mmod_human_face_detector.dat weights into the face_alignment data
        dir when absent; otherwise dlib's frontal HOG detector is used.

        :param device: device string, e.g. 'cuda' or 'cpu'
        :param path_to_detector: optional path to CNN weights (CUDA only)
        :param verbose: forwarded to the base detector
        """
        super().__init__(device, verbose)

        base_path = os.path.join(appdata_dir('face_alignment'), "data")

        # Initialise the face detector
        if 'cuda' in device:
            if path_to_detector is None:
                path_to_detector = os.path.join(
                    base_path, "mmod_human_face_detector.dat")

                if not os.path.isfile(path_to_detector):
                    print("Downloading the face detection CNN. Please wait...")

                    # download under a temp name so a partial download never
                    # masquerades as the real model file
                    path_to_temp_detector = os.path.join(
                        base_path, "mmod_human_face_detector.dat.download")

                    if os.path.isfile(path_to_temp_detector):
                        os.remove(os.path.join(path_to_temp_detector))

                    request_file.urlretrieve(
                        "https://www.adrianbulat.com/downloads/dlib/mmod_human_face_detector.dat",
                        os.path.join(path_to_temp_detector))

                    os.rename(os.path.join(path_to_temp_detector), os.path.join(path_to_detector))

            self.face_detector = dlib.cnn_face_detection_model_v1(path_to_detector)
        else:
            self.face_detector = dlib.get_frontal_face_detector()
Example 73
Project: VTuber_Unity   Author: kwea123   File: sfd_detector.py    MIT License 5 votes vote down vote up
def __init__(self, device, path_to_detector=None, verbose=False):
        """Load the S3FD face-detector weights (downloading them into
        face_alignment/ckpts when absent) and move the model to *device*
        in eval mode.

        :param device: torch device string, e.g. 'cuda' or 'cpu'
        :param path_to_detector: optional path to the s3fd checkpoint
        :param verbose: forwarded to the base detector
        """
        super(SFDDetector, self).__init__(device, verbose)

        base_path = "face_alignment/ckpts"

        # Initialise the face detector
        if path_to_detector is None:
            path_to_detector = os.path.join(
                base_path, "s3fd_convert.pth")

            if not os.path.isfile(path_to_detector):
                print("Downloading the face detection CNN. Please wait...")

                # download under a temp name so a partial download never
                # masquerades as the real checkpoint
                path_to_temp_detector = os.path.join(
                    base_path, "s3fd_convert.pth.download")

                if os.path.isfile(path_to_temp_detector):
                    os.remove(os.path.join(path_to_temp_detector))

                request_file.urlretrieve(
                    "https://www.adrianbulat.com/downloads/python-fan/s3fd_convert.pth",
                    os.path.join(path_to_temp_detector))

                os.rename(os.path.join(path_to_temp_detector), os.path.join(path_to_detector))

        self.face_detector = s3fd()
        self.face_detector.load_state_dict(torch.load(path_to_detector))
        self.face_detector.to(device)
        self.face_detector.eval()
Example 74
Project: opensourcegovernment   Author: ivbeg   File: osgcmd.py    Apache License 2.0 5 votes vote down vote up
def updatelist():
    """Update list of organizations.

    Downloads the three government.github.com YAML lists and concatenates
    them into merged.yml.  Python 2 code (print statement, builtin
    ``file()``); the file handles are left for the GC to close.
    """
    CIVHACKERS_LIST = 'https://raw.githubusercontent.com/github/government.github.com/gh-pages/_data/civic_hackers.yml'
    GOVERNMENT_LIST = 'https://raw.githubusercontent.com/github/government.github.com/gh-pages/_data/governments.yml'
    RESEARCH_LIST = 'https://raw.githubusercontent.com/github/government.github.com/gh-pages/_data/research.yml'
    print 'Downloading lists from government.github.com'
    urllib.urlretrieve(CIVHACKERS_LIST, 'civic_hackers.yml')
    urllib.urlretrieve(GOVERNMENT_LIST, 'governments.yml')
    urllib.urlretrieve(RESEARCH_LIST, 'research.yml')
    output = file('merged.yml', 'w')
    for f in ['civic_hackers.yml', 'governments.yml', 'research.yml']:
        data = file(f).read()
        output.write(data)
Example 75
Project: matrix_factorization   Author: jbaker92   File: movielens.py    MIT License 5 votes vote down vote up
def load_dataset():
    """
    Download and unzip the movielens dataset. Make sure you have a valid internet connection!

    Outputs: movielens directory to ../data
    """
    data_dir = defaults.data_dir
    if not os.path.exists(data_dir):
        os.makedirs(data_dir)
    archive_path = data_dir + "movielens.zip"
    urllib.urlretrieve("http://files.grouplens.org/datasets/movielens/ml-100k.zip",
            archive_path)
    archive = zipfile.ZipFile(archive_path, 'r')
    archive.extractall(data_dir)
    archive.close()
    # the extracted ml-100k directory is kept; the zip is no longer needed
    os.remove(archive_path)
Example 76
Project: dupuis   Author: redapple   File: appveyor-bootstrap.py    MIT License 5 votes vote down vote up
def download_file(url, path):
    """Download *url* to *path* with urlretrieve, printing cumulative
    progress roughly once per megabyte.  Returns the destination path."""
    print("Downloading: {} (into {})".format(url, path))
    # progress[0]: bytes downloaded so far; progress[1]: bytes at last print
    progress = [0, 0]

    def report(count, size, total):
        progress[0] = count * size
        if progress[0] - progress[1] > 1000000:
            progress[1] = progress[0]
            print("Downloaded {:,}/{:,} ...".format(progress[1], total))

    dest, _ = urlretrieve(url, path, reporthook=report)
    return dest
Example 77
Project: Instalker   Author: yashwantbezawada   File: Instalker.py    GNU General Public License v3.0 5 votes vote down vote up
def loop_a(count):
	"""Scrape every <img> element from the current Selenium page and save
	each image into the module-level ``path`` directory, named from its
	sanitized src URL.

	Python 2 code (``except Exception,e`` syntax); the original's mixed
	tab/space indentation is preserved verbatim.
	"""
	images = driver.find_elements_by_tag_name('img')
	for image in images:
  		src = image.get_attribute("src")
		tmp_name = src
		tmp_name = str(tmp_name)
		tmp_name = tmp_name.replace('/', '_')
		if src:
			if not os.path.exists(path+"/"+tmp_name+".jpg"):
				try:
        				urllib.urlretrieve(src,path+"/"+tmp_name+".jpg")
				except Exception,e:
					continue
Example 78
Project: pyhanlp   Author: hankcs   File: __init__.py    Apache License 2.0 4 votes vote down vote up
def download(url, path):
    """Download *url* to *path* with a progress display, preferring an
    existing local file when present.

    Installs a browser-like User-Agent opener, downloads to
    ``<path>.downloading`` first, and renames into place on success.  On
    failure the temp file is removed, help text is printed, the project
    page is opened in a browser, and the process exits with status 1.

    Returns True when *path* exists (either already or after download).
    """
    opener = urllib.build_opener()
    opener.addheaders = [('User-agent',
                          'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.162 Safari/537.36')]
    urllib.install_opener(opener)
    if os.path.isfile(path):
        print('使用本地 {}, 忽略 {}'.format(path, url))
        return True
    else:
        print('下载 {} 到 {}'.format(url, path))
        tmp_path = '{}.downloading'.format(path)
        remove_file(tmp_path)
        try:
            # progress callback: tracks speed and ETA via module globals
            def reporthook(count, block_size, total_size):
                global start_time, progress_size
                if count == 0:
                    start_time = time.time()
                    progress_size = 0
                    return
                duration = time.time() - start_time
                duration = max(1e-8, duration)  # guard against division by zero
                progress_size = int(count * block_size)
                if progress_size > total_size:
                    progress_size = total_size
                speed = int(progress_size / (1024 * duration))
                ratio = progress_size / total_size
                ratio = max(1e-8, ratio)
                percent = ratio * 100
                eta = duration / ratio * (1 - ratio)
                minutes = eta / 60
                seconds = eta % 60
                sys.stdout.write("\r%.2f%%, %d MB, %d KB/s, 还有 %d 分 %2d 秒   " %
                                 (percent, progress_size / (1024 * 1024), speed, minutes, seconds))
                sys.stdout.flush()

            import socket
            socket.setdefaulttimeout(10)
            urllib.urlretrieve(url, tmp_path, reporthook)
            print()
        except BaseException as e:
            eprint('下载失败 {} 由于 {}'.format(url, repr(e)))
            doc_url = 'https://github.com/hankcs/pyhanlp'
            eprint('请参考 %s 执行手动安装.' % doc_url)
            eprint('或手动下载 {} 到 {}'.format(url, path))
            if os.path.isfile(tmp_path):
                os.remove(tmp_path)
            browser_open(doc_url)
            exit(1)
        remove_file(path)
        os.rename(tmp_path, path)
    return True
Example 79
Project: CTGViewer   Author: jirispilka   File: GuiForms.py    GNU General Public License v3.0 4 votes vote down vote up
def _start_download(self):
        """
        Prepare for downloading: validate the destination directory, warn
        when it is not empty, verify the database url by fetching its
        MD5SUMS index, then hand off to run_download().
        """

        self._bAbort = False

        self._url = str(self.ui.lineEditUrl.text())
        self.ui.textEditInfo.clear()

        self._dirDest = str(self.ui.lnDestDir.text())

        try:
            check_directory_exists(self._dirDest)
        except Exception as ex:
            self.msgBoxError.setText('Error in destination directory')
            self.msgBoxError.setInformativeText(ex.message)
            self.msgBoxError.exec_()
            return

        files = Common.directory_listing(self._dirDest)

        ret = 0
        if len(files) > 0:
            self.msgBoxWarn.setText('The destination directory is not empty.')
            self.msgBoxWarn.setInformativeText(""" It is recommended to download database into an empty directory.
                                               Continue download? """)
            ret = self.msgBoxWarn.exec_()

        if ret == Qt.QMessageBox.No:
            return

        # check if MD5SUMS exist
        name_dest = os.path.join(self._dirDest, self._MD5SUMS)
        fname, dummy = urllib.urlretrieve(self._url + self._MD5SUMS, name_dest)

        # the server may answer with an HTML "not found" page instead of an
        # error, so scan the downloaded file for the error text
        b_not_found = False
        with io.open(fname, 'rt') as fin:
            for s in fin.readlines():
                ind = s.find(fname + ' was not found on this server')
                if ind > -1:
                    b_not_found = True

        if b_not_found:
            self.msgBoxError.setText('Error in the database url')
            s = 'File: ' + self._MD5SUMS + ' was not found at the server.'
            s += 'Download can not continue without this file. ' \
                 'Please check the database url and make sure that file MD5SUMS exists.'
            self.msgBoxError.setInformativeText(s)
            self.msgBoxError.exec_()
            return
        else:
            # if all ok -> run the actual download
            self.ui.btnStart.setEnabled(False)
            self.ui.btnStop.setEnabled(True)

            if self.run_download() == 0:
                self.ui.textEditInfo.append('Download completed')
                self.ui.btnStart.setEnabled(True)
                self.ui.btnStop.setEnabled(False)
Example 80
Project: CTGViewer   Author: jirispilka   File: GuiForms.py    GNU General Public License v3.0 4 votes vote down vote up
def run_download(self):
        """
        Download all files from given url and MD5SUMS file.

        Each listed file is fetched (with a bounded number of retries) and
        verified against its MD5 checksum; progress is shown in the GUI.
        Returns 0 on completion, 1 when aborted via self._bAbort.
        """

        # set the progress bar
        md5_file = os.path.join(self._dirDest, self._MD5SUMS)
        nrfiles = Common.get_mumber_lines(md5_file)
        self.ui.progressBar.setRange(1, nrfiles)
        self.ui.progressBar.update()

        # process all files in file: self._MD5SUMS

        with io.open(md5_file, 'rt') as fin:

            processed_files = 0
            for line in fin.readlines():

                if self._bAbort:
                    self.ui.textEditInfo.append('ABORTED')
                    return 1

                # each MD5SUMS line is '<md5><two spaces><filename>'
                md5, fname = line.split('  ')
                md5 = md5.strip()
                fname = fname.strip()

                # do not download this file
                if fname.find('HEADER.shtml') > -1:
                    processed_files += 1
                    continue

                cnt = 0
                bmd5_ok = False
                name_dest = os.path.join(self._dirDest, fname)

                # for a given number of attempts try to download a file
                while cnt < self._attempsToDownloadFile:
                    cnt += 1
                    fname, dummy = urllib.urlretrieve(self._url + fname, name_dest)
                    # keep the GUI responsive during the blocking download
                    Qt.QCoreApplication.processEvents()
                    bmd5_ok = self.check_md5(md5, fname)

                    if bmd5_ok:
                        break

                if bmd5_ok:
                    self.ui.textEditInfo.append("Downloaded file: " + name_dest)
                else:
                    s = "Failed to download file: " + name_dest
                    s += ' MD5SUM do not match'
                    self.ui.textEditInfo.append(s)

                processed_files += 1
                self.ui.progressBar.setValue(processed_files)
                Qt.QCoreApplication.processEvents()

        return 0