Python urllib.urlcleanup() Examples

The following code examples show how to use urllib.urlcleanup(). They are taken from open source Python projects. You can vote up the examples you like or vote down the ones you don't like.

Example 1
Project: tools-artbio   Author: ARTbio   File: fetch_fasta_from_NCBI.py    MIT License 6 votes vote down vote up
def get_sequences(self):
        """Fetch the sequences for ``self.ids`` in batches and write them out.

        The first ``self.count`` entries of ``self.ids`` are split into
        batches of 200. Each batch is passed, comma-joined, to
        ``self.efetch`` together with ``self.dbname``, and the returned text
        is appended (followed by a newline) to the file ``self.outname``.
        Progress is reported through ``self.logger``. When every batch has
        been written, ``urllib.urlcleanup()`` is called.

        Raises:
            QueryException: re-raised unchanged after being logged, if
                ``self.efetch`` fails for a batch.
        """
        batch_size = 200
        count = self.count
        uids_list = self.ids
        # Ceiling division: the previous "(count / batch_size) + 1" reported
        # one batch too many whenever count was an exact multiple of
        # batch_size (and 1 batch for count == 0).
        n_batches = (count + batch_size - 1) // batch_size
        self.logger.info("Batch size for efetch action: %d" % batch_size)
        self.logger.info("Number of batches for efetch action: %d" %
                         n_batches)
        with open(self.outname, 'w') as out:
            for start in range(0, count, batch_size):
                end = min(count, start + batch_size)
                batch = uids_list[start:end]
                # Explicit floor division keeps the 1-based batch number an
                # integer regardless of the interpreter's "/" semantics.
                self.logger.info("retrieving batch %d" %
                                 ((start // batch_size) + 1))
                try:
                    mfasta = self.efetch(self.dbname, ','.join(batch))
                    out.write(mfasta + '\n')
                except QueryException as e:
                    self.logger.error("%s" % e.message)
                    # Bare raise keeps the original traceback intact.
                    raise
        urllib.urlcleanup()
Example 2
Project: datanator   Author: KarrLab   File: core.py    MIT License 6 votes vote down vote up
def download_fastq(experiment_name,  sample_name, temp_directory, fastq_urls):
    """Download each FASTQ URL into ``<temp_directory>/FASTQ_FILES``.

    ``fastq_urls`` is a single string of URLs separated by single spaces.
    The URL at position ``num`` is stored as
    ``<experiment_name>__<sample_name>__<num>.fastq.gz``. A file that already
    exists and can be decompressed to the end is kept; a missing or
    unreadable file is (re)downloaded with ``urlretrieve`` followed by
    ``urllib.urlcleanup()``.

    Progress, and whether each file had to be downloaded, is printed to
    standard output. Returns ``None``.
    """
    FASTQ_DIR = "{}/FASTQ_FILES".format(temp_directory)
    if not os.path.isdir(FASTQ_DIR):
        os.makedirs(FASTQ_DIR)
    for num, url in enumerate(fastq_urls.split(" ")):
        print("starting {}".format(num))
        file_name = '{}/{}__{}__{}.fastq.gz'.format(FASTQ_DIR, experiment_name, sample_name, num)
        # Assume a download is needed unless an existing file proves intact.
        file_must_be_downloaded = True
        if os.path.isfile(file_name):
            try:
                with gzip.open(file_name, 'rb') as f:
                    # Decompress to the end to validate the archive, but in
                    # chunks so a large FASTQ is never held in memory.
                    while f.read(1024 * 1024):
                        pass
                file_must_be_downloaded = False
            except Exception:
                # Any read/decompression failure means the cached file is
                # unusable. Unlike a bare "except:", this lets
                # KeyboardInterrupt and SystemExit propagate.
                file_must_be_downloaded = True
        if file_must_be_downloaded:
            urlretrieve(url, file_name)  # there used to be a space after "gz". I removed it
            urllib.urlcleanup()
        print(file_must_be_downloaded)
        print("done with {}".format(num))
Example 3
Project: Hot   Author: dsolimando   File: regrtest.py    GNU General Public License v3.0 5 votes vote down vote up
def dash_R_cleanup(fs, ps, pic):
    """Restore saved global state, clear module caches, then collect garbage.

    Uses ``warnings``, ``sys`` and ``re`` without importing them here, so
    they are expected to be available at module level.
    NOTE(review): presumably called between repeated runs of a test by
    regrtest's -R option -- confirm against the caller.

    Parameters:
        fs:  sequence whose items replace the contents of
             ``warnings.filters`` (slice assignment, same list object).
        ps:  mapping loaded into ``copy_reg.dispatch_table`` after it has
             been cleared.
        pic: mapping loaded into ``sys.path_importer_cache`` after it has
             been cleared.

    Returns None.
    """
    import gc, copy_reg
    import _strptime, linecache, dircache
    import urlparse, urllib, urllib2, mimetypes, doctest
    import struct, filecmp
    from distutils.dir_util import _path_created

    # Restore some original values.
    warnings.filters[:] = fs
    copy_reg.dispatch_table.clear()
    copy_reg.dispatch_table.update(ps)
    sys.path_importer_cache.clear()
    sys.path_importer_cache.update(pic)

    # Clear assorted module caches.
    _path_created.clear()
    re.purge()
    _strptime._regex_cache.clear()
    urlparse.clear_cache()
    urllib.urlcleanup()
    # Reset the globally installed urllib2 opener to None.
    urllib2.install_opener(None)
    dircache.reset()
    linecache.clearcache()
    mimetypes._default_mime_types()
    struct._cache.clear()
    filecmp._cache.clear()
    doctest.master = None

    # Collect cyclic trash.
    gc.collect()
Example 4
Project: pyblish-win   Author: pyblish   File: regrtest.py    GNU Lesser General Public License v3.0 4 votes vote down vote up
def dash_R_cleanup(fs, ps, pic, zdc, abcs):
    """Restore saved global state, clear module caches, then collect garbage.

    Uses ``warnings``, ``sys``, ``re`` and ``test_support`` without
    importing them here, so they are expected to be available at module
    level.
    NOTE(review): presumably called between repeated runs of a test by
    regrtest's -R option -- confirm against the caller.

    Parameters:
        fs:   sequence whose items replace the contents of
              ``warnings.filters`` (slice assignment, same list object).
        ps:   mapping loaded into ``copy_reg.dispatch_table`` after it has
              been cleared.
        pic:  mapping loaded into ``sys.path_importer_cache`` after it has
              been cleared.
        zdc:  mapping loaded into ``zipimport._zip_directory_cache`` after
              it has been cleared; ignored when ``zipimport`` cannot be
              imported.
        abcs: mapping of ABC class -> saved registry; each class receives a
              copy of its saved registry and has both of its caches cleared.

    Returns None.
    """
    import gc, copy_reg
    import _strptime, linecache
    dircache = test_support.import_module('dircache', deprecated=True)
    import urlparse, urllib, urllib2, mimetypes, doctest
    import struct, filecmp
    from distutils.dir_util import _path_created

    # Clear the warnings registry, so they can be displayed again
    for mod in sys.modules.values():
        if hasattr(mod, '__warningregistry__'):
            del mod.__warningregistry__

    # Restore some original values.
    warnings.filters[:] = fs
    copy_reg.dispatch_table.clear()
    copy_reg.dispatch_table.update(ps)
    sys.path_importer_cache.clear()
    sys.path_importer_cache.update(pic)
    try:
        import zipimport
    except ImportError:
        pass # Run unmodified on platforms without zipimport support
    else:
        zipimport._zip_directory_cache.clear()
        zipimport._zip_directory_cache.update(zdc)

    # clear type cache
    sys._clear_type_cache()

    # Clear ABC registries, restoring previously saved ABC registries.
    for abc, registry in abcs.items():
        # Install a copy so the live ABC does not share the saved object.
        abc._abc_registry = registry.copy()
        abc._abc_cache.clear()
        abc._abc_negative_cache.clear()

    # Clear assorted module caches.
    _path_created.clear()
    re.purge()
    _strptime._regex_cache.clear()
    urlparse.clear_cache()
    urllib.urlcleanup()
    # Reset the globally installed urllib2 opener to None.
    urllib2.install_opener(None)
    dircache.reset()
    linecache.clearcache()
    mimetypes._default_mime_types()
    filecmp._cache.clear()
    struct._clearcache()
    doctest.master = None
    try:
        import ctypes
    except ImportError:
        # Don't worry about resetting the cache if ctypes is not supported
        pass
    else:
        ctypes._reset_cache()

    # Collect cyclic trash.
    gc.collect()
Example 5
Project: oss-ftp   Author: aliyun   File: regrtest.py    MIT License 4 votes vote down vote up
def dash_R_cleanup(fs, ps, pic, zdc, abcs):
    """Restore saved global state, clear module caches, then collect garbage.

    Uses ``warnings``, ``sys``, ``re`` and ``test_support`` without
    importing them here, so they are expected to be available at module
    level.
    NOTE(review): presumably called between repeated runs of a test by
    regrtest's -R option -- confirm against the caller.

    Parameters:
        fs:   sequence whose items replace the contents of
              ``warnings.filters`` (slice assignment, same list object).
        ps:   mapping loaded into ``copy_reg.dispatch_table`` after it has
              been cleared.
        pic:  mapping loaded into ``sys.path_importer_cache`` after it has
              been cleared.
        zdc:  mapping loaded into ``zipimport._zip_directory_cache`` after
              it has been cleared; ignored when ``zipimport`` cannot be
              imported.
        abcs: mapping of ABC class -> saved registry; each class receives a
              copy of its saved registry and has both of its caches cleared.

    Returns None.
    """
    import gc, copy_reg
    import _strptime, linecache
    dircache = test_support.import_module('dircache', deprecated=True)
    import urlparse, urllib, urllib2, mimetypes, doctest
    import struct, filecmp
    from distutils.dir_util import _path_created

    # Clear the warnings registry, so they can be displayed again
    for mod in sys.modules.values():
        if hasattr(mod, '__warningregistry__'):
            del mod.__warningregistry__

    # Restore some original values.
    warnings.filters[:] = fs
    copy_reg.dispatch_table.clear()
    copy_reg.dispatch_table.update(ps)
    sys.path_importer_cache.clear()
    sys.path_importer_cache.update(pic)
    try:
        import zipimport
    except ImportError:
        pass # Run unmodified on platforms without zipimport support
    else:
        zipimport._zip_directory_cache.clear()
        zipimport._zip_directory_cache.update(zdc)

    # clear type cache
    sys._clear_type_cache()

    # Clear ABC registries, restoring previously saved ABC registries.
    for abc, registry in abcs.items():
        # Install a copy so the live ABC does not share the saved object.
        abc._abc_registry = registry.copy()
        abc._abc_cache.clear()
        abc._abc_negative_cache.clear()

    # Clear assorted module caches.
    _path_created.clear()
    re.purge()
    _strptime._regex_cache.clear()
    urlparse.clear_cache()
    urllib.urlcleanup()
    # Reset the globally installed urllib2 opener to None.
    urllib2.install_opener(None)
    dircache.reset()
    linecache.clearcache()
    mimetypes._default_mime_types()
    filecmp._cache.clear()
    struct._clearcache()
    doctest.master = None
    try:
        import ctypes
    except ImportError:
        # Don't worry about resetting the cache if ctypes is not supported
        pass
    else:
        ctypes._reset_cache()

    # Collect cyclic trash.
    gc.collect()
Example 6
Project: Penetration-Testing-Study-Notes   Author: AnasAboureada   File: scythe.py    MIT License 4 votes vote down vote up
def get_request(test):
    # perform GET request

    urllib.urlcleanup() # clear cache

    try:
        user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
        req_headers = { 'User-Agent' : user_agent }
        for each in test['headers']:
            key, val = each.split(":", 1)
            key = key.lstrip()
            val = val.lstrip()
            req_headers[key] = val
        if test['requestCookie'] or test['requestCSRF']:
            # request cookie and csrf token if set in module XML
            cookie_val, csrf_val = request_value(test)
            if cookie_val:
                req_headers['cookie'] = cookie_val
            if csrf_val:
                # replace <CSRFTOKEN> with the collected token
                test['url'] = test['url'].replace("<CSRFTOKEN>", csrf_val)
                test['postParameters'] = test['postParameters'].replace("<CSRFTOKEN>", csrf_val)
                test['headers'] = [h.replace('<CSRFTOKEN>', csrf_val) for h in test['headers']]

        if opts.debug:
            # print debug output
            print textwrap.fill((" [ ] URL (GET): %s" % test['url']),
                initial_indent='', subsequent_indent=' -> ', width=80)
            print

        # assign NullHTTPErrorProcessor as default opener
        opener = urllib2.build_opener(NullHTTPErrorProcessor())
        urllib2.install_opener(opener)

        req = urllib2.Request(test['url'], headers=req_headers)
        f = urllib2.urlopen(req)
        r_body = f.read()
        r_info = f.info()
        f.close()

        # handle instances where the response body is 0 bytes in length
        if not r_body:
            print " [" + color['red'] + "!" + color['end'] + "] Zero byte response received from %s" \
                % test['name']
            r_body = "<Scythe Message: Empty response from server>"

        # returned updated test and response data
        return test, r_body, r_info, req

    except Exception:
        print textwrap.fill((" [" + color['red'] + "!" + color['end'] + "] Error contacting %s" \
            % test['url']), initial_indent='', subsequent_indent='\t', width=80)
        if opts.debug:
            for ex in traceback.format_exc().splitlines():
                print textwrap.fill((" %s" \
                    % str(ex)), initial_indent='', subsequent_indent='\t', width=80)
            print
        return test, False, False, req 
Example 7
Project: Offensive-Security-Certified-Professional   Author: StevenDias33   File: scythe.py    MIT License 4 votes vote down vote up
def get_request(test):
    # perform GET request

    urllib.urlcleanup() # clear cache

    try:
        user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
        req_headers = { 'User-Agent' : user_agent }
        for each in test['headers']:
            key, val = each.split(":", 1)
            key = key.lstrip()
            val = val.lstrip()
            req_headers[key] = val
        if test['requestCookie'] or test['requestCSRF']:
            # request cookie and csrf token if set in module XML
            cookie_val, csrf_val = request_value(test)
            if cookie_val:
                req_headers['cookie'] = cookie_val
            if csrf_val:
                # replace <CSRFTOKEN> with the collected token
                test['url'] = test['url'].replace("<CSRFTOKEN>", csrf_val)
                test['postParameters'] = test['postParameters'].replace("<CSRFTOKEN>", csrf_val)
                test['headers'] = [h.replace('<CSRFTOKEN>', csrf_val) for h in test['headers']]

        if opts.debug:
            # print debug output
            print textwrap.fill((" [ ] URL (GET): %s" % test['url']),
                initial_indent='', subsequent_indent=' -> ', width=80)
            print

        # assign NullHTTPErrorProcessor as default opener
        opener = urllib2.build_opener(NullHTTPErrorProcessor())
        urllib2.install_opener(opener)

        req = urllib2.Request(test['url'], headers=req_headers)
        f = urllib2.urlopen(req)
        r_body = f.read()
        r_info = f.info()
        f.close()

        # handle instances where the response body is 0 bytes in length
        if not r_body:
            print " [" + color['red'] + "!" + color['end'] + "] Zero byte response received from %s" \
                % test['name']
            r_body = "<Scythe Message: Empty response from server>"

        # returned updated test and response data
        return test, r_body, r_info, req

    except Exception:
        print textwrap.fill((" [" + color['red'] + "!" + color['end'] + "] Error contacting %s" \
            % test['url']), initial_indent='', subsequent_indent='\t', width=80)
        if opts.debug:
            for ex in traceback.format_exc().splitlines():
                print textwrap.fill((" %s" \
                    % str(ex)), initial_indent='', subsequent_indent='\t', width=80)
            print
        return test, False, False, req 
Example 8
Project: linux-cross-gcc   Author: nmercier   File: regrtest.py    BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
def dash_R_cleanup(fs, ps, pic, zdc, abcs):
    """Restore saved global state, clear module caches, then collect garbage.

    Uses ``warnings``, ``sys``, ``re`` and ``test_support`` without
    importing them here, so they are expected to be available at module
    level.
    NOTE(review): presumably called between repeated runs of a test by
    regrtest's -R option -- confirm against the caller.

    Parameters:
        fs:   sequence whose items replace the contents of
              ``warnings.filters`` (slice assignment, same list object).
        ps:   mapping loaded into ``copy_reg.dispatch_table`` after it has
              been cleared.
        pic:  mapping loaded into ``sys.path_importer_cache`` after it has
              been cleared.
        zdc:  mapping loaded into ``zipimport._zip_directory_cache`` after
              it has been cleared; ignored when ``zipimport`` cannot be
              imported.
        abcs: mapping of ABC class -> saved registry; each class receives a
              copy of its saved registry and has both of its caches cleared.

    Returns None.
    """
    import gc, copy_reg
    import _strptime, linecache
    dircache = test_support.import_module('dircache', deprecated=True)
    import urlparse, urllib, urllib2, mimetypes, doctest
    import struct, filecmp
    from distutils.dir_util import _path_created

    # Clear the warnings registry, so they can be displayed again
    for mod in sys.modules.values():
        if hasattr(mod, '__warningregistry__'):
            del mod.__warningregistry__

    # Restore some original values.
    warnings.filters[:] = fs
    copy_reg.dispatch_table.clear()
    copy_reg.dispatch_table.update(ps)
    sys.path_importer_cache.clear()
    sys.path_importer_cache.update(pic)
    try:
        import zipimport
    except ImportError:
        pass # Run unmodified on platforms without zipimport support
    else:
        zipimport._zip_directory_cache.clear()
        zipimport._zip_directory_cache.update(zdc)

    # clear type cache
    sys._clear_type_cache()

    # Clear ABC registries, restoring previously saved ABC registries.
    for abc, registry in abcs.items():
        # Install a copy so the live ABC does not share the saved object.
        abc._abc_registry = registry.copy()
        abc._abc_cache.clear()
        abc._abc_negative_cache.clear()

    # Clear assorted module caches.
    _path_created.clear()
    re.purge()
    _strptime._regex_cache.clear()
    urlparse.clear_cache()
    urllib.urlcleanup()
    # Reset the globally installed urllib2 opener to None.
    urllib2.install_opener(None)
    dircache.reset()
    linecache.clearcache()
    mimetypes._default_mime_types()
    filecmp._cache.clear()
    struct._clearcache()
    doctest.master = None
    try:
        import ctypes
    except ImportError:
        # Don't worry about resetting the cache if ctypes is not supported
        pass
    else:
        ctypes._reset_cache()

    # Collect cyclic trash.
    gc.collect()
Example 9
Project: pentest-notes   Author: wwong99   File: scythe.py    MIT License 4 votes vote down vote up
def get_request(test):
    # perform GET request

    urllib.urlcleanup() # clear cache

    try:
        user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
        req_headers = { 'User-Agent' : user_agent }
        for each in test['headers']:
            key, val = each.split(":", 1)
            key = key.lstrip()
            val = val.lstrip()
            req_headers[key] = val
        if test['requestCookie'] or test['requestCSRF']:
            # request cookie and csrf token if set in module XML
            cookie_val, csrf_val = request_value(test)
            if cookie_val:
                req_headers['cookie'] = cookie_val
            if csrf_val:
                # replace <CSRFTOKEN> with the collected token
                test['url'] = test['url'].replace("<CSRFTOKEN>", csrf_val)
                test['postParameters'] = test['postParameters'].replace("<CSRFTOKEN>", csrf_val)
                test['headers'] = [h.replace('<CSRFTOKEN>', csrf_val) for h in test['headers']]

        if opts.debug:
            # print debug output
            print textwrap.fill((" [ ] URL (GET): %s" % test['url']),
                initial_indent='', subsequent_indent=' -> ', width=80)
            print

        # assign NullHTTPErrorProcessor as default opener
        opener = urllib2.build_opener(NullHTTPErrorProcessor())
        urllib2.install_opener(opener)

        req = urllib2.Request(test['url'], headers=req_headers)
        f = urllib2.urlopen(req)
        r_body = f.read()
        r_info = f.info()
        f.close()

        # handle instances where the response body is 0 bytes in length
        if not r_body:
            print " [" + color['red'] + "!" + color['end'] + "] Zero byte response received from %s" \
                % test['name']
            r_body = "<Scythe Message: Empty response from server>"

        # returned updated test and response data
        return test, r_body, r_info, req

    except Exception:
        print textwrap.fill((" [" + color['red'] + "!" + color['end'] + "] Error contacting %s" \
            % test['url']), initial_indent='', subsequent_indent='\t', width=80)
        if opts.debug:
            for ex in traceback.format_exc().splitlines():
                print textwrap.fill((" %s" \
                    % str(ex)), initial_indent='', subsequent_indent='\t', width=80)
            print
        return test, False, False, req