Python gzip.GzipFile() Examples

The following are 30 code examples of gzip.GzipFile(), drawn from open-source projects. Each example notes the source file and project it comes from; you may also want to check out the other available functions and classes of the gzip module.
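Before the examples, here is a minimal sketch of the two patterns that recur throughout this page: compressing bytes into an in-memory buffer and decompressing them again. The names and data are illustrative only.

import gzip
import io

# Compress some bytes into an in-memory buffer.
buf = io.BytesIO()
with gzip.GzipFile(fileobj=buf, mode='wb') as gz:
    gz.write(b'hello gzip')
compressed = buf.getvalue()   # read the buffer only after the GzipFile is closed

# Decompress them again from a fresh buffer.
with gzip.GzipFile(fileobj=io.BytesIO(compressed), mode='rb') as gz:
    assert gz.read() == b'hello gzip'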
Example #1
Source File: test_io.py    From recruit with Apache License 2.0
def test_gzip_loadtxt():
    # Thanks to another Windows brokenness, we can't use
    # NamedTemporaryFile: a file created from this function cannot be
    # reopened by another open call. So we first build the gzipped string
    # of the test reference array, write it to a securely opened file,
    # and then read that file back with loadtxt.
    s = BytesIO()
    g = gzip.GzipFile(fileobj=s, mode='w')
    g.write(b'1 2 3\n')
    g.close()

    s.seek(0)
    with temppath(suffix='.gz') as name:
        with open(name, 'wb') as f:
            f.write(s.read())
        res = np.loadtxt(name)
    s.close()

    assert_array_equal(res, [1, 2, 3]) 
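The detour through BytesIO above works around the Windows NamedTemporaryFile limitation described in the comment. If that limitation does not apply, a shorter round trip is possible, since np.loadtxt opens .gz files transparently; 'data.gz' below is just an illustrative path, not taken from the test.

import gzip
import numpy as np

with gzip.open('data.gz', 'wb') as f:   # illustrative file name
    f.write(b'1 2 3\n')
res = np.loadtxt('data.gz')             # loadtxt handles .gz files itself
assert (res == [1, 2, 3]).all()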
Example #2
Source File: client.py    From misp42splunk with GNU Lesser General Public License v3.0
def gzip_decode(data):
    """gzip encoded data -> unencoded data

    Decode data using the gzip content encoding as described in RFC 1952
    """
    if not gzip:
        raise NotImplementedError
    f = BytesIO(data)
    gzf = gzip.GzipFile(mode="rb", fileobj=f)
    try:
        decoded = gzf.read()
    except IOError:
        raise ValueError("invalid data")
    f.close()
    gzf.close()
    return decoded

##
# Return a decoded file-like object for the gzip encoding
# as described in RFC 1952.
#
# @param response A stream supporting a read() method
# @return a file-like object that the decoded data can be read() from 
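The trailing comment documents a helper that the excerpt cuts off. A minimal sketch of what such a helper might look like, assuming the response object only needs to support read(); the function name is hypothetical.

import gzip
from io import BytesIO

def gzip_decode_response(response):
    # Hypothetical helper matching the comment above: buffer the body,
    # then return a GzipFile the caller can read() decoded data from.
    return gzip.GzipFile(mode="rb", fileobj=BytesIO(response.read()))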
Example #3
Source File: http.py    From ftw with Apache License 2.0
def parse_content_encoding(self, response_headers, response_data):
        """
        Parses a response that contains Content-Encoding to retrieve
        response_data
        """
        if response_headers['content-encoding'] == 'gzip':
            buf = StringIO.StringIO(response_data)
            zipbuf = gzip.GzipFile(fileobj=buf)
            response_data = zipbuf.read()
        elif response_headers['content-encoding'] == 'deflate':
            data = StringIO.StringIO(zlib.decompress(response_data))
            response_data = data.read()
        else:
            raise errors.TestError(
                'Received unknown Content-Encoding',
                {
                    'content-encoding':
                        str(response_headers['content-encoding']),
                    'function': 'http.HttpResponse.parse_content_encoding'
                })
        return response_data 
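The example above is Python 2 code (StringIO.StringIO on byte strings). A rough Python 3 equivalent, assuming response_data is bytes and swapping the project-specific errors.TestError for a plain ValueError:

import gzip
import io
import zlib

def parse_content_encoding_py3(response_headers, response_data):
    # Same branching as above, but for Python 3 where response_data is bytes.
    encoding = response_headers['content-encoding']
    if encoding == 'gzip':
        return gzip.GzipFile(fileobj=io.BytesIO(response_data)).read()
    if encoding == 'deflate':
        return zlib.decompress(response_data)
    raise ValueError('Received unknown Content-Encoding: ' + str(encoding))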
Example #4
Source File: gtest_parallel.py    From gtest-parallel with Apache License 2.0
def write_to_file(self, save_file):
    "Write all the times to file."
    try:
      with TestTimes.LockedFile(save_file, 'a+b') as fd:
        times = TestTimes.__read_test_times_file(fd)

        if times is None:
          times = self.__times
        else:
          times.update(self.__times)

        # We erase data from file while still holding a lock to it. This
        # way reading old test times and appending new ones are atomic
        # for external viewer.
        fd.seek(0)
        fd.truncate()
        with gzip.GzipFile(fileobj=fd, mode='wb') as gzf:
          cPickle.dump(times, gzf, PICKLE_HIGHEST_PROTOCOL)
    except IOError:
      pass  # ignore errors---saving the times isn't that important 
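This writer pairs with the reader shown in Example #27 further down. A self-contained Python 3 sketch of the same pickle-into-GzipFile round trip, with a made-up file name and data:

import gzip
import pickle

times = {('binary', 'test_name'): 1.25}   # made-up data

with open('times.gz', 'wb') as fd:
    with gzip.GzipFile(fileobj=fd, mode='wb') as gzf:
        pickle.dump(times, gzf, pickle.HIGHEST_PROTOCOL)

with open('times.gz', 'rb') as fd:
    with gzip.GzipFile(fileobj=fd, mode='rb') as gzf:
        assert pickle.load(gzf) == times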
Example #5
Source File: vcfio_test.py    From gcp-variant-transforms with Apache License 2.0
def test_write_dataflow_header(self):
    pipeline = TestPipeline()
    pcoll = pipeline | 'Create' >> beam.Create(self.variants, reshuffle=False)
    headers = ['foo\n']
    _ = pcoll | 'Write' >> vcfio.WriteToVcf(
        self.path + '.gz',
        compression_type=CompressionTypes.AUTO,
        headers=headers)
    pipeline.run()

    read_result = []
    for file_name in glob.glob(self.path + '*'):
      with gzip.GzipFile(file_name, 'r') as f:
        read_result.extend(f.read().splitlines())

    self.assertEqual(read_result[0], 'foo')
    for actual, expected in zip(read_result[1:], self.variant_lines):
      self._assert_variant_lines_equal(actual, expected) 
Example #6
Source File: zooqle.py    From search-plugins with GNU General Public License v2.0
def retrieve_url_nodecode(url):
    """ Return the content of the url page as a string """
    req = Request(url, headers=headers)
    try:
        response = urlopen(req)
    except URLError as errno:
        print(" ".join(("Connection error:", str(errno.reason))))
        print(" ".join(("URL:", url)))
        return ""
    dat = response.read()
    # Check if it is gzipped
    if dat[:2] == '\037\213':
        # Data is gzip encoded, decode it
        compressedstream = StringIO(dat)
        gzipper = gzip.GzipFile(fileobj=compressedstream)
        extracted_data = gzipper.read()
        dat = extracted_data
        return dat
    return dat 
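The magic-number check above compares against a str literal, which only matches on Python 2; on Python 3, response.read() returns bytes. A hedged bytes-based variant of the same idea (the function name is illustrative):

import gzip
from io import BytesIO

def maybe_gunzip(dat):
    # On Python 3 the body is bytes, so compare against the gzip magic
    # number as bytes rather than a str literal.
    if dat[:2] == b'\x1f\x8b':
        return gzip.GzipFile(fileobj=BytesIO(dat)).read()
    return dat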
Example #7
Source File: client.py    From verge3d-blender-addon with GNU General Public License v3.0
def gzip_encode(data):
    """data -> gzip encoded data

    Encode data using the gzip content encoding as described in RFC 1952
    """
    if not gzip:
        raise NotImplementedError
    f = BytesIO()
    gzf = gzip.GzipFile(mode="wb", fileobj=f, compresslevel=1)
    gzf.write(data)
    gzf.close()
    encoded = f.getvalue()
    f.close()
    return encoded

##
# Decode a string using the gzip content encoding such as specified by the
# Content-Encoding: gzip
# in the HTTP header, as described in RFC 1952
#
# @param data The encoded data
# @return the unencoded data
# @raises ValueError if data is not correctly coded. 
Example #8
Source File: client.py    From verge3d-blender-addon with GNU General Public License v3.0
def gzip_decode(data):
    """gzip encoded data -> unencoded data

    Decode data using the gzip content encoding as described in RFC 1952
    """
    if not gzip:
        raise NotImplementedError
    f = BytesIO(data)
    gzf = gzip.GzipFile(mode="rb", fileobj=f)
    try:
        decoded = gzf.read()
    except IOError:
        raise ValueError("invalid data")
    f.close()
    gzf.close()
    return decoded

##
# Return a decoded file-like object for the gzip encoding
# as described in RFC 1952.
#
# @param response A stream supporting a read() method
# @return a file-like object that the decoded data can be read() from 
Example #9
Source File: zooqle.py    From search-plugins with GNU General Public License v2.0
def retrieve_url_nodecode(url):
    """ Return the content of the url page as a string """
    req = Request(url, headers=headers)
    try:
        response = urlopen(req)
    except URLError as errno:
        print(" ".join(("Connection error:", str(errno.reason))))
        print(" ".join(("URL:", url)))
        return ""
    dat = response.read()
    # Check if it is gzipped
    if dat[:2] == '\037\213':
        # Data is gzip encoded, decode it
        compressedstream = StringIO(dat)
        gzipper = gzip.GzipFile(fileobj=compressedstream)
        extracted_data = gzipper.read()
        dat = extracted_data
        return dat
    return dat 
Example #10
Source File: ServerConnection.py    From 3vilTwinAttacker with MIT License
def handleResponse(self, data):
        if (self.isCompressed):
            logging.debug("Decompressing content...")
            data = gzip.GzipFile('', 'rb', 9, StringIO.StringIO(data)).read()
            
        logging.log(self.getLogLevel(), "Read from server:\n" + data)
        #logging.log(self.getLogLevel(), "Read from server:\n <large data>" )


        data = self.replaceSecureLinks(data)

        if (self.contentLength != None):
            self.client.setHeader('Content-Length', len(data))
        
        self.client.write(data)
        self.shutdown() 
Example #11
Source File: compression.py    From misp42splunk with GNU Lesser General Public License v3.0
def decompress(cls, data):
        '''Decompress gzip-compressed data `data`.

        It will perform basic validation, then return the decompressed
        data or raise a ValueError exception for invalid `data`.

        :param data: Gzip-compressed data to decompress.
        :type data: ``bytes``
        :returns: decompressed data.
        :rtype: ``string``

        :raises ValueError: If `data` is not in gzip format
        '''

        if not cls.check_format(data):
            raise ValueError('File is not gzip format.')

        return gzip.GzipFile(fileobj=BytesIO(data),
                             mode='rb').read() 
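check_format() is not shown in the excerpt. A plausible stand-in, assumed rather than taken from the project, simply tests for the two gzip magic bytes:

def looks_like_gzip(data):
    # A guess at what check_format() verifies: gzip streams start with
    # the magic bytes 0x1f 0x8b.
    return isinstance(data, (bytes, bytearray)) and data[:2] == b'\x1f\x8b'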
Example #12
Source File: __init__.py    From misp42splunk with GNU Lesser General Public License v3.0
def _decompressContent(response, new_content):
    content = new_content
    try:
        encoding = response.get("content-encoding", None)
        if encoding in ["gzip", "deflate"]:
            if encoding == "gzip":
                content = gzip.GzipFile(fileobj=io.BytesIO(new_content)).read()
            if encoding == "deflate":
                content = zlib.decompress(content, -zlib.MAX_WBITS)
            response["content-length"] = str(len(content))
            # Record the historical presence of the encoding in a way that won't interfere.
            response["-content-encoding"] = response["content-encoding"]
            del response["content-encoding"]
    except (IOError, zlib.error):
        content = ""
        raise FailedToDecompressContent(
            _("Content purported to be compressed with %s but failed to decompress.")
            % response.get("content-encoding"),
            response,
            content,
        )
    return content 
Example #13
Source File: client.py    From misp42splunk with GNU Lesser General Public License v3.0
def gzip_encode(data):
    """data -> gzip encoded data

    Encode data using the gzip content encoding as described in RFC 1952
    """
    if not gzip:
        raise NotImplementedError
    f = BytesIO()
    gzf = gzip.GzipFile(mode="wb", fileobj=f, compresslevel=1)
    gzf.write(data)
    gzf.close()
    encoded = f.getvalue()
    f.close()
    return encoded

##
# Decode a string using the gzip content encoding such as specified by the
# Content-Encoding: gzip
# in the HTTP header, as described in RFC 1952
#
# @param data The encoded data
# @return the unencoded data
# @raises ValueError if data is not correctly coded. 
Example #14
Source File: client.py    From misp42splunk with GNU Lesser General Public License v3.0
def gzip_decode(data):
    """gzip encoded data -> unencoded data

    Decode data using the gzip content encoding as described in RFC 1952
    """
    if not gzip:
        raise NotImplementedError
    f = BytesIO(data)
    gzf = gzip.GzipFile(mode="rb", fileobj=f)
    try:
        decoded = gzf.read()
    except IOError:
        raise ValueError("invalid data")
    f.close()
    gzf.close()
    return decoded

##
# Return a decoded file-like object for the gzip encoding
# as described in RFC 1952.
#
# @param response A stream supporting a read() method
# @return a file-like object that the decoded data can be read() from 
Example #15
Source File: compression.py    From misp42splunk with GNU Lesser General Public License v3.0
def decompress(cls, data):
        '''Decompress gzip-compressed data `data`.

        It will perform basic validation, then return the decompressed
        data or raise a ValueError exception for invalid `data`.

        :param data: Gzip-compressed data to decompress.
        :type data: ``bytes``
        :returns: decompressed data.
        :rtype: ``string``

        :raises ValueError: If `data` is not in gzip format
        '''

        if not cls.check_format(data):
            raise ValueError('File is not gzip format.')

        return gzip.GzipFile(fileobj=BytesIO(data),
                             mode='rb').read() 
Example #16
Source File: file_util.py    From snowflake-connector-python with Apache License 2.0
def compress_file_with_gzip(file_name, tmp_dir):
        """Compresses a file with GZIP.

        Args:
            file_name: Local path to file to be compressed.
            tmp_dir: Temporary directory where a GZIP file will be created.

        Returns:
            A tuple of gzip file name and size.
        """
        logger = getLogger(__name__)
        base_name = os.path.basename(file_name)
        gzip_file_name = os.path.join(tmp_dir, base_name + '_c.gz')
        logger.debug('gzip file: %s, original file: %s', gzip_file_name,
                     file_name)
        fr = open(file_name, 'rb')
        fw = gzip.GzipFile(gzip_file_name, 'wb')
        shutil.copyfileobj(fr, fw)
        fw.close()
        fr.close()
        SnowflakeFileUtil.normalize_gzip_header(gzip_file_name)

        statinfo = os.stat(gzip_file_name)
        return gzip_file_name, statinfo.st_size 
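The same compression step can be written with context managers so both files are closed even if copyfileobj raises. A sketch under the same naming assumptions, using gzip.open() as a convenience wrapper around GzipFile:

import gzip
import shutil

def compress_with_gzip_cm(file_name, gzip_file_name):
    # Same copy as above, but the with-blocks guarantee both files are
    # closed; gzip.open() covers the common path-based case.
    with open(file_name, 'rb') as fr, gzip.open(gzip_file_name, 'wb') as fw:
        shutil.copyfileobj(fr, fw)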
Example #17
Source File: __init__.py    From misp42splunk with GNU Lesser General Public License v3.0
def _decompressContent(response, new_content):
    content = new_content
    try:
        encoding = response.get("content-encoding", None)
        if encoding in ["gzip", "deflate"]:
            if encoding == "gzip":
                content = gzip.GzipFile(fileobj=StringIO.StringIO(new_content)).read()
            if encoding == "deflate":
                content = zlib.decompress(content, -zlib.MAX_WBITS)
            response["content-length"] = str(len(content))
            # Record the historical presence of the encoding in a way that won't interfere.
            response["-content-encoding"] = response["content-encoding"]
            del response["content-encoding"]
    except (IOError, zlib.error):
        content = ""
        raise FailedToDecompressContent(
            _("Content purported to be compressed with %s but failed to decompress.")
            % response.get("content-encoding"),
            response,
            content,
        )
    return content 
Example #18
Source File: client.py    From misp42splunk with GNU Lesser General Public License v3.0
def gzip_encode(data):
    """data -> gzip encoded data

    Encode data using the gzip content encoding as described in RFC 1952
    """
    if not gzip:
        raise NotImplementedError
    f = BytesIO()
    gzf = gzip.GzipFile(mode="wb", fileobj=f, compresslevel=1)
    gzf.write(data)
    gzf.close()
    encoded = f.getvalue()
    f.close()
    return encoded

##
# Decode a string using the gzip content encoding such as specified by the
# Content-Encoding: gzip
# in the HTTP header, as described in RFC 1952
#
# @param data The encoded data
# @return the unencoded data
# @raises ValueError if data is not correctly coded. 
Example #19
Source File: billing.py    From aegea with Apache License 2.0
def ls(args):
    bucket = resources.s3.Bucket(args.billing_reports_bucket.format(account_id=ARN.get_account_id()))
    now = datetime.utcnow()
    year = args.year or now.year
    month = str(args.month or now.month).zfill(2)
    next_year = year + ((args.month or now.month) + 1) // 12
    next_month = str(((args.month or now.month) + 1) % 12).zfill(2)
    manifest_name = "aegea/{report}/{yr}{mo}01-{next_yr}{next_mo}01/{report}-Manifest.json"
    manifest_name = manifest_name.format(report=__name__, yr=year, mo=month, next_yr=next_year, next_mo=next_month)
    try:
        manifest = json.loads(bucket.Object(manifest_name).get().get("Body").read())
        for report_key in manifest["reportKeys"]:
            report = BytesIO(bucket.Object(report_key).get().get("Body").read())
            with gzip.GzipFile(fileobj=report) as fh:
                reader = csv.DictReader(fh)
                for line in reader:
                    page_output(tabulate(filter_line_items(reader, args), args))
    except ClientError as e:
        msg = 'Unable to get report {} from {}: {}. Run "aegea billing configure" to enable reports.'
        raise AegeaException(msg.format(manifest_name, bucket, e)) 
Example #20
Source File: proxycrawl_api.py    From proxycrawl-python with Apache License 2.0
def decompressBody(self):
        body_stream = BytesIO(self.handler.read())
        body_gzip = gzip.GzipFile(fileobj=body_stream)

        return body_gzip.read() 
Example #21
Source File: usps.py    From cycada_release with BSD 2-Clause "Simplified" License
def read_data(self, path):
        images = []
        targets = []
        with gzip.GzipFile(path, 'r') as f:
            for line in f:
                split = line.strip().split()
                label = int(float(split[0]))
                pixels = np.array([(float(x) + 1) / 2 for x in split[1:]]) * 255
                num_pix = self.params.image_size
                pixels = pixels.reshape(num_pix, num_pix).astype('uint8')
                img = Image.fromarray(pixels, mode='L')
                images.append(img)
                targets.append(label)
        return images, targets 
Example #22
Source File: httpserver_test.py    From tornado-zh with MIT License
def post_gzip(self, body):
        bytesio = BytesIO()
        gzip_file = gzip.GzipFile(mode='w', fileobj=bytesio)
        gzip_file.write(utf8(body))
        gzip_file.close()
        compressed_body = bytesio.getvalue()
        return self.fetch('/', method='POST', body=compressed_body,
                          headers={'Content-Encoding': 'gzip'}) 
Example #23
Source File: web_test.py    From tornado-zh with MIT License
def test_flow_control_compressed_body(self):
        bytesio = BytesIO()
        gzip_file = gzip.GzipFile(mode='w', fileobj=bytesio)
        gzip_file.write(b'abcdefghijklmnopqrstuvwxyz')
        gzip_file.close()
        compressed_body = bytesio.getvalue()
        response = self.fetch('/', body=compressed_body, method='POST',
                              headers={'Content-Encoding': 'gzip'})
        response.rethrow()
        self.assertEqual(json_decode(response.body),
                         dict(methods=['prepare', 'data_received',
                                       'data_received', 'data_received',
                                       'post'])) 
Example #24
Source File: simple_httpclient_test.py    From tornado-zh with MIT License
def test_gzip(self):
        # All the tests in this file should be using gzip, but this test
        # ensures that it is in fact getting compressed.
        # Setting Accept-Encoding manually bypasses the client's
        # decompression so we can see the raw data.
        response = self.fetch("/chunk", use_gzip=False,
                              headers={"Accept-Encoding": "gzip"})
        self.assertEqual(response.headers["Content-Encoding"], "gzip")
        self.assertNotEqual(response.body, b"asdfqwer")
        # Our test data gets bigger when gzipped.  Oops.  :)
        # Chunked encoding bypasses the MIN_LENGTH check.
        self.assertEqual(len(response.body), 34)
        f = gzip.GzipFile(mode="r", fileobj=response.buffer)
        self.assertEqual(f.read(), b"asdfqwer") 
Example #25
Source File: httpserver_test.py    From tornado-zh with MIT License
def compress(self, body):
        bytesio = BytesIO()
        gzfile = gzip.GzipFile(mode='w', fileobj=bytesio)
        gzfile.write(body)
        gzfile.close()
        compressed = bytesio.getvalue()
        if len(compressed) >= len(body):
            raise Exception("body did not shrink when compressed")
        return compressed 
Example #26
Source File: build_docker_image.py    From aegea with Apache License 2.0
def encode_dockerfile(args):
    with io.BytesIO() as buf:
        with gzip.GzipFile(fileobj=buf, mode="wb") as gz:
            gz.write(get_dockerfile(args))
            gz.close()
        return base64.b64encode(buf.getvalue()).decode() 
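One detail worth noting above: the gzip trailer is only written when the GzipFile is closed, so buf.getvalue() must run after the inner with block (the explicit gz.close() is redundant but harmless). A standalone sketch of the same gzip-then-base64 encoding, with an illustrative payload:

import base64
import gzip
import io

def encode_gzip_b64(payload):
    buf = io.BytesIO()
    with gzip.GzipFile(fileobj=buf, mode="wb") as gz:
        gz.write(payload)
    # getvalue() only after the with-block, so the gzip trailer is included.
    return base64.b64encode(buf.getvalue()).decode()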
Example #27
Source File: gtest_parallel.py    From gtest-parallel with Apache License 2.0
def __read_test_times_file(fd):
    try:
      with gzip.GzipFile(fileobj=fd, mode='rb') as gzf:
        times = cPickle.load(gzf)
    except Exception:
      # File doesn't exist, isn't readable, is malformed---whatever.
      # Just ignore it.
      return None
    else:
      return times 
Example #28
Source File: word2vecReaderUtils.py    From word2vec-twitter with MIT License
def make_closing(base, **attrs):
    """
    Add support for `with Base(attrs) as fout:` to the base class if it's missing.
    The base class' `close()` method will be called on context exit, to always close the file properly.

    This is needed for gzip.GzipFile, bz2.BZ2File etc in older Pythons (<=2.6), which otherwise
    raise "AttributeError: GzipFile instance has no attribute '__exit__'".

    """
    if not hasattr(base, '__enter__'):
        attrs['__enter__'] = lambda self: self
    if not hasattr(base, '__exit__'):
        attrs['__exit__'] = lambda self, type, value, traceback: self.close()
    return type('Closing' + base.__name__, (base, object), attrs) 
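A usage sketch for the helper above; on modern Pythons GzipFile already supports the with statement, so the wrapper changes nothing there. The file name 'corpus.gz' is illustrative.

import gzip

# Wrap GzipFile so it always works as a context manager, even on old Pythons.
ClosingGzipFile = make_closing(gzip.GzipFile)
with ClosingGzipFile('corpus.gz', 'rb') as fin:
    data = fin.read()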
Example #29
Source File: mnist.py    From mxbox with BSD 3-Clause "New" or "Revised" License
def download(self):
        """Download the MNIST data if it doesn't exist in processed_folder already."""
        from six.moves import urllib
        import gzip

        if self._check_exists():
            return

        # download files
        try:
            os.makedirs(os.path.join(self.root, self.raw_folder))
            # os.makedirs(os.path.join(self.root, self.processed_folder))
        except OSError as e:
            if e.errno == errno.EEXIST:
                pass
            else:
                raise

        for url in self.urls:
            print('Downloading ' + url)
            data = urllib.request.urlopen(url)
            filename = url.rpartition('/')[2]
            file_path = os.path.join(self.root, filename)
            with open(file_path, 'wb') as f:
                f.write(data.read())
            # with open(file_path.replace('.gz', ''), 'wb') as out_f, \
            #         gzip.GzipFile(file_path) as zip_f:
            #     out_f.write(zip_f.read())
            # os.unlink(file_path)

        # finish downloading
        # process and save as torch files
        print('Processing...')



        print('Done!') 
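The commented-out block at the end shows the intended decompression step. Uncommented and as a standalone sketch (file_path is assumed to point at a downloaded .gz file):

import gzip

def gunzip_to_sibling(file_path):
    # Mirrors the commented-out code above: write the decompressed
    # payload next to the downloaded .gz file.
    with open(file_path.replace('.gz', ''), 'wb') as out_f, \
            gzip.GzipFile(file_path) as zip_f:
        out_f.write(zip_f.read())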
Example #30
Source File: bench_higgs_boson.py    From pygbm with MIT License
def load_data():
    filename = os.path.join(HERE, URL.rsplit('/', 1)[-1])
    if not os.path.exists(filename):
        print(f"Downloading {URL} to {filename} (2.6 GB)...")
        urlretrieve(URL, filename)
        print("done.")

    print(f"Parsing {filename}...")
    tic = time()
    with GzipFile(filename) as f:
        df = pd.read_csv(f, header=None, dtype=np.float32)
    toc = time()
    print(f"Loaded {df.values.nbytes / 1e9:0.3f} GB in {toc - tic:0.3f}s")
    return df
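For completeness, pandas can infer gzip compression from the .gz suffix, so the explicit GzipFile wrapper is optional. A hedged equivalent, assuming the downloaded file is named 'HIGGS.csv.gz':

import numpy as np
import pandas as pd

# pandas infers gzip compression from the .gz extension by default.
df = pd.read_csv('HIGGS.csv.gz', header=None, dtype=np.float32)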