Python gzip.decompress() Examples

The following are 30 code examples of gzip.decompress(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module gzip, or try the search function.
Example #1
Source File: feature.py    From igv-reports with MIT License 7 votes vote down vote up
def getstream(file):
    """Open ``file`` and return a readable text stream for it.

    Args:
        file: Local path or ``http://`` / ``https://`` URL. Names ending in
            ``.gz`` are gunzipped.

    Returns:
        A text-mode file-like object. For URLs this is an ``io.StringIO``
        holding the downloaded (and, for ``.gz``, decompressed) body; for
        local paths it is the open file handle, which the caller must close.
    """

    # TODO -- gcs

    if file.startswith(('http://', 'https://')):
        response = requests.get(file)
        # TODO Do something with response.status_code

        if file.endswith('.gz'):
            text = gzip.decompress(response.content).decode('utf-8')
        else:
            text = response.text
        # Wrap the body so URL inputs yield a stream, like the local-file
        # branches below, instead of a bare string.
        return io.StringIO(text)

    if file.endswith('.gz'):
        return gzip.open(file, mode='rt')

    return open(file, encoding='UTF-8')
Example #2
Source File: utils.py    From esi-knife with MIT License 6 votes vote down vote up
def get_data(uuid):
    """Open and return the character's data.

    Looks up the cache entry for ``uuid``, then base64-decodes, decompresses
    and JSON-parses it. After a successful decode the entry's expiry is
    refreshed to ``EXPIRY``.

    Returns:
        The decoded data, or None when the entry is missing, the cache
        lookup fails, or the payload cannot be decoded.
    """

    cache_key = "{}{}".format(Keys.complete.value, uuid)
    try:
        content = CACHE.get(cache_key)
    except Exception as error:
        LOG.warning("failed to get %s: %r", cache_key, error)
        return None

    if content is None:
        return None

    try:
        data = ujson.loads(decompress(base64.b64decode(content)))
    except Exception as error:
        LOG.warning("failed to decode %s: %r", content, error)
        return None

    # Refresh the expiry before returning: a ``return`` inside the ``try``
    # would skip a ``try``/``else`` block, so the refresh must live here.
    try:
        CACHE.cache._client.expire(  # pylint: disable=protected-access
            cache_key,
            EXPIRY,
        )
    except Exception as error:
        # Best effort: failing to extend the TTL must not hide valid data.
        LOG.warning("failed to refresh expiry of %s: %r", cache_key, error)

    return data
Example #3
Source File: profiler_e2e.py    From vprof with BSD 2-Clause "Simplified" License 6 votes vote down vote up
def testRequest(self):
        # Run self._func through the runner in 'p' mode, then fetch the
        # gzip-compressed JSON stats from the /profile endpoint and check them.
        runner.run(
            self._func, 'p', ('foo', 'bar'), host=_HOST, port=_PORT)
        profile_url = 'http://%s:%s/profile' % (_HOST, _PORT)
        payload = gzip.decompress(urllib.request.urlopen(profile_url).read())
        profile = json.loads(payload.decode('utf-8'))['p']
        this_file = inspect.getabsfile(inspect.currentframe())
        expected_name = '_func @ %s (function)' % this_file
        self.assertEqual(profile['objectName'], expected_name)
        self.assertTrue(len(profile['callStats']) > 0)
        # The three counters below must all be strictly positive.
        for counter in ('totalTime', 'primitiveCalls', 'totalCalls'):
            self.assertTrue(profile[counter] > 0)


# pylint: enable=missing-docstring, blacklisted-name 
Example #4
Source File: funcaptcha_selenium_callback.py    From python-anticaptcha with MIT License 6 votes vote down vote up
def custom(req, req_body, res, res_body):
        """Append ``wrapper_code`` to JavaScript responses for "arkoselabs" paths.

        Args:
            req: Request object; only ``req.path`` is read.
            req_body: Unused.
            res: Response object; ``res.headers`` is read and, for gzip
                responses, has its ``Content-Encoding`` entry removed.
            res_body: Response body; gunzipped first when the response is
                gzip-encoded.

        Returns:
            ``res_body + wrapper_code`` for matching JavaScript responses,
            otherwise None.
        """
        if not req.path:
            return
        if "arkoselabs" not in req.path:
            return
        content_type = res.headers.get("Content-Type", None)
        if content_type not in ("text/javascript", "application/javascript"):
            print(
                "Skip invalid content type",
                req.path,
                content_type,
            )
            return
        # Use .get(): an uncompressed response has no Content-Encoding header,
        # and indexing a mapping-style headers object would raise KeyError.
        if res.headers.get("Content-Encoding", None) == "gzip":
            # The body is returned decompressed, so drop the now-stale header.
            del res.headers["Content-Encoding"]
            res_body = gzip.decompress(res_body)
        return res_body + wrapper_code
Example #5
Source File: code_heatmap_e2e.py    From vprof with BSD 2-Clause "Simplified" License 6 votes vote down vote up
def testRequest(self):
        # Run self._func through the runner in 'h' mode, then fetch the
        # gzip-compressed JSON stats from the /profile endpoint and check them.
        runner.run(
            self._func, 'h', ('foo', 'bar'), host=_HOST, port=_PORT)
        profile_url = 'http://%s:%s/profile' % (_HOST, _PORT)
        payload = gzip.decompress(urllib.request.urlopen(profile_url).read())
        heat_stats = json.loads(payload.decode('utf-8'))['h']
        self.assertTrue(heat_stats['runTime'] > 0)
        heatmaps = heat_stats['heatmaps']
        this_file = inspect.getabsfile(inspect.currentframe())
        self.assertEqual(heat_stats['objectName'],
                         '_func @ %s (function)' % this_file)
        self.assertEqual(len(heatmaps), 1)
        only_heatmap = heatmaps[0]
        self.assertDictEqual(
            only_heatmap['executionCount'], {'101': 1, '102': 1})
        # Expected source listing: one ['line', <line number>, <text>] entry
        # per source line of _func.
        expected_src = [
            ['line', 100, u'        def _func(foo, bar):\n'],
            ['line', 101, u'            baz = foo + bar\n'],
            ['line', 102, u'            return baz\n'],
        ]
        self.assertListEqual(only_heatmap['srcCode'], expected_src)

# pylint: enable=missing-docstring, blacklisted-name 
Example #6
Source File: update.py    From openSUSE-release-tools with GNU General Public License v2.0 6 votes vote down vote up
def patch_id(repo):
        """Return the text of the first ``id`` element in a repo's updateinfo.

        Fetches ``repodata/repomd.xml`` under ``repo``, follows the
        ``location`` href of its ``updateinfo`` data entry, decompresses that
        document and returns the text of its first ``id`` element.

        Returns None when repomd.xml cannot be fetched or lists no
        updateinfo location.
        """
        repomd_response = requests.get(repo + 'repodata/repomd.xml')
        if not repomd_response.ok:
            return None
        repomd_tree = ET.fromstring(repomd_response.text)

        location = repomd_tree.find(
            './/{http://linux.duke.edu/metadata/repo}data[@type="updateinfo"]/{http://linux.duke.edu/metadata/repo}location')
        try:
            # find() returns None when there is no such entry; the attribute
            # access then raises AttributeError.
            updateinfo_url = repo + location.attrib['href']
        except AttributeError:
            return None

        compressed = requests.get(updateinfo_url).content
        updateinfo_tree = ET.fromstring(decompress(compressed))
        return updateinfo_tree.find('.//id').text

    # take the first package name we find - often enough correct 
Example #7
Source File: CSVFeedApiModule.py    From content with MIT License 6 votes vote down vote up
def get_feed_content_divided_to_lines(self, url, raw_response):
        """Decode a feed response and split its content into lines.

        Args:
            url: Current feed's url; used to look up its entry in
                ``self.feed_url_to_config``.
            raw_response: The raw response from the feed's url; its
                ``content`` bytes are gunzipped first when the url's config
                sets ``is_zipped_file``.

        Returns:
            List. List of lines from the feed content, decoded with
            ``self.encoding`` and split on ``'\\n'``.
        """
        # A url that is missing from the config is treated as not zipped,
        # rather than raising AttributeError on the None lookup result.
        url_config = (self.feed_url_to_config or {}).get(url) or {}
        if url_config.get('is_zipped_file'):
            response_content = gzip.decompress(raw_response.content)
        else:
            response_content = raw_response.content

        return response_content.decode(self.encoding).split('\n')
Example #8
Source File: toolkit.py    From sisyphus with Mozilla Public License 2.0 6 votes vote down vote up
def load_job(path: str) -> Job:
    """ Load a job from a job file, a finished-job archive or a job directory

    ``path`` may be a file (an archive if its name ends with
    ``gs.JOB_FINISHED_ARCHIVE``, otherwise a plain job file) or a directory
    holding either ``gs.JOB_SAVE`` or ``gs.JOB_FINISHED_ARCHIVE``.

    :param path(str): Path to job directory
    :return (Job):
    """
    def _from_archive(archive_path):
        # The archive stores the job as a gzip-compressed pickle under the
        # member name gs.JOB_SAVE.
        # NOTE(review): pickle.loads must only be fed trusted archives.
        with tarfile.open(archive_path) as archive, \
                archive.extractfile(gs.JOB_SAVE) as member:
            return pickle.loads(gzip.decompress(member.read()))

    if os.path.isfile(path):
        if path.endswith(gs.JOB_FINISHED_ARCHIVE):
            return _from_archive(path)
        return load_file(path)

    # Not a file: look inside the directory, preferring the plain job file.
    save_path = os.path.join(path, gs.JOB_SAVE)
    if os.path.isfile(save_path):
        return load_file(save_path)

    archive_path = os.path.join(path, gs.JOB_FINISHED_ARCHIVE)
    assert os.path.isfile(archive_path), "Could not find job path or file: %s" % path
    return _from_archive(archive_path)
Example #9
Source File: format.py    From hermit with Apache License 2.0 6 votes vote down vote up
def decode_qr_code_data(encoded: bytes) -> str:
    """Decode Base32-encoded, compressed, UTF-8 text.

    The stages run in order: Base32 decode, decompress, UTF-8 decode.

    Args:
        encoded: Non-empty Base32 payload.

    Returns:
        The decoded text.

    Raises:
        InvalidSignatureRequest: If ``encoded`` is not bytes, is empty, or
            fails any of the three decoding stages.
    """
    import zlib  # local import: only needed to recognise corrupt deflate data

    if not isinstance(encoded, bytes):
        raise InvalidSignatureRequest("Can only decode bytes")
    if encoded == b'':
        raise InvalidSignatureRequest("Cannot decode empty bytes")

    try:
        compressed_bytes = b32decode(encoded)
    except (TypeError, Base32DecodeError) as error:
        raise InvalidSignatureRequest("Not Base32") from error

    try:
        decompressed_bytes = decompress(compressed_bytes)
    except (OSError, EOFError, zlib.error) as error:
        # NOTE(review): assumes `decompress` is gzip.decompress, which raises
        # OSError for a bad header, EOFError for a truncated stream and
        # zlib.error for corrupt deflate data -- confirm against the import.
        raise InvalidSignatureRequest("Not gzipped") from error

    try:
        return decompressed_bytes.decode('utf-8')
    except UnicodeError as error:
        raise InvalidSignatureRequest("Not valid UTF-8") from error
Example #10
Source File: manager.py    From binaryalert with Apache License 2.0 6 votes vote down vote up
def _inventory_object_iterator(
            bucket: boto3.resource, manifest_path: str) -> Generator[str, None, None]:
        """Yield S3 object keys listed in the inventory.

        Args:
            bucket: BinaryAlert S3 bucket resource
            manifest_path: S3 object key for an inventory manifest.json

        Yields:
            Object keys listed in the inventory
        """
        manifest_body = bucket.Object(manifest_path).get()['Body'].read()
        manifest = json.loads(manifest_body)

        # The manifest contains a list of .csv.gz files, each with a list of object keys
        for inventory_file in manifest['files']:
            compressed = bucket.Object(inventory_file['key']).get()['Body'].read()
            rows = gzip.decompress(compressed).decode('utf-8').strip().split('\n')
            for row in rows:
                # The object key is the second comma-separated field; strip
                # the double quotes around it.
                yield row.split(',')[1].strip('"')
Example #11
Source File: flame_graph_e2e.py    From vprof with BSD 2-Clause "Simplified" License 6 votes vote down vote up
def testRequest(self):
        # Run self._func through the runner in 'c' mode, then fetch the
        # gzip-compressed JSON stats from the /profile endpoint and check them.
        runner.run(
            self._func, 'c', ('foo', 'bar'), host=_HOST, port=_PORT)
        profile_url = 'http://%s:%s/profile' % (_HOST, _PORT)
        payload = gzip.decompress(urllib.request.urlopen(profile_url).read())
        flame_stats = json.loads(payload.decode('utf-8'))['c']
        this_file = inspect.getabsfile(inspect.currentframe())
        expected_name = '_func @ %s (function)' % this_file
        self.assertEqual(flame_stats['objectName'], expected_name)
        self.assertEqual(
            flame_stats['sampleInterval'], flame_graph._SAMPLE_INTERVAL)
        self.assertTrue(flame_stats['runTime'] > 0)
        # These two only require the fields to exist and be non-negative.
        self.assertTrue(len(flame_stats['callStats']) >= 0)
        self.assertTrue(flame_stats['totalSamples'] >= 0)

# pylint: enable=missing-docstring, blacklisted-name, protected-access 
Example #12
Source File: coders.py    From gd.py with MIT License 6 votes vote down vote up
def inflate(data: bytes) -> bytes:
    """Decompress ``data``, trying gzip first and then several zlib modes.

    Returns the first successful result. Raises RuntimeError when every
    attempt fails.
    """
    try:
        return gzip.decompress(data)
    except (gzip.BadGzipFile, zlib.error):
        pass  # fall through to the zlib attempts below

    # fallback and do some other attempts
    candidate_wbits = (
        zlib.MAX_WBITS | Z_AUTO_HEADER,
        zlib.MAX_WBITS | Z_GZIP_HEADER,
        zlib.MAX_WBITS,
    )
    for window_bits in candidate_wbits:
        try:
            inflater = zlib.decompressobj(wbits=window_bits)
            return inflater.decompress(data) + inflater.flush()
        except zlib.error:
            continue

    raise RuntimeError("Failed to decompress data.")
Example #13
Source File: message.py    From rssant with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def raw_decode(cls, data, content_encoding=None):
        """Decode a raw message payload according to ``content_encoding``.

        The payload is gunzipped when the encoding is gzip, then parsed as
        JSON (UTF-8) when the encoding is JSON, otherwise as msgpack.

        Returns:
            The decoded object.

        Raises:
            ActorMessageDecodeError: If decompression or parsing fails.
        """
        import zlib  # local import: only needed to recognise corrupt deflate data

        content_encoding = ContentEncoding.of(content_encoding)
        if content_encoding.is_gzip:
            try:
                data = gzip.decompress(data)
            except (OSError, EOFError, zlib.error, ValueError, TypeError) as ex:
                # gzip.decompress reports a bad header as OSError, a truncated
                # stream as EOFError and corrupt deflate data as zlib.error.
                raise ActorMessageDecodeError('gzip decompress failed') from ex
        try:
            if content_encoding.is_json:
                data = json.loads(data.decode('utf-8'))
            else:
                data = msgpack.unpackb(data, raw=False)
        except json.JSONDecodeError as ex:
            raise ActorMessageDecodeError('json decode failed') from ex
        except msgpack.UnpackException as ex:
            raise ActorMessageDecodeError('msgpack decode failed') from ex
        return data
Example #14
Source File: pdb.py    From ssbio with MIT License 6 votes vote down vote up
def download_sifts_xml(pdb_id, outdir='', force_rerun=False):
    """Download the SIFTS file for a PDB ID.

    The gzipped XML is fetched from the EBI FTP server and written
    decompressed to ``<outdir>/<pdb_id lowercased>.sifts.xml``.

    Args:
        pdb_id (str): PDB ID
        outdir (str): Output directory, current working directory if not specified.
        force_rerun (bool): If the file should be downloaded again even if it exists

    Returns:
        str: Path to downloaded file

    """
    baseURL = 'ftp://ftp.ebi.ac.uk/pub/databases/msd/sifts/xml/'
    filename = '{}.xml.gz'.format(pdb_id.lower())

    outfile = op.join(outdir, filename.split('.')[0] + '.sifts.xml')

    if ssbio.utils.force_rerun(flag=force_rerun, outfile=outfile):
        # Download and decompress before opening the output file, so a failed
        # transfer or corrupt archive does not leave an empty file behind,
        # and close the connection as soon as the body has been read.
        with urlopen(baseURL + filename) as response:
            xml_bytes = gzip.decompress(response.read())
        with open(outfile, 'wb') as f:
            f.write(xml_bytes)

    return outfile
Example #15
Source File: fix_dropbox.py    From dropbox_ext4 with MIT License 6 votes vote down vote up
def main():
    """Install the library and wrapper script, then check which 'dropbox' wins.

    Writes the decoded library to LIBRARY_PATH and the wrapper to
    SCRIPT_PATH (both mode 0o755), prints where they went, and warns when
    ``which dropbox`` resolves to something other than SCRIPT_PATH.
    """
    # Install the library.
    lib_dir = os.path.join(INSTALL_PATH, 'lib')
    os.makedirs(lib_dir, exist_ok=True)

    with open(LIBRARY_PATH, 'wb') as lib_file:
        lib_bytes = gzip.decompress(base64.b85decode(ENCODED_LIB_CONTENTS))
        lib_file.write(lib_bytes)
        os.fchmod(lib_file.fileno(), 0o755)

    # Install the wrapper script.
    bin_dir = os.path.join(INSTALL_PATH, 'bin')
    os.makedirs(bin_dir, exist_ok=True)

    with open(SCRIPT_PATH, 'w') as script_file:
        script_file.write(DROPBOX_WRAPPER_CONTENTS)
        os.fchmod(script_file.fileno(), 0o755)

    installed_msg = "Installed the library and the wrapper script at:\n  %s\n  %s" % (LIBRARY_PATH, SCRIPT_PATH)
    print(installed_msg)
    print("(To uninstall, simply delete them.)")

    # Check that the correct 'dropbox' is in the $PATH.
    which_output = subprocess.check_output(['which', 'dropbox'])
    active_dropbox = which_output.decode().rstrip()
    if active_dropbox == SCRIPT_PATH:
        return
    print()
    print("You will need to fix your $PATH! Currently, %r takes precedence." % active_dropbox)
Example #16
Source File: data_loader.py    From DialoGPT with MIT License 6 votes vote down vote up
def __iter__(self):
        # Iterate the stored chunks (shuffled when self.shuffle is set) and
        # yield the batches of one bucketed DataLoader per chunk.
        chunk_keys = self._get_keys()
        if self.shuffle:
            random.shuffle(chunk_keys)
        for chunk_key in chunk_keys:
            raw_chunk = gzip.decompress(self.db[chunk_key])
            features = json.loads(raw_chunk.decode('utf-8'))
            # discard long examples
            kept = [feat for feat in features
                    if not (feat['input_len'] > self.max_len)]
            kept_lens = [feat['input_len'] for feat in kept]

            chunk_dataset = GPT2FeatureDataset(kept, self.max_len)
            bucket_sampler = BucketSampler(
                kept_lens, self.bucket_size, self.batch_size,
                droplast=True, shuffle=self.shuffle)
            chunk_loader = DataLoader(
                chunk_dataset, batch_sampler=bucket_sampler,
                num_workers=0,  # can test multi-worker
                collate_fn=GPT2FeatureDataset.collate)
            yield from chunk_loader
Example #17
Source File: base64unpack.py    From msticpy with MIT License 6 votes vote down vote up
def get_items_from_gzip(binary: bytes) -> Tuple[str, Dict[str, bytes]]:
    """
    Decompress a gzip payload and return it labelled as a "gz" archive.

    Parameters
    ----------
    binary : bytes
        byte array of gz file

    Returns
    -------
    Tuple[str, Dict[str, bytes]]
        The literal file type "gz" and a one-entry dictionary mapping
        "gzip_file" to the decompressed bytes

    """
    return "gz", {"gzip_file": gzip.decompress(binary)}
Example #18
Source File: wad.py    From CDTB with GNU Lesser General Public License v3.0 6 votes vote down vote up
def read_data(self, f):
        """Read this entry's bytes from WAD file object ``f``, uncompressed.

        Returns the data for types 0 (stored), 1 (gzip) and 3 (zstd), and
        None for type 2 (a redirection, which is only logged). Raises
        ValueError for any other type.
        """

        f.seek(self.offset)
        # assume files are small enough to fit in memory
        raw = f.read(self.compressed_size)
        entry_type = self.type
        if entry_type == 0:
            return raw
        if entry_type == 1:
            return gzip.decompress(raw)
        if entry_type == 3:
            return zstd_decompress(raw)
        if entry_type == 2:
            # Payload: little-endian uint32 length, then the target path.
            (length,) = struct.unpack('<L', raw[:4])
            target = raw[4:4+length].rstrip(b'\0').decode('utf-8')
            logger.debug(f"file redirection: {target}")
            return None
        raise ValueError(f"unsupported file type: {self.type}")
Example #19
Source File: HackRequests.py    From hack-requests with MIT License 6 votes vote down vote up
def content(self):
        """Return the response body, decoded per its Content-Encoding.

        A non-empty body from an earlier call is returned from
        ``self._content``. Otherwise the body is read from ``self.rep`` (an
        empty body is used if the read times out), gunzipped or inflated
        according to the ``content-encoding`` header, stored in
        ``self._content`` and returned.
        """
        if self._content:
            return self._content
        encode = self.rep.msg.get('content-encoding', None)
        try:
            body = self.rep.read()
        except socket.timeout:
            body = b''
        if encode == 'gzip':
            body = gzip.decompress(body)
        elif encode == 'deflate':
            try:
                # Try a raw deflate stream (no zlib header) first.
                body = zlib.decompress(body, -zlib.MAX_WBITS)
            except zlib.error:
                # Fall back to a zlib-wrapped stream. Only zlib.error is
                # caught so unrelated failures and KeyboardInterrupt are not
                # swallowed.
                body = zlib.decompress(body)
        # redirect = self.rep.msg.get('location', None)   # handle 301/302
        self._content = body
        return body
Example #20
Source File: nvd.py    From vulnix with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def download(self, mirror, meta):
        """Fetch and parse the compressed JSON feed from ``mirror``.

        The request carries the headers ``meta`` supplies for the URL. On a
        200 response the body is gunzipped and parsed, and the response
        headers are handed back to ``meta``; any other non-error status is
        skipped. HTTP error statuses raise via ``raise_for_status``.

        Returns True if anything has been loaded successfully.
        """
        url = mirror + self.download_uri
        _log.info('Loading %s', url)
        resp = requests.get(url, headers=meta.headers_for(url))
        resp.raise_for_status()
        if resp.status_code != 200:
            _log.debug('Skipping JSON feed "%s" (%s)', self.name, resp.reason)
            return False
        _log.debug('Loading JSON feed "%s"', self.name)
        feed_bytes = gzip.decompress(resp.content)
        self.parse(feed_bytes)
        meta.update_headers_for(url, resp.headers)
        return True
Example #21
Source File: memory_profiler_e2e.py    From vprof with BSD 2-Clause "Simplified" License 6 votes vote down vote up
def testRequest(self):
        # Run self._func through the runner in 'm' mode, then fetch the
        # gzip-compressed JSON stats from the /profile endpoint and check them.
        runner.run(
            self._func, 'm', ('foo', 'bar'), host=_HOST, port=_PORT)
        profile_url = 'http://%s:%s/profile' % (_HOST, _PORT)
        payload = gzip.decompress(urllib.request.urlopen(profile_url).read())
        mem_stats = json.loads(payload.decode('utf-8'))['m']
        this_file = inspect.getabsfile(inspect.currentframe())
        self.assertEqual(mem_stats['objectName'],
                         '_func @ %s (function)' % this_file)
        self.assertEqual(mem_stats['totalEvents'], 2)
        events = mem_stats['codeEvents']
        # Fields 0, 1 and 3 of each event are compared against fixed values.
        self.assertEqual(events[0][0], 1)
        self.assertEqual(events[0][1], 91)
        self.assertEqual(events[0][3], '_func')
        self.assertEqual(events[1][0], 2)
        self.assertEqual(events[1][1], 92)
        self.assertEqual(events[1][3], '_func')

# pylint: enable=missing-docstring, blacklisted-name 
Example #22
Source File: urllib.py    From pikaur with GNU General Public License v3.0 5 votes vote down vote up
def get_gzip_from_url(url: str) -> str:
    """Fetch ``url``, gunzip the body and return it decoded as UTF-8 text."""
    compressed = read_bytes_from_url(url)
    return gzip.decompress(compressed).decode('utf-8')
Example #23
Source File: compression.py    From chepy with GNU General Public License v3.0 5 votes vote down vote up
def raw_inflate(self):
        """Inflate the state as a raw (headerless) deflate stream

        Returns:
            Chepy: The Chepy object.
        """
        compressed = self._convert_to_bytes()
        # Negative wbits tells zlib to expect no header or trailer.
        self.state = zlib.decompress(compressed, -15)
        return self
Example #24
Source File: feed.py    From rssant with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def get_content(self, decompress=None):
        """Return ``self.content``, gunzipped when requested.

        ``decompress`` defaults to ``self.is_gzipped`` when left as None.
        Empty or missing content is returned unchanged.
        """
        should_decompress = self.is_gzipped if decompress is None else decompress
        data = self.content
        if not (data and should_decompress):
            return data
        return gzip.decompress(data)
Example #25
Source File: compression.py    From chepy with GNU General Public License v3.0 5 votes vote down vote up
def zlib_decompress(self):
        """Decompress the state with zlib

        Returns:
            Chepy: The Chepy object.

        Examples:
            >>> c = Chepy("789c0580a10d000008c35ee1b9ca05c104e737b761ca5711e8039a")
            >>> c.hex_to_binary()
            >>> c.zlib_decompress()
            >>> c.out()
            b"some text"
        """
        compressed = self._convert_to_bytes()
        self.state = zlib.decompress(compressed)
        return self
Example #26
Source File: mmtfReader.py    From mmtf-pyspark with Apache License 2.0 5 votes vote down vote up
def _get_structure(pdbId, reduced, first_model):
    '''Download and decode one structure.

    Parameters
    ----------
    pdbId
       Identifier of the structure; passed to ``default_api.get_url``
    reduced
       Passed through to ``default_api.get_url``
    first_model
       Passed through to ``MmtfStructure``

    Returns
    -------
    tuple or None
       ``(pdbId, decoder)``, or None when the download fails with an
       HTTP error (a message is printed in that case)
    '''

    try:
        url = default_api.get_url(pdbId, reduced)
        request = urllib2.Request(url)
        request.add_header('Accept-encoding', 'gzip')
        # Close the HTTP response as soon as the body has been read.
        with urllib2.urlopen(request) as response:
            is_gzipped = response.info().get('Content-Encoding') == 'gzip'
            data = response.read()
        if is_gzipped:
            data = gzip.decompress(data)
        # NOTE(review): pandas.read_msgpack was removed in pandas 1.0 --
        # confirm the pinned pandas version still provides it.
        unpack = pd.read_msgpack(data)
        decoder = MmtfStructure(unpack, first_model)
        return (pdbId, decoder)
    except urllib.error.HTTPError:
        print(f"ERROR: {pdbId} is not a valid pdbId")
        return None
Example #27
Source File: gelf_listener.py    From zoe with Apache License 2.0 5 votes vote down vote up
def handle(self):
        """Handle one UDP packet (one GELF log line in JSON format).

        The packet is gunzipped and parsed as JSON. Packets that are not
        valid gzip, or that belong to another deployment, are dropped.
        Otherwise a timestamped line is appended to the service's log file,
        which is created with a header line on first use.
        """
        import zlib  # local import: only needed to recognise corrupt deflate data

        data = self.rfile.read()
        try:
            data = gzip.decompress(data)
        except (OSError, EOFError, zlib.error):
            # Bad header (OSError), truncated stream (EOFError) or corrupt
            # deflate data (zlib.error): drop the packet.
            return
        data = json.loads(data.decode('utf-8'))
        deployment_name = data['_zoe_deployment_name']
        if deployment_name != get_conf().deployment_name:
            return

        execution_id = data['_zoe_execution_id']
        service_name = data['_zoe_service_name']
        host = data['host']
        timestamp = datetime.datetime.utcfromtimestamp(data['timestamp']).strftime('%Y-%m-%d %H:%M:%S')
        message = data['short_message']

        log_dir = os.path.join(get_conf().service_logs_base_path, get_conf().deployment_name, str(execution_id))
        log_file_path = os.path.join(log_dir, service_name + '.txt')
        if not os.path.exists(log_file_path):
            os.makedirs(log_dir, exist_ok=True)
            header = 'ZOE HEADER: log file for service {} running on host {}\n'.format(service_name, host)
            # Use a context manager so the header is flushed and the handle
            # closed before the file is reopened for appending below.
            with open(log_file_path, 'wb') as header_file:
                header_file.write(header.encode('utf-8'))

        with open(log_file_path, 'ab') as logfile:
            logline = timestamp + ' ' + message + '\n'
            logfile.write(logline.encode('utf-8'))
Example #28
Source File: utils.py    From anvio with GNU General Public License v3.0 5 votes vote down vote up
def convert_binary_blob_to_numpy_array(blob, dtype, decompress=True):
    """Interpret ``blob`` as a 1-D numpy array of ``dtype``.

    The blob is gunzipped first unless ``decompress`` is false.
    """
    raw = gzip.decompress(blob) if decompress else blob
    return np.frombuffer(raw, dtype=dtype)
Example #29
Source File: test_gz.py    From acsploit with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def test_single_file_bomb():
    # A 'single_file' run must emit exactly one gzip payload that
    # decompresses to the requested number of bytes.
    expected_size = 1000
    sink = DummyOutput()
    gz.options['type'] = 'single_file'
    gz.options['target_size'] = expected_size
    gz.run(sink)
    assert sink.count() == 1
    inflated = gzip.decompress(sink[0])
    assert len(inflated) == expected_size
Example #30
Source File: compression.py    From chepy with GNU General Public License v3.0 5 votes vote down vote up
def lzma_decompress(self):
        """Decompress the state with lzma

        Returns:
            Chepy: The Chepy object.
        """
        compressed = self._convert_to_bytes()
        self.state = lzma.decompress(compressed)
        return self