Python gzip.compress() Examples

The following are 30 code examples of gzip.compress(), drawn from open-source projects. You can go to the original project or source file noted above each example. You may also want to check out all available functions and classes of the gzip module.
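For reference, a minimal round-trip with the standard library (this snippet is ours, not taken from any of the projects below): gzip.compress() takes bytes and returns a complete gzip stream as bytes, and gzip.decompress() inverts it.

import gzip

payload = b"A,B\nx,y\n" * 100
blob = gzip.compress(payload, compresslevel=9)  # bytes in, gzipped bytes out
assert gzip.decompress(blob) == payload         # lossless round-trip
print(len(payload), len(blob))                  # repetitive input compresses well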
Example #1
Source File: dataserializer.py    From mars with Apache License 2.0
def loads(buf):
    mv = memoryview(buf)
    header = read_file_header(mv)
    compress = header.compress

    if compress == CompressType.NONE:
        data = buf[HEADER_LENGTH:]
    else:
        data = decompressors[compress](mv[HEADER_LENGTH:])

    if header.type == SerialType.ARROW:
        try:
            return pyarrow.deserialize(memoryview(data), mars_serialize_context())
        except pyarrow.lib.ArrowInvalid:  # pragma: no cover
            # reconstruct value from buffers of arrow components
            data_view = memoryview(data)
            meta_block_size = np.frombuffer(data_view[0:4], dtype='int32').item()
            meta = pickle.loads(data_view[4:4 + meta_block_size])  # nosec
            buffer_sizes = meta.pop('buffer_sizes')
            bounds = np.cumsum([4 + meta_block_size] + buffer_sizes)
            meta['data'] = [pyarrow.py_buffer(data_view[bounds[idx]:bounds[idx + 1]])
                            for idx in range(len(buffer_sizes))]
            return pyarrow.deserialize_components(meta, mars_serialize_context())
    else:
        return pickle.loads(data) 
Example #2
Source File: mmtfWriter.py    From mmtf-pyspark with Apache License 2.0
def _to_byte_array(structure, compressed):
    '''Returns an MMTF-encoded byte array with optional gzip compression

    Returns
    -------
    bytes
       MMTF-encoded and optionally gzipped structure data
    '''

    byte_array = bytearray(msgpack.packb(structure.input_data, use_bin_type=True))
    #byte_array = bytearray(msgpack.packb(MMTFEncoder.encode_data(structure), use_bin_type = True))

    if compressed:
        return gzip.compress(byte_array)
    else:
        return byte_array 
Example #3
Source File: test_loadurl.py    From cjworkbench with GNU Affero General Public License v3.0
def test_fetch_deflate_encoded_csv(self):
        body = b"A,B\nx,y\nz,a"
        zo = zlib.compressobj(wbits=-zlib.MAX_WBITS)
        zbody = zo.compress(body) + zo.flush()
        url = self.build_url("/path/to.csv.gz")
        self.mock_http_response = MockHttpResponse.ok(
            zbody,
            [
                ("Content-Type", "text/csv; charset=utf-8"),
                ("Content-Encoding", "deflate"),
            ],
        )
        with call_fetch(url) as result:
            self.assertEqual(result.errors, [])
            with httpfile.read(result.path) as (_, __, headers, body_path):
                self.assertEqual(body_path.read_bytes(), body) 
Example #4
Source File: routeconfig.py    From metrics-mvp with MIT License
def save_routes(agency_id, routes, save_to_s3=False):
    data_str = json.dumps({
        'version': DefaultVersion,
        'routes': [route.data for route in routes]
    }, separators=(',', ':'))

    cache_path = get_cache_path(agency_id)

    with open(cache_path, "w") as f:
        f.write(data_str)

    if save_to_s3:
        s3 = boto3.resource('s3')
        s3_path = get_s3_path(agency_id)
        s3_bucket = config.s3_bucket
        print(f'saving to s3://{s3_bucket}/{s3_path}')
        object = s3.Object(s3_bucket, s3_path)
        object.put(
            Body=gzip.compress(bytes(data_str, 'utf-8')),
            CacheControl='max-age=86400',
            ContentType='application/json',
            ContentEncoding='gzip',
            ACL='public-read'
        ) 
Example #5
Source File: utils.py    From esi-knife with MIT License
def write_data(uuid, data):
    """Try to store the data, log errors."""

    try:
        CACHE.set(
            "{}{}".format(Keys.complete.value, uuid),
            codecs.decode(
                base64.b64encode(compress(codecs.encode(
                    ujson.dumps(data),
                    "utf-8",
                ))),
                "utf-8",
            ),
            timeout=EXPIRY,
        )
    except Exception as error:
        LOG.warning("Failed to save data: %r", error) 
Example #6
Source File: userdata.py    From kOVHernetes with Apache License 2.0
def gen_kubeconfig(self, component, server='localhost'):
        """Generate kubeconfig"""

        kubeconfig = loads(files['kubeconfig'].decode(), object_pairs_hook=OrderedDict)
        kubeconfig['users'][0]['user']['client-certificate'] = 'tls/client/{}.crt'.format(component)
        kubeconfig['clusters'][0]['cluster']['server'] = 'https://' + server + ':6443'

        kubeconfig = compress((dumps(kubeconfig, indent=2) + '\n').encode())

        self.add_files([
            {
                'filesystem': 'root',
                'path': '/etc/kubernetes/kubeconfig-' + component + '.gz',
                'mode': 416, # 0640
                'contents': {
                    'source': 'data:,' + quote(kubeconfig)
                }
            }
        ]) 
Example #7
Source File: data.py    From pyAFQ with BSD 2-Clause "Simplified" License
def s3fs_nifti_write(img, fname, fs=None):
    """
    Write a nifti file straight to S3

    Parameters
    ----------
    img : nib.Nifti1Image class instance
        The image containing data to be written into S3
    fname : string
        Full path (including bucket name and extension) to the S3 location
        where the file is to be saved.
    fs : an s3fs.S3FileSystem class instance, optional
        A file-system to refer to. Defaults to creating a new file-system.
    """
    if fs is None:
        fs = s3fs.S3FileSystem()

    bio = BytesIO()
    file_map = img.make_file_map({'image': bio, 'header': bio})
    img.to_file_map(file_map)
    data = gzip.compress(bio.getvalue())
    with fs.open(fname, 'wb') as ff:
        ff.write(data) 
Example #8
Source File: datauri.py    From igv-reports with MIT License
def get_data_uri(data):

    """
    Return a data uri for the input, which can be either a string or byte array
    """

    if isinstance(data, str):
        data = compress(data.encode())
        mediatype = "data:application/gzip"
    else:
        if data[0] == 0x1f and data[1] == 0x8b:
            mediatype = "data:application/gzip"
        else:
            mediatype = "data:application:octet-stream"

    enc_str = b64encode(data)

    data_uri = mediatype + ";base64," + str(enc_str)[2:-1]  # strip the b'...' wrapper from the bytes repr
    return data_uri 
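Reading such a URI back is symmetric; a minimal sketch (ours, not part of igv-reports) that recovers the original payload:

import gzip
from base64 import b64decode

def read_data_uri(uri):
    # split "data:<mediatype>;base64,<payload>" into its two halves
    mediatype, _, payload = uri.partition(";base64,")
    data = b64decode(payload)
    if mediatype.endswith("gzip"):
        data = gzip.decompress(data)
    return data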
Example #9
Source File: userdata.py    From kOVHernetes with Apache License 2.0
def gen_kubemanifest(self, component, tag):
        """Generate Kubernetes Pod manifest"""

        manifest = loads(files[component].decode(), object_pairs_hook=OrderedDict)
        manifest['spec']['containers'][0]['image'] = 'k8s.gcr.io/hyperkube:v{}'.format(self.k8s_ver)

        manifest = compress((dumps(manifest, indent=2) + '\n').encode())

        self.add_files([
            {
                'filesystem': 'root',
                'path': '/etc/kubernetes/manifests/kube-{}.json'.format(component) + '.gz',
                'mode': 416, # 0640
                'contents': {
                    'source': 'data:,' + quote(manifest)
                }
            }
        ]) 
Example #10
Source File: cli.py    From esi-knife with MIT License
def write_results(results, character_id):
    """Write the results to a compressed .knife file."""

    fname = "{}.knife".format(character_id)
    i = 0
    while os.path.isfile(fname):
        i += 1
        fname = "{}-{}.knife".format(character_id, i)

    with open(fname, "w") as openout:
        openout.write(codecs.decode(
            base64.b64encode(compress(codecs.encode(
                json.dumps(results),
                "utf-8",
            ))),
            "utf-8",
        ))

    print("created {}".format(fname)) 
Example #11
Source File: shp2json.py    From handson-labs-2018 with MIT License
def upload_s3(bucket, json_file, metadata):
    """
    gzip the file and upload it to s3
    :param json_file: name of the json file to upload
    :return:
    """
    gz_name = f"{json_file}.gz"
    obj_key = f"json/{path.basename(gz_name)}"
    print("업로드", gz_name, obj_key)

    with open(json_file, 'rb') as f:
        gz = gzip.compress(f.read())
        s3.put_object(
            Body=gz,
            Bucket=bucket,
            ContentEncoding='gzip',
            ContentLanguage='string',
            ContentType='application/json',
            Key=obj_key,
            # todo: add metadata - 2018-07-28
            Metadata=metadata,
        ) 
Example #12
Source File: dataserializer.py    From mars with Apache License 2.0
def dump(obj, file, *, serial_type=None, compress=None, pickle_protocol=None):
    if serial_type is None:
        serial_type = SerialType.ARROW if pyarrow is not None else SerialType.PICKLE
    if compress is None:
        compress = CompressType.NONE
    try:
        if serial_type == SerialType.ARROW:
            serialized = pyarrow.serialize(obj, mars_serialize_context())
            data_size = serialized.total_bytes
            write_file_header(file, file_header(serial_type, SERIAL_VERSION, data_size, compress))
            file = open_compression_file(file, compress)
            serialized.write_to(file)
        else:
            pickle_protocol = pickle_protocol or pickle.HIGHEST_PROTOCOL
            serialized = pickle.dumps(obj, protocol=pickle_protocol)
            data_size = len(serialized)
            write_file_header(file, file_header(serial_type, SERIAL_VERSION, data_size, compress))
            file = open_compression_file(file, compress)
            file.write(serialized)
    finally:
        if compress != CompressType.NONE:
            file.close()
    return 
Example #13
Source File: mmtfWriter.py    From mmtf-pyspark with Apache License 2.0
def write_sequence_file(path, structure, compressed=True):
    '''Encodes and writes MMTF-encoded structure data to a Hadoop Sequence File

    Parameters
    ----------
    path : str
       Path to Hadoop file directory
    structure : tuple
       structure data to be written
    compressed : bool
       if true, apply gzip compression
    '''
    # Can't apply first() function on list

    structure.map(lambda t: (t[0], _to_byte_array(t[1], compressed)))\
             .saveAsHadoopFile(path,
                               "org.apache.hadoop.mapred.SequenceFileOutputFormat",
                               "org.apache.hadoop.io.Text",
                               "org.apache.hadoop.io.BytesWritable") 
Example #14
Source File: test_persistence.py    From palladium with Apache License 2.0
def test_download(self, mocked_requests, persister):
        """ test download and activation of a model """
        expected = Dummy(name='mymodel', __metadata__={})
        zipped_model = gzip.compress(pickle.dumps(expected))

        get_md_url = "%s/mymodel-metadata.json" % (self.base_url,)
        mocked_requests.head(get_md_url, status_code=200)
        get_md = mocked_requests.get(
            get_md_url,
            json={"models": [{"version": 1}],
                  "properties": {'active-model': 1}},
            status_code=200,
            )

        get_model_url = "%s/mymodel-1.pkl.gz" % (self.base_url,)
        mocked_requests.head(get_model_url, status_code=200)
        get_model = mocked_requests.get(
            get_model_url,
            content=zipped_model,
            status_code=200,
            )

        model = persister.read()
        assert get_md.called
        assert get_model.called
        assert model == expected
        self.assert_auth_headers(mocked_requests) 
Example #15
Source File: v21_to_v22.py    From anvio with GNU General Public License v3.0
def convert_numpy_array_to_binary_blob(array, compress=True):
    if compress:
        return gzip.compress(memoryview(array), compresslevel=1)
    else:
        return memoryview(array) 
Example #16
Source File: minio_storage_for_collectstatic.py    From cjworkbench with GNU Affero General Public License v3.0
def _upload_in_thread(self, name: str, data: bytes) -> None:
        """
        Perform the actual upload.

        Raise an exception if the file is not certainly uploaded.
        """

        content_type, _ = mimetypes.guess_type(name, strict=False)
        content_type = content_type or "application/octet-stream"

        kwargs = {}
        if content_type.startswith("text") or content_type.split("/")[1] in (
            "xml",
            "json",
            "javascript",
        ):
            data = gzip.compress(data)
            kwargs["ContentEncoding"] = "gzip"

        client.put_object(
            Body=data,
            Bucket=StaticFilesBucket,
            Key=name,
            ContentLength=len(data),
            ContentType=content_type,
            # These are static files, but only Webpack-generated files have
            # hashed filenames. Logos and whatnot don't. So let's tell the
            # browser to cache for one day, to time-bound the damage when we
            # deploy a new version of our logo and users keep the old one.
            CacheControl="public, max-age=86400",
            **kwargs,
        )
        logger.info("Finished uploading %s (%d bytes)" % (name, len(data))) 
Example #17
Source File: test_persistence.py    From palladium with Apache License 2.0
def dbmodel(self, database):
        from palladium.util import session_scope

        model = Dummy(
            name='mymodel',
            __metadata__={'some': 'metadata', 'version': 1},
            )

        model_blob = gzip.compress(pickle.dumps(model), compresslevel=0)
        chunk_size = 4
        chunks = [model_blob[i:i + chunk_size]
                  for i in range(0, len(model_blob), chunk_size)]

        dbmodel = database.DBModel(
            version=1,
            chunks=[
                database.DBModelChunk(
                    model_version=1,
                    blob=chunk,
                    )
                for chunk in chunks
                ],
            metadata_=json.dumps(model.__metadata__),
            )

        with session_scope(database.session) as session:
            session.add(dbmodel)

        return model 
Example #18
Source File: v9_to_v10.py    From anvio with GNU General Public License v3.0
def convert_numpy_array_to_binary_blob(array, compress=True):
    if compress:
        return gzip.compress(memoryview(array), compresslevel=1)
    else:
        return memoryview(array) 
Example #19
Source File: huobipro.py    From arbcharm with MIT License
def compress_msg(msg):
        return gzip.compress(json.dumps(msg).encode()) 
Example #20
Source File: test_format.py    From hermit with Apache License 2.0
def test_base64(self):
        with pytest.raises(hermit.InvalidSignatureRequest):
            hermit.decode_qr_code_data(base64.b64encode(gzip.compress(_DECODED.encode('utf-8')))) 
Example #21
Source File: test_format.py    From hermit with Apache License 2.0
def test_not_utf8(self):
        with pytest.raises(hermit.InvalidSignatureRequest):
            hermit.decode_qr_code_data(base64.b32encode(gzip.compress(_DECODED.encode('utf-16')))) 
Example #22
Source File: precomputed_stats.py    From metrics-mvp with MIT License
def save_stats(agency_id, stat_id, d, start_time_str, end_time_str, scheduled, data, save_to_s3=False):
    data_str = json.dumps({
        'version': DefaultVersion,
        'stat_id': stat_id,
        'start_time': start_time_str,
        'end_time': end_time_str,
        **data
    }, separators=(',', ':'))

    cache_path = get_cache_path(agency_id, stat_id, d, start_time_str, end_time_str, scheduled)

    cache_dir = Path(cache_path).parent
    if not cache_dir.exists():
        cache_dir.mkdir(parents = True, exist_ok = True)

    print(f'saving to {cache_path}')
    with open(cache_path, "w") as f:
        f.write(data_str)

    if save_to_s3:
        s3 = boto3.resource('s3')
        s3_path = get_s3_path(agency_id, stat_id, d, start_time_str, end_time_str, scheduled)
        s3_bucket = config.s3_bucket
        print(f'saving to s3://{s3_bucket}/{s3_path}')
        object = s3.Object(s3_bucket, s3_path)
        object.put(
            Body=gzip.compress(bytes(data_str, 'utf-8')),
            CacheControl='max-age=86400',
            ContentType='application/json',
            ContentEncoding='gzip',
            ACL='public-read'
        ) 
Example #23
Source File: benchmark_zip.py    From rssant with BSD 3-Clause "New" or "Revised" License
def main():
    print(len(data), len(data_gzip), len(data_lz4))
    for i in range(10):
        t1 = timeit.timeit(lambda: gzip.compress(data), number=1000)
        t2 = timeit.timeit(lambda: lz4.compress(data), number=1000)
        print(t1, t2)
    for i in range(10):
        t1 = timeit.timeit(lambda: gzip.decompress(data_gzip), number=1000)
        t2 = timeit.timeit(lambda: lz4.decompress(data_lz4), number=1000)
        print(t1, t2) 
Example #24
Source File: story_data.py    From rssant with BSD 3-Clause "New" or "Revised" License
def encode(self) -> bytes:
        version = struct.pack('>B', self._version)
        if self._version == self.VERSION_GZIP:
            data_bytes = gzip.compress(self._value, compresslevel=5)
        elif self._version == self.VERSION_LZ4:
            data_bytes = lz4.compress(self._value, compression_level=7)
        elif self._version == self.VERSION_RAW:
            data_bytes = self._value
        else:
            assert False, f'unknown version {self._version}'
        return version + data_bytes 
Example #25
Source File: feed.py    From rssant with BSD 3-Clause "New" or "Revised" License
def set_content(self, content):
        if content and len(content) >= 1024:
            self.content = gzip.compress(content, compresslevel=9)
            self.is_gzipped = True
        else:
            self.content = content
            self.is_gzipped = False 
Example #26
Source File: story_unique_ids.py    From rssant with BSD 3-Clause "New" or "Revised" License
def encode(self) -> bytes:
        value = '\n'.join(self._unique_ids).encode('utf-8')
        unique_ids_gzip = gzip.compress(value)
        header = struct.pack('>BI', self._version, self._begin_offset)
        return header + unique_ids_gzip 
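A hypothetical decoder for this layout (our sketch, not part of rssant) unpacks the 5-byte '>BI' header (1-byte version, 4-byte big-endian begin offset) and gunzips the remainder:

import gzip
import struct

def decode_unique_ids(data: bytes):
    # struct.calcsize('>BI') == 5: one unsigned byte plus one big-endian uint32
    version, begin_offset = struct.unpack('>BI', data[:5])
    unique_ids = gzip.decompress(data[5:]).decode('utf-8').split('\n')
    return version, begin_offset, unique_ids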
Example #27
Source File: message.py    From rssant with BSD 3-Clause "New" or "Revised" License
def raw_encode(cls, data, content_encoding=None):
        content_encoding = ContentEncoding.of(content_encoding)
        try:
            if content_encoding.is_json:
                data = json.dumps(data, ensure_ascii=False).encode('utf-8')
            else:
                data = msgpack.packb(data, use_bin_type=True)
        except (ValueError, TypeError) as ex:
            raise ActorMessageEncodeError(str(ex)) from ex
        if content_encoding.is_gzip:
            data = gzip.compress(data)
        return data 
Example #28
Source File: userdata.py    From kOVHernetes with Apache License 2.0
def res_gzip(resource):
    """Returns package data as gzipped bytes"""
    return compress(res_plain(resource))

# Reusable data from static files 
Example #29
Source File: arrival_history.py    From metrics-mvp with MIT License
def save_for_date(history: ArrivalHistory, d: date, s3=False):
    data_str = json.dumps(history.get_data())

    version = history.version
    agency_id = history.agency_id
    route_id = history.route_id

    cache_path = get_cache_path(agency_id, route_id, d, version)

    cache_dir = Path(cache_path).parent
    if not cache_dir.exists():
        cache_dir.mkdir(parents = True, exist_ok = True)

    with open(cache_path, "w") as f:
        f.write(data_str)

    if s3:
        s3 = boto3.resource('s3')
        s3_path = get_s3_path(agency_id, route_id, d, version)
        s3_bucket = config.s3_bucket
        print(f'saving to s3://{s3_bucket}/{s3_path}')
        object = s3.Object(s3_bucket, s3_path)
        object.put(
            Body=gzip.compress(bytes(data_str, 'utf-8')),
            ContentType='application/json',
            ContentEncoding='gzip',
            ACL='public-read'
        ) 
Example #30
Source File: s3.py    From S4 with GNU General Public License v3.0
def flush_index(self, compressed=True):
        data = json.dumps(self.index).encode("utf-8")
        if compressed:
            logger.debug("Using gzip encoding for writing index")
            data = gzip.compress(data)
        else:
            logger.debug("Using plain text encoding for writing index")

        self.boto.put_object(Bucket=self.bucket, Key=self.index_path(), Body=data)