Python pandas.read_msgpack() Examples

The following are 11 code examples of pandas.read_msgpack(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module pandas, or try the search function.
Example #1
Source File: columnarStructure.py    From mmtf-pyspark with Apache License 2.0 6 votes vote down vote up
def get_chain_to_entity_index(self):
        '''Returns an array that maps a chain index to an entity index

        The array is built on the first call and cached in
        ``self.entityChainIndex``; later calls return the cached array.

        Returns
        -------
        :obj:`array <numpy.ndarray>`
           index that maps chain index to an entity index
        '''

        # Fast path: the mapping has already been built.
        if self.entityChainIndex is not None:
            return self.entityChainIndex

        # Publish the (still uninitialised) array on self first, then fill it
        # in place through the local alias.
        mapping = np.empty(self.structure.num_chains, dtype=np.int32)
        self.entityChainIndex = mapping

        for entity_id, entity in enumerate(self.structure.entity_list):
            chains = entity['chainIndexList']
            # pd.read_msgpack returns tuple, msgpack-python returns list;
            # a tuple would be read by numpy as a multi-dimensional index,
            # so anything that is not exactly a list is converted.
            chains = chains if type(chains) is list else list(chains)
            mapping[chains] = entity_id

        return self.entityChainIndex
Example #2
Source File: mmtfStructure.py    From mmtf-pyspark with Apache License 2.0 6 votes vote down vote up
def chain_to_entity_index(self):
        '''Returns an array that maps a chain index to an entity index

        The array is built on the first call and cached in
        ``self.entityChainIndex``; later calls return the cached array.
        Chain indices that are not smaller than ``self.num_chains`` are
        skipped while building the mapping.

        Returns
        -------
        :obj:`array <numpy.ndarray>`
           index that maps chain index to an entity index
        '''

        if self.entityChainIndex is None:
            self.entityChainIndex = np.empty(self.num_chains, dtype=np.int32)
            # NOTE(review): looks like leftover debug output - consider
            # replacing with logging.
            print("chain_to_entity_index: num_chains", self.num_chains)

            for i, entity in enumerate(self.entity_list):
                # Iterating element-wise works for both tuples (as returned by
                # pd.read_msgpack) and lists (as returned by msgpack-python).
                # TODO need to update entity_list when self.truncate
                for index in entity['chainIndexList']:
                    # Ignore chain indices outside the allocated array.
                    if index < self.num_chains:
                        self.entityChainIndex[index] = i

        # The documented contract is to return the mapping; without this
        # statement the function always returned None.
        return self.entityChainIndex
Example #3
Source File: mmtfReader.py    From mmtf-pyspark with Apache License 2.0 6 votes vote down vote up
def _call_mmtf(f, first_model=False):
    '''Call function for mmtf files'''

    if ".mmtf.gz" in f:
        name = f.split('/')[-1].split('.')[0].upper()
        data = gzip.open(f, 'rb')
        #unpack = msgpack.unpack(data, raw=False)
        unpack = pd.read_msgpack(data)
        decoder = MmtfStructure(unpack, first_model)
        return (name, decoder)

    elif ".mmtf" in f:
        #name = f.split('/')[-1].split('.')[0].upper()
        #unpack = msgpack.unpack(open(f, "rb"), raw=False)
        #decoder = MmtfStructure(unpack)
        name = f.split('/')[-1].split('.')[0].upper()
        unpack = pd.read_msgpack(f)
        decoder = MmtfStructure(unpack, first_model)
        return (name, decoder) 
Example #4
Source File: cache.py    From catalyst with Apache License 2.0 6 votes vote down vote up
def __init__(self,
                 path=None,
                 lock=None,
                 clean_on_failure=True,
                 serialization='msgpack'):
        """Set up the cache location, lock and (de)serialization callables.

        ``path`` defaults to a fresh ``mkdtemp()`` directory and ``lock`` to
        ``nop_context``. ``serialization`` is either ``'msgpack'`` or
        ``'pickle'`` / ``'pickle:<n>'``, where ``<n>`` is parsed with
        ``int`` and stored as ``self._protocol``; any other value raises
        ``ValueError``.
        """
        self.path = mkdtemp() if path is None else path
        self.lock = nop_context if lock is None else lock
        self.clean_on_failure = clean_on_failure

        if serialization != 'msgpack':
            # Expect 'pickle' optionally followed by ':<protocol>'.
            kind, sep, protocol = serialization.partition(':')
            if kind != 'pickle':
                raise ValueError(
                    "'serialization' must be either 'msgpack' or 'pickle[:n]'",
                )
            self._protocol = int(protocol) if sep else None

            self.serialize = self._serialize_pickle
            self.deserialize = pickle.load
        else:
            self.serialize = pd.DataFrame.to_msgpack
            self.deserialize = pd.read_msgpack
            self._protocol = None

        ensure_directory(self.path)
Example #5
Source File: message.py    From timeflux with MIT License 6 votes vote down vote up
def msgpack_deserialize(message):
    """Return ``[topic, data]`` decoded from a two-part message.

    ``message[0]`` is decoded as UTF-8 to give the topic and ``message[1]``
    is passed to ``pd.read_msgpack``.
    """
    # TODO: handle meta and cases where data is None
    return [message[0].decode("utf-8"), pd.read_msgpack(message[1])]


# def arrow_serialize(message):
#     topic = message[0].decode('utf-8')
#     df = message[1]
#     return [topic, pa.serialize(df).to_buffer()]

# def arrow_deserialize(message):
#     topic = message[0]
#     data = message[1]
#     return [topic, pa.deserialize(data)] 
Example #6
Source File: cache.py    From git-pandas with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def get(self, orik):
        """Return the cached object stored under ``self.prefix + orik``.

        When ``self.exists(orik)`` is true, the value fetched from
        ``self._cache`` is decoded with ``pd.read_msgpack`` and returned.
        Otherwise the prefixed key is dropped from ``self._key_list`` (if it
        is there) and ``CacheMissException`` is raised with that key.
        """
        full_key = self.prefix + orik

        if not self.exists(orik):
            # Forget the stale key; it is fine if it was never tracked.
            try:
                self._key_list.remove(full_key)
            except ValueError:
                pass
            raise CacheMissException(full_key)

        return pd.read_msgpack(self._cache.get(full_key))
Example #7
Source File: run_pandas.py    From recipy with Apache License 2.0 5 votes vote down vote up
def read_msgpack(self):
        """
        Load "dataframe.mpack" from ``self.data_dir`` with
        pandas.read_msgpack; the loaded object is discarded.
        """
        pd.read_msgpack(os.path.join(self.data_dir, "dataframe.mpack"))
Example #8
Source File: mmtfReader.py    From mmtf-pyspark with Apache License 2.0 5 votes vote down vote up
def _get_structure(pdbId, reduced, first_model):
    '''Download and decode the structure for one PDB ID

    Parameters
    ----------
    pdbId : str
       ID of the structure to download (presumably a single PDB ID -
       confirm against callers)
    reduced : bool
       passed through to ``default_api.get_url`` when building the URL
    first_model : bool
       passed through unchanged to ``MmtfStructure``

    Returns
    -------
    tuple
       (pdbId, decoder); None when the download fails with an HTTP error,
       in which case an error message is printed
    '''

    try:
        url = default_api.get_url(pdbId, reduced)
        request = urllib2.Request(url)
        request.add_header('Accept-encoding', 'gzip')
        response = urllib2.urlopen(request)
        try:
            # Only decompress when the server actually honoured the
            # gzip request.
            if response.info().get('Content-Encoding') == 'gzip':
                data = gzip.decompress(response.read())
            else:
                data = response.read()
        finally:
            # Release the connection even if reading/decompressing fails.
            response.close()
        unpack = pd.read_msgpack(data)
        decoder = MmtfStructure(unpack, first_model)
        return (pdbId, decoder)
    except urllib.error.HTTPError:
        print(f"ERROR: {pdbId} is not a valid pdbId")
        return None
Example #9
Source File: mmtfReader.py    From mmtf-pyspark with Apache License 2.0 5 votes vote down vote up
def _call_sequence_file(t, first_model):
    '''Call function for hadoop sequence files

    ``t[1]`` is gzip-decompressed, unpacked with ``pd.read_msgpack`` and
    wrapped in ``MmtfStructure``; the result is returned together with
    ``t[0]`` as ``(t[0], decoder)``.
    '''
    # TODO: check if all sequence files are gzipped
    decoder = MmtfStructure(pd.read_msgpack(gzip.decompress(t[1])), first_model)
    return (t[0], decoder)
Example #10
Source File: dataframe_bytes_storage.py    From pyABC with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def df_from_bytes_msgpack_(bytes_: bytes) -> pd.DataFrame:
    """Decode msgpack bytes into a DataFrame.

    Raises ``DataFrameLoadException`` when decoding fails with a
    ``UnicodeDecodeError`` or when the decoded object is not a
    ``pd.DataFrame``.
    """
    buffer = BytesIO(bytes_)
    try:
        loaded = pd.read_msgpack(buffer)
    except UnicodeDecodeError:
        raise DataFrameLoadException("Not a DataFrame")
    if isinstance(loaded, pd.DataFrame):
        return loaded
    raise DataFrameLoadException("Not a DataFrame")
Example #11
Source File: stock_resampler.py    From QUANTAXIS_RealtimeCollector with MIT License 4 votes vote down vote up
def on_message_callback(self, channel, method, properties, body):
        """Handle one incoming message: merge it, resample, publish and log.

        ``body`` is decoded with ``pd.read_msgpack``. If ``self.market_data``
        is set, it is updated in place with the decoded data and dumped to a
        CSV file. The rows of ``self.market_data`` with ``close > 0`` are then
        resampled via ``tdx_stock_bar_resample_parallel``, published through
        ``self.publish_msg`` and dumped to a second CSV file. Any exception
        raised during that resample/publish step is logged, not propagated.
        ``self.count`` is incremented at the end. ``channel``, ``method`` and
        ``properties`` are not used.
        """
        payload = pd.read_msgpack(body)
        # merge update (nothing is merged while market_data is still None)
        if self.market_data is not None:
            logger.info("Before market_data, concat and update start, 合并市场数据")
            merge_started = datetime.datetime.now()
            self.market_data.update(payload)
            merge_finished = datetime.datetime.now()
            merge_seconds = (merge_finished - merge_started).total_seconds()
            logger.info("Before market_data, concat and update end, 合并市场数据, 耗时,cost: %s s" % merge_seconds)
            logger.info(self.market_data.to_csv(float_format='%.3f'))
            market_csv = get_file_name_by_date('stock.market.%s.csv', self.log_dir)
            # overwrite the file instead of appending
            logging_csv(self.market_data, market_csv, index=True, mode='w')

        # group by code and resample
        try:
            resample_started = datetime.datetime.now()
            resampled: pd.DataFrame = tdx_stock_bar_resample_parallel(
                self.market_data[self.market_data.close > 0], self.frequency, jobs=self.cpu_count
            )
            resample_finished = datetime.datetime.now()
            resample_seconds = (resample_finished - resample_started).total_seconds()
            logger.info("数据重采样耗时,cost: %s" % resample_seconds)
            logger.info("发送重采样数据中start")
            self.publish_msg(resampled.to_msgpack())
            logger.info("发送重采样数据完毕end")

            logger.info(resampled.to_csv(float_format='%.3f'))
            bar_csv = get_file_name_by_date('stock.bar.%s.csv', self.log_dir)
            # overwrite the file instead of appending
            logging_csv(resampled, bar_csv, index=True, mode='w')
            del resampled
        except Exception as exc:
            logger.error("failure股票重采样数据. " + str(exc))
        finally:
            logger.info("重采样计数 count : %s" % self.count)
        self.count += 1
        del payload