Python pandas.HDFStore() Examples

The following are 30 code examples showing how to use pandas.HDFStore(). They are extracted from open source projects; you can go to the original project or source file by following the link above each example.


You may also want to check out all available functions and classes of the pandas module.
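
As a quick orientation before the examples, here is a minimal sketch of the core read/write pattern most of them follow (the file name and key below are placeholders, not taken from any project):

import pandas as pd

df = pd.DataFrame({"a": [1, 2, 3], "b": ["x", "y", "z"]})

# HDFStore is a context manager, so the file is closed automatically.
with pd.HDFStore("example.h5", mode="w") as store:
    store["my_table"] = df  # shorthand for store.put("my_table", df)

with pd.HDFStore("example.h5", mode="r") as store:
    restored = store["my_table"]  # read the DataFrame back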

Example 1
Project: OpenTrader   Author: OpenTrading   File: Omlette.py    License: GNU Lesser General Public License v3.0
def iMain():
    """
    Read an hdf file generated by us to make sure
    we can recover its content and structure.
    Give the name of an hdf5 file as a command-line argument.
    """
    assert len(sys.argv) > 1, __doc__
    sFile = sys.argv[1]
    assert os.path.isfile(sFile)
    oHdfStore = pandas.HDFStore(sFile, mode='r')
    print(oHdfStore.groups())
    # bug - no return value
    # oSignals = pandas.read_hdf(oHdfStore, '/servings/signals')
    mSignals = oHdfStore.select('/recipe/servings/mSignals', auto_close=False)
    print(mSignals)
    print(oHdfStore.get_node('/recipe')._v_attrs.metadata[0]['sUrl'])
Example 2
Project: avocado   Author: kboone   File: utils.py    License: MIT License
def _create_csi_index(store, key, column_name):
    """Create a CSI index on a column in an HDF5 file.

    The column must already have been specified in the ``data_columns``
    argument to ``to_hdf``, or it won't be stored correctly in the HDF5 file.

    Parameters
    ----------
    store : :class:`pandas.HDFStore`
        An HDF5 file opened as an instance of a :class:`pandas.HDFStore`
        object.
    key : str
        The key of the DataFrame to use.
    column_name : str
        The column to add a CSI index to.
    """
    key_store = store.get_storer(key)
    use_name = _map_column_name(key_store, column_name)
    column = key_store.table.colinstances[use_name]

    if not column.index.is_csi:
        column.remove_index()
        column.create_csindex() 
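
A hypothetical usage sketch for the helper above, assuming it and its _map_column_name dependency are importable (the file name, key, and column are invented): the column must be listed in data_columns when the table is written, as the docstring notes.

import pandas as pd

df = pd.DataFrame({"mjd": [3.0, 1.0, 2.0], "flux": [0.1, 0.2, 0.3]})
with pd.HDFStore("observations.h5", mode="w") as store:
    # 'mjd' must appear in data_columns, or the table has no per-column
    # index for _create_csi_index to operate on.
    store.put("obs", df, format="table", data_columns=["mjd"])
    _create_csi_index(store, "obs", "mjd")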
Example 3
Project: avocado   Author: kboone   File: dataset.py    License: MIT License
def write_models(self, tag=None):
        """Write the models of the light curves to disk.

        The models will be stored in the features directory using the dataset's
        name and the given features tag. Note that for now the models are
        stored as individual tables in the HDF5 file because there doesn't
        appear to be a good way to store fixed length arrays in pandas.

        WARNING: This is not the best way to implement this, and there are
        definitely much better ways. This also isn't thread-safe at all.

        Parameters
        ----------
        tag : str (optional)
            The tag for this version of the features. By default, this will use
            settings['features_tag'].
        """
        models_path = self.get_models_path(tag=tag)

        store = pd.HDFStore(models_path, "a")
        for model_name, model in self.models.items():
            model.to_hdf(store, model_name, mode="a")
        store.close() 
Example 4
Project: vivarium   Author: ihmeuw   File: hdf.py    License: GNU General Public License v3.0
def _write_pandas_data(path: Path, entity_key: EntityKey, data: Union[PandasObj]):
    """Write data in a pandas format to an HDF file.

    This method currently supports :class:`pandas.DataFrame` objects, with or
    without columns, and :class:`pandas.Series` objects.

    """
    if data.empty:
        # Our data is indexed, sometimes with no other columns. This leaves an
        # empty dataframe that store.put will silently fail to write in table
        # format.
        data = data.reset_index()
        if data.empty:
            raise ValueError("Cannot write an empty dataframe that does not have an index.")
        metadata = {'is_empty': True}
        data_columns = True
    else:
        metadata = {'is_empty': False}
        data_columns = None

    with pd.HDFStore(str(path), complevel=9) as store:
        store.put(entity_key.path, data, format="table", data_columns=data_columns)
        store.get_storer(entity_key.path).attrs.metadata = metadata  # NOTE: must use attrs. write this up 
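
For context, a sketch of how the data and metadata written above could be read back; the path and key are placeholders, and this mirrors the plain pandas API rather than vivarium's actual reader.

import pandas as pd

with pd.HDFStore("artifact.hdf", mode="r") as store:
    key = "/population/structure"  # hypothetical entity key path
    data = store.get(key)
    metadata = store.get_storer(key).attrs.metadata
    if metadata["is_empty"]:
        # one plausible way to undo the reset_index() performed on write
        data = data.set_index(list(data.columns))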
Example 5
Project: FRETBursts   Author: tritemio   File: burstlib_ext.py    License: GNU General Public License v2.0
def _store_bg_data(store, base_name, min_ph_delays_us, best_bg, best_th,
                   BG_data, BG_data_e):
    if not base_name.endswith('/'):
        base_name = base_name + '/'
    store_name = store.filename
    group_name = '/' + base_name[:-1]
    store.create_carray(group_name, 'min_ph_delays_us', obj=min_ph_delays_us,
                        createparents=True)
    for ph_sel, values in BG_data.items():
        store.create_carray(group_name, str(ph_sel), obj=values)
    for ph_sel, values in BG_data_e.items():
        store.create_carray(group_name, str(ph_sel) + '_err', obj=values)
    store.close()
    store = pd.HDFStore(store_name)
    store[base_name + 'best_bg'] = best_bg
    store[base_name + 'best_th'] = best_th
    store.close() 
Example 6
Project: FRETBursts   Author: tritemio   File: burstlib_ext.py    License: GNU General Public License v2.0
def _load_bg_data(store, base_name, ph_streams):
    if not base_name.endswith('/'):
        base_name = base_name + '/'
    store_name = store.filename
    group_name = '/' + base_name[:-1]
    min_ph_delays = store.get_node(group_name, 'min_ph_delays_us')[:]
    BG_data = {}
    for ph_sel in ph_streams:
        BG_data[ph_sel] = store.get_node(group_name, str(ph_sel))[:]
    BG_data_e = {}
    for ph_sel in ph_streams:
        BG_data_e[ph_sel] = store.get_node(group_name, str(ph_sel) + '_err')[:]
    store.close()
    store = pd.HDFStore(store_name)
    best_bg = store[base_name + 'best_bg']
    best_th = store[base_name + 'best_th']
    store.close()
    return best_th, best_bg, BG_data, BG_data_e, min_ph_delays 
Example 7
Project: twint   Author: twintproject   File: panda.py    License: MIT License
def save(_filename, _dataframe, **options):
    if options.get("dataname"):
        _dataname = options.get("dataname")
    else:
        _dataname = "twint"

    if not options.get("type"):
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            _store = pd.HDFStore(_filename + ".h5")
            _store[_dataname] = _dataframe
            _store.close()
    elif options.get("type") == "Pickle":
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            _dataframe.to_pickle(_filename + ".pkl")
    else:
        print("""Please specify: filename, DataFrame, DataFrame name and type
              (HDF5, default, or Pickle)""") 
Example 8
Project: twint   Author: twintproject   File: panda.py    License: MIT License
def read(_filename, **options):
    if not options.get("dataname"):
        _dataname = "twint"
    else:
        _dataname = options.get("dataname")

    if not options.get("type"):
        _store = pd.HDFStore(_filename + ".h5")
        _df = _store[_dataname]
        return _df
    elif options.get("type") == "Pickle":
        _df = pd.read_pickle(_filename + ".pkl")
        return _df
    else:
        print("""Please specify: DataFrame, DataFrame name (twint as default),
              filename and type (HDF5, default, or Pickle""") 
Example 9
Project: gcr-catalogs   Author: LSSTDESC   File: dc2_object.py    License: BSD 3-Clause "New" or "Revised" License
def _open_hdf5(self, file_path):
        """Return the file handle of an HDF5 file as an pd.HDFStore object

        Cache and return the file handle for the HDF5 file at <file_path>

        Args:
            file_path (str): The path of the desired file

        Return:
            The cached file handle
        """

        if (file_path not in self._file_handles or
                not self._file_handles[file_path].is_open):
            self._file_handles[file_path] = pd.HDFStore(file_path, 'r')

        return self._file_handles[file_path] 
Example 10
Project: OptiType   Author: FRED-2   File: hlatyper.py    License: BSD 3-Clause "New" or "Revised" License
def store_dataframes(out_hdf, **kwargs):
    # DataFrames to serialize have to be passed by keyword arguments. An argument matrix1=DataFrame(...)
    # will be written into table 'matrix1' in the HDF file.

    complevel = kwargs.pop('complevel', 9)   # default complevel & complib values if
    complib = kwargs.pop('complib', 'zlib')  # not explicitly asked for as arguments

    if VERBOSE:
        print(now(), 'Storing %d DataFrames in file %s with compression settings %d %s...' % (len(kwargs), out_hdf, complevel, complib))

    store = pd.HDFStore(out_hdf, complevel=complevel, complib=complib)  # TODO: WRITE ONLY? it probably appends now
    for table_name, dataframe in kwargs.items():
        store[table_name] = dataframe
    store.close()

    if VERBOSE:
        print(now(), 'DataFrames stored in file.') 
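
A hypothetical call, showing how each keyword argument becomes a table in the HDF file (names and data are invented):

import pandas as pd

m1 = pd.DataFrame({"a": [1, 2]})
m2 = pd.DataFrame({"b": [3, 4]})
store_dataframes("out.h5", matrix1=m1, matrix2=m2)  # writes tables 'matrix1' and 'matrix2'
store_dataframes("out2.h5", complevel=5, complib="blosc", matrix3=m2)  # override compression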
Example 11
Project: orca   Author: UDST   File: test_orca.py    License: BSD 3-Clause "New" or "Revised" License
def test_write_tables(df, store_name):
    orca.add_table('table', df)

    @orca.step()
    def step(table):
        pass

    step_tables = orca.get_step_table_names(['step'])

    orca.write_tables(store_name, step_tables, None)
    with pd.HDFStore(store_name, mode='r') as store:
        assert 'table' in store
        pdt.assert_frame_equal(store['table'], df)

    orca.write_tables(store_name, step_tables, 1969)

    with pd.HDFStore(store_name, mode='r') as store:
        assert '1969/table' in store
        pdt.assert_frame_equal(store['1969/table'], df) 
Example 12
Project: orca   Author: UDST   File: test_orca.py    License: BSD 3-Clause "New" or "Revised" License
def test_run_and_write_tables_out_tables_provided(df, store_name):
    table_names = ['table', 'table2', 'table3']
    for t in table_names:
        orca.add_table(t, df)

    @orca.step()
    def step(iter_var, table, table2):
        return

    orca.run(
        ['step'],
        iter_vars=range(1),
        data_out=store_name,
        out_base_tables=table_names,
        out_run_tables=['table'])

    with pd.HDFStore(store_name, mode='r') as store:

        for t in table_names:
            assert 'base/{}'.format(t) in store

        assert '0/table' in store
        assert '0/table2' not in store
        assert '0/table3' not in store 
Example 13
Project: catalyst   Author: enigmampc   File: minute_bars.py    License: Apache License 2.0
def write(self, frames):
        """
        Write the frames to the target HDF5 file, using the format used by
        ``pd.Panel.to_hdf``

        Parameters
        ----------
        frames : iter[(int, DataFrame)] or dict[int -> DataFrame]
            An iterable or other mapping of sid to the corresponding OHLCV
            pricing data.
        """
        with HDFStore(self._path, 'w',
                      complevel=self._complevel, complib=self._complib) \
                as store:
            panel = pd.Panel.from_dict(dict(frames))
            panel.to_hdf(store, 'updates')
        with tables.open_file(self._path, mode='r+') as h5file:
            h5file.set_node_attr('/', 'version', 0) 
Example 14
Project: tierpsy-tracker   Author: ver228   File: helper.py    License: MIT License
def calculate_bgnd_from_masked_fulldata(masked_image_file):
    """
    - Opens the masked_image_file hdf5 file, reads the /full_data node and 
      creates a "background" by taking the maximum value of each pixel over time.
    - Parses the file name to find a camera serial number
    - reads the pixel/um ratio from the masked_image_file
    """
    import numpy as np
    from tierpsy.helper.params import read_unit_conversions

    # read attributes of masked_image_file
    _, (microns_per_pixel, xy_units) , is_light_background = read_unit_conversions(masked_image_file)
    # get "background" and px2um
    with pd.HDFStore(masked_image_file, 'r') as fid:
        assert is_light_background, \
            'MultiWell recognition is only available for brightfield at the moment'
        img = np.max(fid.get_node('/full_data'), axis=0)
    
    camera_serial = parse_camera_serial(masked_image_file)
    
    return img, camera_serial, microns_per_pixel 
Example 15
Project: tierpsy-tracker   Author: ver228   File: process_ow.py    License: MIT License
def ow_plate_summary(fname):
    all_feats = read_feat_events(fname)
    
    with pd.HDFStore(fname, 'r') as fid:
        features_timeseries = fid['/features_timeseries']
    for cc in features_timeseries:
        all_feats[cc] = features_timeseries[cc].values
    
    wStats = WormStats()
    exp_feats = wStats.getWormStats(all_feats, np.nanmean)

    exp_feats = pd.DataFrame(exp_feats)
    
    valid_order = [x for x in exp_feats.columns if x not in wStats.extra_fields]
    exp_feats = exp_feats.loc[:, valid_order]
    
    return [exp_feats]
#%% 
Example 16
Project: pykonal   Author: malcolmw   File: eq_loc.py    License: GNU General Public License v3.0
def load_stations(input_file):
    """
    Load and return network geometry from input file.

    Input file must be an HDF5 file created using pandas.HDFStore with a
    "stations" table that contains "network", "station", "latitude",
    "longitude", and "elevation" fields. Units of degrees are assumed
    for "latitude" and "longitude", and units of kilometers are assumed
    for "elevation".

    Returns: pandas.DataFrame object with "network", "station",
    "latitude", "longitude", and "depth" fields. Units of "depth" are
    kilometers.
    """

    with pd.HDFStore(input_file, mode="r") as store:
        stations = store["stations"]

    stations["depth"] = -stations["elevation"]
    stations = stations[
        ["network", "station", "latitude", "longitude", "depth"]
    ]

    return stations
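
A sketch of building an input file in the layout this loader expects (all values are invented):

import pandas as pd

stations = pd.DataFrame({
    "network":   ["NC", "NC"],       # hypothetical network/station codes
    "station":   ["AAA", "BBB"],
    "latitude":  [38.50, 38.75],     # degrees
    "longitude": [-122.70, -122.40], # degrees
    "elevation": [0.45, 1.10],       # kilometers
})
with pd.HDFStore("network.h5", mode="w") as store:
    store["stations"] = stations

geometry = load_stations("network.h5")  # "depth" comes back as -elevation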
Example 17
Project: OpenTrader   Author: OpenTrading   File: Omlette.py    License: GNU Lesser General Public License v3.0
def __init__(self, sHdfStore="", oFd=sys.stdout):
        self.oHdfStore = None
        self.oFd = oFd
        if sHdfStore:
            # ugly - active
            self.oHdfStore = pandas.HDFStore(sHdfStore, mode='w')
            self.oFd.write("INFO: hdf store" +self.oHdfStore.filename +'\n')

        self.oRecipe = None
        self.oChefModule = None 
Example 18
Project: OpenTrader   Author: OpenTrading   File: Omlette.py    License: GNU Lesser General Public License v3.0
def oAddHdfStore(self, sHdfStore):
        if os.path.isabs(sHdfStore):
            assert os.path.isdir(os.path.dirname(sHdfStore)), \
                   "ERROR: directory not found: " +sHdfStore
        self.oHdfStore = pandas.HDFStore(sHdfStore, mode='w')
        self.oFd.write("INFO: hdf store: " +self.oHdfStore.filename +'\n')
        return self.oHdfStore 
Example 19
Project: CalibrationNN   Author: Andres-Hernandez   File: data_utils.py    License: GNU General Public License v3.0
def store_hdf5(file_name, key, val):
    with pd.HDFStore(file_name) as store:
        store[key] = val
Example 20
Project: CalibrationNN   Author: Andres-Hernandez   File: data_utils.py    License: GNU General Public License v3.0
def from_hdf5(key, file_name=h5file):
    with pd.HDFStore(file_name) as store:
        data = store[key]
    return data 
Example 21
Project: simba   Author: sgoldenlab   File: ROI_reset.py    License: GNU Lesser General Public License v3.0
def ROI_reset(inifile, currVid):
    CurrVidName = os.path.basename(currVid).replace('.mp4', '')
    config = ConfigParser()
    configFile = str(inifile)
    config.read(configFile)
    vidInfPath = config.get('General settings', 'project_path')
    logFolderPath = os.path.join(vidInfPath, 'logs')
    ROIcoordinatesPath = os.path.join(logFolderPath, 'measures', 'ROI_definitions.h5')
    try:
        rectanglesInfo = pd.read_hdf(ROIcoordinatesPath, key='rectangles')
        circleInfo = pd.read_hdf(ROIcoordinatesPath, key='circleDf')
        polygonInfo = pd.read_hdf(ROIcoordinatesPath, key='polygons')
        rectangularDf = rectanglesInfo.loc[rectanglesInfo['Video'] == str(CurrVidName)]
        circleDf = circleInfo.loc[circleInfo['Video'] == str(CurrVidName)]
        polygonDf = polygonInfo.loc[polygonInfo['Video'] == str(CurrVidName)]
        ROIdefExist = True
    except FileNotFoundError:
        ROIdefExist = False
        print('Cannot delete ROI definitions: no definitions exist to delete')

    if ROIdefExist is True:
        if (len(rectangularDf) == 0 and len(circleDf) == 0 and len(polygonDf) == 0):
            print('Cannot delete ROI definitions: no records for ' + str(CurrVidName))
        else:
            rectanglesInfo = rectanglesInfo[rectanglesInfo.Video != CurrVidName]
            circleInfo = circleInfo[circleInfo['Video'] != CurrVidName]
            polygonInfo = polygonInfo[polygonInfo['Video'] != CurrVidName]
            store = pd.HDFStore(ROIcoordinatesPath, mode='w')
            store['rectangles'] = rectanglesInfo
            store['circleDf'] = circleInfo
            store['polygons'] = polygonInfo
            print('Deleted ROI record: ' + str(CurrVidName))
            store.close() 
Example 22
Project: news-popularity-prediction   Author: MKLab-ITI   File: feature_rw.py    License: Apache License 2.0
def h5_open(path, complevel=0, complib="bzip2"):
    """
    Returns an h5 file store handle managed via pandas.

    :param path: The path of the h5 file store.
    :param complevel: Compression level (0-9).
    :param complib: Library used for compression.
    :return: store: The h5 file store handle.
    """
    store = pd.HDFStore(path, complevel=complevel, complib=complib)
    return store 
Example 23
def read_dataframe_h5(filename, logger):
    with pd.HDFStore(filename, mode='r') as store:
        df = store.select('data')
    logger.info("Read dataset from the store")
    return df 
Example 24
Project: autodeepnet   Author: autodeepnet   File: data_utils.py    License: MIT License
def save_hdf5_data(file_path, data_frame, **kwargs):
    pandas_format = kwargs.get('pandas_format', True)
    key = kwargs.get('key', 'data')
    mode = kwargs.get('mode', 'a')
    format = kwargs.get('format', 'table')
    append = kwargs.get('append', False)
    logger.info("Opening HDF5 file {} to write data...".format(file_path))
    try:
        if pandas_format:
            with pd.HDFStore(file_path, mode=mode) as f:
                if key in f and not append:
                    f.remove(key)
                f.put(key=key, value=data_frame, format=format, append=append)
        else:
            if key is None:
                logger.error("Need a key when saving as default HDF5 format")
                raise exceptions.FileSaveError
            with h5py.File(file_path, mode) as f:
                if key in f:
                    if append:
                        data_frame = pd.concat((pd.DataFrame(f[key]), data_frame))
                    del f[key]
                f.create_dataset(key, data=data_frame.values)
    except Exception as e:
        logger.exception("Failed with Error {0}".format(e))
        raise exceptions.FileSaveError
    logger.info("Successfully saved hdf5 data") 
Example 25
Project: ibis   Author: ibis-project   File: hdf5.py    License: Apache License 2.0
def table(self, name, path):
        if name not in self.list_tables(path):
            raise AttributeError(name)

        # get the schema
        with pd.HDFStore(str(path), mode='r') as store:
            df = store.select(name, start=0, stop=0)
            schema = sch.infer(df)

        t = self.table_class(name, schema, self).to_expr()
        self.dictionary[name] = path
        return t 
Example 26
Project: ibis   Author: ibis-project   File: hdf5.py    License: Apache License 2.0
def list_tables(self, path=None):
        # tables are individual tables within a file

        if path is None:
            path = self.root

        if path.is_file() and str(path).endswith(self.extension):

            with pd.HDFStore(str(path), mode='r') as store:
                # strip leading /
                return [k[1:] for k in store.keys()]

        return [] 
Example 27
Project: vivarium   Author: ihmeuw   File: hdf.py    License: GNU General Public License v3.0
def write(path: Union[str, Path], entity_key: str, data: Any):
    """Writes data to the HDF file at the given path to the given key.

    Parameters
    ----------
    path
        The path to the HDF file to write to.
    entity_key
        A string representation of the internal HDF path where we want to
        write the data. The key must be formatted as ``"type.name.measure"``
        or ``"type.measure"``.
    data
        The data to write. If it is a :mod:`pandas` object, it will be
        written using a :class:`pandas.HDFStore` or :func:`pandas.to_hdf`.
        If it is some other kind of python object, it will first be encoded
        as json with :func:`json.dumps` and then written to the provided
        key.

    Raises
    ------
    ValueError
        If the path or entity_key are improperly formatted.

    """
    path = _get_valid_hdf_path(path)
    entity_key = EntityKey(entity_key)

    if isinstance(data, PandasObj):
        _write_pandas_data(path, entity_key, data)
    else:
        _write_json_blob(path, entity_key, data) 
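
A hypothetical pair of calls following the key format described in the docstring (file name and keys are invented):

import pandas as pd

df = pd.DataFrame({"age": [0, 1], "value": [0.01, 0.02]})
write("artifact.hdf", "population.structure", df)  # pandas object, "type.measure" key
write("artifact.hdf", "metadata.versions", {"pandas": "1.0"})  # non-pandas data is json-encoded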
Example 28
Project: pyiron   Author: pyiron   File: hdfio.py    License: BSD 3-Clause "New" or "Revised" License
def open(self, **kwargs):
        """
        Open the file in the specified mode - copied from pandas.HDFStore.open()

        Args:
            **kwargs: mode : {'a', 'w', 'r', 'r+'}, default 'a'
                      See HDFStore docstring or tables.open_file for info about modes
        Returns:
            HDFStoreIO: self - in contrast to the original implementation in pandas.
        """
        super(HDFStoreIO, self).open(**kwargs)
        return self 
Example 29
Project: prme   Author: flaviovdf   File: dataio.py    License: BSD 3-Clause "New" or "Revised" License
def save_model(out_fpath, model):
    store = pd.HDFStore(out_fpath, 'w')
    for model_key in model:
        model_val = model[model_key]
        
        if isinstance(model_val, np.ndarray):
            store[model_key] = pd.DataFrame(model_val)
        else:
            store[model_key] = pd.DataFrame(model_val.items(), \
                    columns=['Name', 'Id'])
    store.close() 
Example 30
Project: prme   Author: flaviovdf   File: mrr.py    License: BSD 3-Clause "New" or "Revised" License
def main(model, out_fpath):
    store = pd.HDFStore(model)
    
    from_ = store['from_'][0][0]
    to = store['to'][0][0]
    assert from_ == 0
    
    trace_fpath = store['trace_fpath'][0][0]

    XP_hk = store['XP_hk'].values
    XP_ok = store['XP_ok'].values
    XG_ok = store['XG_ok'].values
    alpha = store['alpha'].values[0][0]
    tau = store['tau'].values[0][0]

    hyper2id = dict(store['hyper2id'].values)
    obj2id = dict(store['obj2id'].values)
    
    HSDs = []
    dts = []

    with open(trace_fpath) as trace_file:
        for i, l in enumerate(trace_file): 
            if i < to:
                continue

            dt, h, s, d = l.strip().split('\t')
            if h in hyper2id and s in obj2id and d in obj2id:
                dts.append(float(dt))
                HSDs.append([hyper2id[h], obj2id[s], obj2id[d]])
    
    num_queries = min(10000, len(HSDs))
    queries = np.random.choice(len(HSDs), size=num_queries)
    
    dts = np.array(dts, order='C', dtype='d')
    HSDs = np.array(HSDs, order='C', dtype='i4')
    rrs = mrr.compute(dts, HSDs, XP_hk, XP_ok, XG_ok, alpha, tau)
    
    np.savetxt(out_fpath, rrs)
    store.close()