Python pandas.HDFStore() Examples
The following are 30 code examples of pandas.HDFStore(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module pandas, or try the search function.
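Before the examples, here is a minimal sketch of typical HDFStore usage; the file name tutorial.h5 and the key 'df' are arbitrary placeholders:

import pandas as pd

df = pd.DataFrame({'a': [1, 2, 3], 'b': ['x', 'y', 'z']})

# Write with an explicit store object.
store = pd.HDFStore('tutorial.h5', mode='w')
store['df'] = df          # equivalent to store.put('df', df)
store.close()

# Using the store as a context manager guarantees the file is closed.
with pd.HDFStore('tutorial.h5', mode='r') as store:
    restored = store['df']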

Example #1
Source File: Omlette.py From OpenTrader with GNU Lesser General Public License v3.0 | 6 votes |
def iMain(): """ Read an hdf file generated by us to make sure we can recover its content and structure. Give the name of an hdf5 file as a command-line argument. """ assert sys.argv, __doc__ sFile = sys.argv[1] assert os.path.isfile(sFile) oHdfStore = pandas.HDFStore(sFile, mode='r') print oHdfStore.groups() # bug - no return value # oSignals = pandas.read_hdf(oHdfStore, '/servings/signals') mSignals = oHdfStore.select('/recipe/servings/mSignals', auto_close=False) print mSignals print oHdfStore.get_node('/recipe')._v_attrs.metadata[0]['sUrl']
Example #2
Source File: utils.py From avocado with MIT License | 6 votes |
def _create_csi_index(store, key, column_name):
    """Create a CSI index on a column in an HDF5 file.

    The column must have been already specified in the data_columns call to
    to_hdf or it won't be stored correctly in the HDF5 file.

    Parameters
    ----------
    store : :class:`pandas.HDFStore`
        An HDF5 file opened as an instance of a :class:`pandas.HDFStore`
        object.
    key : str
        The key of the DataFrame to use.
    column_name : str
        The column to add a CSI index to.
    """
    key_store = store.get_storer(key)
    use_name = _map_column_name(key_store, column_name)
    column = key_store.table.colinstances[use_name]
    if not column.index.is_csi:
        column.remove_index()
        column.create_csindex()
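A hedged usage sketch for the helper above: the DataFrame has to be written in table format with the target column listed in data_columns first, otherwise there is no column index to convert. The file and column names are made up, and _map_column_name is a project helper not shown in this excerpt:

import pandas as pd

df = pd.DataFrame({'object_id': ['a', 'b'], 'flux': [1.0, 2.0]})
df.to_hdf('features.h5', 'features', format='table', data_columns=['object_id'])

with pd.HDFStore('features.h5') as store:
    # assumes _map_column_name is available in the calling module
    _create_csi_index(store, 'features', 'object_id')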
Example #3
Source File: dataset.py From avocado with MIT License | 6 votes |
def write_models(self, tag=None):
    """Write the models of the light curves to disk.

    The models will be stored in the features directory using the dataset's
    name and the given features tag. Note that for now the models are stored
    as individual tables in the HDF5 file because there doesn't appear to be
    a good way to store fixed length arrays in pandas.

    WARNING: This is not the best way to implement this, and there are
    definitely much better ways. This also isn't thread-safe at all.

    Parameters
    ----------
    tag : str (optional)
        The tag for this version of the features. By default, this will use
        settings['features_tag'].
    """
    models_path = self.get_models_path(tag=tag)

    store = pd.HDFStore(models_path, "a")

    for model_name, model in self.models.items():
        model.to_hdf(store, model_name, mode="a")

    store.close()
Example #4
Source File: hdf.py From vivarium with GNU General Public License v3.0 | 6 votes |
def _write_pandas_data(path: Path, entity_key: EntityKey, data: Union[PandasObj]):
    """Write data in a pandas format to an HDF file.

    This method currently supports :class:`pandas.DataFrame` objects, with
    or without columns, and :class:`pandas.Series` objects.
    """
    if data.empty:
        # Our data is indexed, sometimes with no other columns. This leaves an
        # empty dataframe that store.put will silently fail to write in table
        # format.
        data = data.reset_index()
        if data.empty:
            raise ValueError("Cannot write an empty dataframe that does not have an index.")
        metadata = {'is_empty': True}
        data_columns = True
    else:
        metadata = {'is_empty': False}
        data_columns = None

    with pd.HDFStore(str(path), complevel=9) as store:
        store.put(entity_key.path, data, format="table", data_columns=data_columns)
        store.get_storer(entity_key.path).attrs.metadata = metadata  # NOTE: must use attrs. write this up
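A small sketch, under the same assumptions as the function above, of how the stored metadata could be read back; the file name and key are placeholders rather than anything mandated by the project:

import pandas as pd

with pd.HDFStore('artifact.hdf', mode='r') as store:
    metadata = store.get_storer('population/structure').attrs.metadata
    if not metadata['is_empty']:
        data = store.get('population/structure')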
Example #5
Source File: burstlib_ext.py From FRETBursts with GNU General Public License v2.0 | 6 votes |
def _store_bg_data(store, base_name, min_ph_delays_us, best_bg, best_th,
                   BG_data, BG_data_e):
    if not base_name.endswith('/'):
        base_name = base_name + '/'
    store_name = store.filename
    group_name = '/' + base_name[:-1]
    store.create_carray(group_name, 'min_ph_delays_us', obj=min_ph_delays_us,
                        createparents=True)
    for ph_sel, values in BG_data.items():
        store.create_carray(group_name, str(ph_sel), obj=values)
    for ph_sel, values in BG_data_e.items():
        store.create_carray(group_name, str(ph_sel) + '_err', obj=values)
    store.close()
    store = pd.HDFStore(store_name)
    store[base_name + 'best_bg'] = best_bg
    store[base_name + 'best_th'] = best_th
    store.close()
Example #6
Source File: burstlib_ext.py From FRETBursts with GNU General Public License v2.0 | 6 votes |
def _load_bg_data(store, base_name, ph_streams):
    if not base_name.endswith('/'):
        base_name = base_name + '/'
    store_name = store.filename
    group_name = '/' + base_name[:-1]
    min_ph_delays = store.get_node(group_name, 'min_ph_delays_us')[:]
    BG_data = {}
    for ph_sel in ph_streams:
        BG_data[ph_sel] = store.get_node(group_name, str(ph_sel))[:]
    BG_data_e = {}
    for ph_sel in ph_streams:
        BG_data_e[ph_sel] = store.get_node(group_name, str(ph_sel) + '_err')[:]
    store.close()
    store = pd.HDFStore(store_name)
    best_bg = store[base_name + 'best_bg']
    best_th = store[base_name + 'best_th']
    store.close()
    return best_th, best_bg, BG_data, BG_data_e, min_ph_delays
Example #7
Source File: panda.py From twint with MIT License | 6 votes |
def save(_filename, _dataframe, **options):
    if options.get("dataname"):
        _dataname = options.get("dataname")
    else:
        _dataname = "twint"

    if not options.get("type"):
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            _store = pd.HDFStore(_filename + ".h5")
            _store[_dataname] = _dataframe
            _store.close()
    elif options.get("type") == "Pickle":
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            _dataframe.to_pickle(_filename + ".pkl")
    else:
        print("""Please specify: filename, DataFrame, DataFrame name and type
              (HDF5, default, or Pickle)""")
Example #8
Source File: panda.py From twint with MIT License | 6 votes |
def read(_filename, **options):
    if not options.get("dataname"):
        _dataname = "twint"
    else:
        _dataname = options.get("dataname")

    if not options.get("type"):
        _store = pd.HDFStore(_filename + ".h5")
        _df = _store[_dataname]
        return _df
    elif options.get("type") == "Pickle":
        _df = pd.read_pickle(_filename + ".pkl")
        return _df
    else:
        print("""Please specify: DataFrame, DataFrame name (twint as default),
              filename and type (HDF5, default, or Pickle)""")
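A hedged round-trip sketch using the two helpers above, assuming tweets_df is a pandas DataFrame of scraped tweets; the file name 'tweets' is arbitrary, and by default the frame is stored under the 'twint' key in an HDF5 file:

save('tweets', tweets_df)                 # writes tweets.h5 with key 'twint'
df = read('tweets')                       # reads it back from tweets.h5
df_pkl = read('tweets', type='Pickle')    # Pickle variant, reads tweets.pkl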
Example #9
Source File: dc2_object.py From gcr-catalogs with BSD 3-Clause "New" or "Revised" License | 6 votes |
def _open_hdf5(self, file_path):
    """Return the file handle of an HDF5 file as a pd.HDFStore object

    Cache and return the file handle for the HDF5 file at <file_path>

    Args:
        file_path (str): The path of the desired file

    Return:
        The cached file handle
    """
    if (file_path not in self._file_handles or
            not self._file_handles[file_path].is_open):
        self._file_handles[file_path] = pd.HDFStore(file_path, 'r')

    return self._file_handles[file_path]
Example #10
Source File: hlatyper.py From OptiType with BSD 3-Clause "New" or "Revised" License | 6 votes |
def store_dataframes(out_hdf, **kwargs):
    # DataFrames to serialize have to be passed by keyword arguments. An argument
    # matrix1=DataFrame(...) will be written into table 'matrix1' in the HDF file.
    complevel = kwargs.pop('complevel', 9)   # default complevel & complib values if
    complib = kwargs.pop('complib', 'zlib')  # not explicitly asked for as arguments

    if VERBOSE:
        print(now(), 'Storing %d DataFrames in file %s with compression settings %d %s...'
              % (len(kwargs), out_hdf, complevel, complib))

    store = pd.HDFStore(out_hdf, complevel=complevel, complib=complib)  # TODO: WRITE ONLY? it probably appends now
    for table_name, dataframe in kwargs.items():
        store[table_name] = dataframe
    store.close()

    if VERBOSE:
        print(now(), 'DataFrames stored in file.')
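A usage sketch for the helper above: every keyword argument becomes a table name in the output file. The DataFrame names and output path are placeholders:

store_dataframes('out.h5',
                 matrix1=coverage_df,      # stored as table 'matrix1'
                 matrix2=likelihood_df,    # stored as table 'matrix2'
                 complevel=5, complib='blosc')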
Example #11
Source File: test_orca.py From orca with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_write_tables(df, store_name):
    orca.add_table('table', df)

    @orca.step()
    def step(table):
        pass

    step_tables = orca.get_step_table_names(['step'])

    orca.write_tables(store_name, step_tables, None)
    with pd.HDFStore(store_name, mode='r') as store:
        assert 'table' in store
        pdt.assert_frame_equal(store['table'], df)

    orca.write_tables(store_name, step_tables, 1969)
    with pd.HDFStore(store_name, mode='r') as store:
        assert '1969/table' in store
        pdt.assert_frame_equal(store['1969/table'], df)
Example #12
Source File: test_orca.py From orca with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_run_and_write_tables_out_tables_provided(df, store_name):
    table_names = ['table', 'table2', 'table3']
    for t in table_names:
        orca.add_table(t, df)

    @orca.step()
    def step(iter_var, table, table2):
        return

    orca.run(
        ['step'],
        iter_vars=range(1),
        data_out=store_name,
        out_base_tables=table_names,
        out_run_tables=['table'])

    with pd.HDFStore(store_name, mode='r') as store:
        for t in table_names:
            assert 'base/{}'.format(t) in store

        assert '0/table' in store
        assert '0/table2' not in store
        assert '0/table3' not in store
Example #13
Source File: minute_bars.py From catalyst with Apache License 2.0 | 6 votes |
def write(self, frames): """ Write the frames to the target HDF5 file, using the format used by ``pd.Panel.to_hdf`` Parameters ---------- frames : iter[(int, DataFrame)] or dict[int -> DataFrame] An iterable or other mapping of sid to the corresponding OHLCV pricing data. """ with HDFStore(self._path, 'w', complevel=self._complevel, complib=self._complib) \ as store: panel = pd.Panel.from_dict(dict(frames)) panel.to_hdf(store, 'updates') with tables.open_file(self._path, mode='r+') as h5file: h5file.set_node_attr('/', 'version', 0)
Example #14
Source File: helper.py From tierpsy-tracker with MIT License | 6 votes |
def calculate_bgnd_from_masked_fulldata(masked_image_file):
    """
    - Opens the masked_image_file hdf5 file, reads the /full_data node and
      creates a "background" by taking the maximum value of each pixel over time.
    - Parses the file name to find a camera serial number
    - reads the pixel/um ratio from the masked_image_file
    """
    import numpy as np
    from tierpsy.helper.params import read_unit_conversions

    # read attributes of masked_image_file
    _, (microns_per_pixel, xy_units), is_light_background = \
        read_unit_conversions(masked_image_file)
    # get "background" and px2um
    with pd.HDFStore(masked_image_file, 'r') as fid:
        assert is_light_background, \
            'MultiWell recognition is only available for brightfield at the moment'
        img = np.max(fid.get_node('/full_data'), axis=0)

    camera_serial = parse_camera_serial(masked_image_file)

    return img, camera_serial, microns_per_pixel
Example #15
Source File: process_ow.py From tierpsy-tracker with MIT License | 6 votes |
def ow_plate_summary(fname):
    all_feats = read_feat_events(fname)

    with pd.HDFStore(fname, 'r') as fid:
        features_timeseries = fid['/features_timeseries']
        for cc in features_timeseries:
            all_feats[cc] = features_timeseries[cc].values

    wStats = WormStats()
    exp_feats = wStats.getWormStats(all_feats, np.nanmean)
    exp_feats = pd.DataFrame(exp_feats)

    valid_order = [x for x in exp_feats.columns if x not in wStats.extra_fields]
    exp_feats = exp_feats.loc[:, valid_order]

    return [exp_feats]
#%%
Example #16
Source File: eq_loc.py From pykonal with GNU General Public License v3.0 | 6 votes |
def load_stations(input_file):
    """
    Load and return network geometry from input file.

    Input file must be HDF5 file created using pandas.HDFStore with a
    "stations" table that contains "network", "station", "latitude",
    "longitude", and "elevation" fields. Units of degrees are assumed for
    "latitude" and "longitude", and units of kilometers are assumed for
    "elevation".

    Returns: pandas.DataFrame object with "network", "station", "latitude",
    "longitude", and "depth" fields. Units of "depth" are kilometers.
    """
    with pd.HDFStore(input_file, mode="r") as store:
        stations = store["stations"]
    stations["depth"] = -stations["elevation"]
    stations = stations[
        ["network", "station", "latitude", "longitude", "depth"]
    ]
    return (stations)
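A sketch, using made-up station values, of how an input file with the layout described in the docstring could be produced before calling load_stations; the file name is a placeholder:

import pandas as pd

stations = pd.DataFrame({
    'network':   ['CI', 'CI'],
    'station':   ['ABC', 'XYZ'],
    'latitude':  [34.1, 35.2],      # degrees
    'longitude': [-117.3, -118.0],  # degrees
    'elevation': [1.2, 0.8],        # kilometers above sea level
})

with pd.HDFStore('network_geometry.hdf5', mode='w') as store:
    store.put('stations', stations, format='table')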
Example #17
Source File: Omlette.py From OpenTrader with GNU Lesser General Public License v3.0 | 5 votes |
def __init__(self, sHdfStore="", oFd=sys.stdout): self.oHdfStore = None self.oFd = oFd if sHdfStore: # ugly - active self.oHdfStore = pandas.HDFStore(sHdfStore, mode='w') self.oFd.write("INFO: hdf store" +self.oHdfStore.filename +'\n') self.oRecipe = None self.oChefModule = None
Example #18
Source File: Omlette.py From OpenTrader with GNU Lesser General Public License v3.0 | 5 votes |
def oAddHdfStore(self, sHdfStore):
    if os.path.isabs(sHdfStore):
        assert os.path.isdir(os.path.dirname(sHdfStore)), \
            "ERROR: directory not found: " + sHdfStore
    self.oHdfStore = pandas.HDFStore(sHdfStore, mode='w')
    self.oFd.write("INFO: hdf store: " + self.oHdfStore.filename + '\n')
    return self.oHdfStore
Example #19
Source File: data_utils.py From CalibrationNN with GNU General Public License v3.0 | 5 votes |
def store_hdf5(file_name, key, val):
    with pd.HDFStore(file_name) as store:
        store[key] = val
        store.close()
Example #20
Source File: data_utils.py From CalibrationNN with GNU General Public License v3.0 | 5 votes |
def from_hdf5(key, file_name=h5file):
    with pd.HDFStore(file_name) as store:
        data = store[key]
        store.close()
    return data
Example #21
Source File: ROI_reset.py From simba with GNU Lesser General Public License v3.0 | 5 votes |
def ROI_reset(inifile, currVid):
    CurrVidName = os.path.basename(currVid).replace('.mp4', '')
    config = ConfigParser()
    configFile = str(inifile)
    config.read(configFile)
    vidInfPath = config.get('General settings', 'project_path')
    logFolderPath = os.path.join(vidInfPath, 'logs')
    ROIcoordinatesPath = os.path.join(logFolderPath, 'measures', 'ROI_definitions.h5')

    try:
        rectanglesInfo = pd.read_hdf(ROIcoordinatesPath, key='rectangles')
        circleInfo = pd.read_hdf(ROIcoordinatesPath, key='circleDf')
        polygonInfo = pd.read_hdf(ROIcoordinatesPath, key='polygons')
        rectangularDf = rectanglesInfo.loc[rectanglesInfo['Video'] == str(CurrVidName)]
        circleDf = circleInfo.loc[circleInfo['Video'] == str(CurrVidName)]
        polygonDf = polygonInfo.loc[polygonInfo['Video'] == str(CurrVidName)]
        ROIdefExist = True
    except FileNotFoundError:
        ROIdefExist = False
        print('Cannot delete ROI definitions: no definitions exist to delete')

    if ROIdefExist is True:
        if (len(rectangularDf) == 0 and len(circleDf) == 0 and len(polygonDf) == 0):
            print('Cannot delete ROI definitions: no records for ' + str(CurrVidName))
        else:
            rectanglesInfo = rectanglesInfo[rectanglesInfo.Video != CurrVidName]
            circleInfo = circleInfo[circleInfo['Video'] != CurrVidName]
            polygonInfo = polygonInfo[polygonInfo['Video'] != CurrVidName]
            store = pd.HDFStore(ROIcoordinatesPath, mode='w')
            store['rectangles'] = rectanglesInfo
            store['circleDf'] = circleInfo
            store['polygons'] = polygonInfo
            print('Deleted ROI record: ' + str(CurrVidName))
            store.close()
Example #22
Source File: feature_rw.py From news-popularity-prediction with Apache License 2.0 | 5 votes |
def h5_open(path, complevel=0, complib="bzip2"):
    """
    Returns an h5 file store handle managed via pandas.

    :param path: The path of the h5 file store.
    :param complevel: Compression level (0-9).
    :param complib: Library used for compression.
    :return: store: The h5 file store handle.
    """
    store = pd.HDFStore(path, complevel=complevel, complib=complib)
    return store
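A short usage sketch; because the function returns an open handle, the caller is responsible for closing it. The file name, key, and comments_df DataFrame are placeholders:

store = h5_open('features.h5', complevel=9, complib='blosc')
try:
    store['comments'] = comments_df   # comments_df assumed to be a DataFrame
finally:
    store.close()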
Example #23
Source File: dataTransformationProcessing.py From autosklearn-zeroconf with BSD 3-Clause "New" or "Revised" License | 5 votes |
def read_dataframe_h5(filename, logger):
    with pd.HDFStore(filename, mode='r') as store:
        df = store.select('data')
        logger.info("Read dataset from the store")
    return df
Example #24
Source File: data_utils.py From autodeepnet with MIT License | 5 votes |
def save_hdf5_data(file_path, data_frame, **kwargs):
    pandas_format = kwargs.get('pandas_format', True)
    key = kwargs.get('key', 'data')
    mode = kwargs.get('mode', 'a')
    format = kwargs.get('format', 'table')
    append = kwargs.get('append', False)
    logger.info("Opening HDF5 file {} to write data...".format(file_path))
    try:
        if pandas_format:
            with pd.HDFStore(file_path, mode=mode) as f:
                if key in f and not append:
                    f.remove(key)
                f.put(key=key, value=data_frame, format=format, append=append)
        else:
            if key is None:
                logger.error("Need a key when saving as default HDF5 format")
                raise exceptions.FileSaveError
            with h5py.File(file_path, mode) as f:
                if key in f:
                    if append:
                        data_frame = pd.concat((pd.DataFrame(f[key]), data_frame))
                    del f[key]
                f.create_dataset(key, data=data_frame.values)
    except Exception as e:
        logger.exception("Failed with Error {0}".format(e))
        raise exceptions.FileSaveError
    logger.info("Successfully saved hdf5 data")
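A hedged usage sketch for the function above; the file name and DataFrames are placeholders, and the non-pandas branch additionally requires the h5py dependency:

# Store in pandas/PyTables format under the default 'data' key, replacing any
# existing table.
save_hdf5_data('dataset.h5', df, pandas_format=True, format='table')

# Append rows to an existing table instead of replacing it.
save_hdf5_data('dataset.h5', more_rows, append=True)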
Example #25
Source File: hdf5.py From ibis with Apache License 2.0 | 5 votes |
def table(self, name, path):
    if name not in self.list_tables(path):
        raise AttributeError(name)

    # get the schema
    with pd.HDFStore(str(path), mode='r') as store:
        df = store.select(name, start=0, stop=0)
        schema = sch.infer(df)

    t = self.table_class(name, schema, self).to_expr()
    self.dictionary[name] = path
    return t
Example #26
Source File: hdf5.py From ibis with Apache License 2.0 | 5 votes |
def list_tables(self, path=None):
    # tables are individual tables within a file
    if path is None:
        path = self.root

    if path.is_file() and str(path).endswith(self.extension):
        with pd.HDFStore(str(path), mode='r') as store:
            # strip leading /
            return [k[1:] for k in store.keys()]

    return []
Example #27
Source File: hdf.py From vivarium with GNU General Public License v3.0 | 5 votes |
def write(path: Union[str, Path], entity_key: str, data: Any):
    """Writes data to the HDF file at the given path to the given key.

    Parameters
    ----------
    path
        The path to the HDF file to write to.
    entity_key
        A string representation of the internal HDF path where we want to
        write the data. The key must be formatted as ``"type.name.measure"``
        or ``"type.measure"``.
    data
        The data to write. If it is a :mod:`pandas` object, it will be
        written using a :class:`pandas.HDFStore` or :func:`pandas.to_hdf`.
        If it is some other kind of python object, it will first be encoded
        as json with :func:`json.dumps` and then written to the provided key.

    Raises
    ------
    ValueError
        If the path or entity_key are improperly formatted.
    """
    path = _get_valid_hdf_path(path)
    entity_key = EntityKey(entity_key)

    if isinstance(data, PandasObj):
        _write_pandas_data(path, entity_key, data)
    else:
        _write_json_blob(path, entity_key, data)
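A sketch of how the function above might be called, assuming an artifact path and entity keys in the required "type.name.measure" or "type.measure" form; the path and key values are placeholders:

import pandas as pd

data = pd.DataFrame({'year': [2020, 2021], 'value': [0.1, 0.2]})
write('artifact.hdf', 'population.structure', data)      # pandas object, written via HDFStore
write('artifact.hdf', 'metadata.locations', ['Global'])  # non-pandas object, stored as json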
Example #28
Source File: hdfio.py From pyiron with BSD 3-Clause "New" or "Revised" License | 5 votes |
def open(self, **kwargs):
    """
    Open the file in the specified mode - copied from pandas.HDFStore.open()

    Args:
        **kwargs: mode : {'a', 'w', 'r', 'r+'}, default 'a'
            See HDFStore docstring or tables.open_file for info about modes

    Returns:
        HDFStoreIO: self - in contrast to the original implementation in pandas.
    """
    super(HDFStoreIO, self).open(**kwargs)
    return self
Example #29
Source File: dataio.py From prme with BSD 3-Clause "New" or "Revised" License | 5 votes |
def save_model(out_fpath, model):
    store = pd.HDFStore(out_fpath, 'w')
    for model_key in model:
        model_val = model[model_key]

        if type(model_val) == np.ndarray:
            store[model_key] = pd.DataFrame(model_val)
        else:
            store[model_key] = pd.DataFrame(model_val.items(),
                                            columns=['Name', 'Id'])
    store.close()
Example #30
Source File: mrr.py From prme with BSD 3-Clause "New" or "Revised" License | 5 votes |
def main(model, out_fpath):
    store = pd.HDFStore(model)

    from_ = store['from_'][0][0]
    to = store['to'][0][0]
    assert from_ == 0

    trace_fpath = store['trace_fpath'][0][0]

    XP_hk = store['XP_hk'].values
    XP_ok = store['XP_ok'].values
    XG_ok = store['XG_ok'].values

    alpha = store['alpha'].values[0][0]
    tau = store['tau'].values[0][0]

    hyper2id = dict(store['hyper2id'].values)
    obj2id = dict(store['obj2id'].values)

    HSDs = []
    dts = []

    with open(trace_fpath) as trace_file:
        for i, l in enumerate(trace_file):
            if i < to:
                continue

            dt, h, s, d = l.strip().split('\t')
            if h in hyper2id and s in obj2id and d in obj2id:
                dts.append(float(dt))
                HSDs.append([hyper2id[h], obj2id[s], obj2id[d]])

    num_queries = min(10000, len(HSDs))
    queries = np.random.choice(len(HSDs), size=num_queries)

    dts = np.array(dts, order='C', dtype='d')
    HSDs = np.array(HSDs, order='C', dtype='i4')

    rrs = mrr.compute(dts, HSDs, XP_hk, XP_ok, XG_ok, alpha, tau)
    np.savetxt(out_fpath, rrs)
    store.close()