Python h5py.Dataset() Examples
The following are 30 code examples of h5py.Dataset(), collected from open-source projects. The original project and source file for each example are noted above it. You may also want to check out the other functions and classes available in the h5py module.
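Before the examples, here is a minimal orientation sketch (not taken from any of the projects below; the file name example.h5 is arbitrary). An h5py.Dataset is a NumPy-like array stored inside an HDF5 file: File.create_dataset returns one, and indexing with [()] materializes it as a NumPy array.

import h5py
import numpy as np

# Write a small array; create_dataset returns an h5py.Dataset.
with h5py.File('example.h5', 'w') as f:
    dset = f.create_dataset('data', data=np.arange(12).reshape(3, 4))
    print(type(dset))              # <class 'h5py._hl.dataset.Dataset'>
    print(dset.shape, dset.dtype)  # (3, 4) int64

# Read back: f['data'] stays an h5py.Dataset until it is sliced.
with h5py.File('example.h5', 'r') as f:
    assert isinstance(f['data'], h5py.Dataset)
    arr = f['data'][()]            # materialize the full array in memory
    print(arr.sum())               # 66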
Example #1
Source File: h5ad.py From anndata with BSD 3-Clause "New" or "Revised" License
def _read_raw(
    f: Union[h5py.File, AnnDataFileManager],
    as_sparse: Collection[str] = (),
    rdasp: Callable[[h5py.Dataset], sparse.spmatrix] = None,
    *,
    attrs: Collection[str] = ("X", "var", "varm"),
):
    if as_sparse:
        assert rdasp is not None, "must supply rdasp if as_sparse is supplied"
    raw = {}
    if "X" in attrs and "raw/X" in f:
        read_x = rdasp if "raw/X" in as_sparse else read_attribute
        raw["X"] = read_x(f["raw/X"])
    for v in ("var", "varm"):
        if v in attrs and f"raw/{v}" in f:
            raw[v] = read_attribute(f[f"raw/{v}"])
    return _read_legacy_raw(f, raw, read_dataframe, read_attribute, attrs=attrs)
Example #2
Source File: hdf5.py From Pyslvs-UI with GNU Affero General Public License v3.0
def _h5py_load(f: Group) -> Mapping[str, Any]:
    """Load function for h5py."""
    data = {}
    for k, v in f.items():  # type: str, Union[Group, Dataset]
        if type(v) is Group:
            data[k] = _h5py_load(v)
        elif type(v) is Dataset:
            value = v[()]
            if type(value) is void:
                value = _decompress(value)
                if value.startswith(b's'):
                    value = value.decode('utf-8')[1:]
                elif value.startswith(b'!'):
                    value = eval(value.decode('utf-8')[1:])
            data[k] = value
    return data
Example #3
Source File: _pyanitools.py From torchani with MIT License
def h5py_dataset_iterator(self, g, prefix=''):
    """Group recursive iterator

    Iterate through all groups in all branches and yield datasets as dicts.
    """
    for key in g.keys():
        item = g[key]
        path = '{}/{}'.format(prefix, key)
        keys = [i for i in item.keys()]
        if isinstance(item[keys[0]], h5py.Dataset):  # test for dataset
            data = {'path': path}
            for k in keys:
                if not isinstance(item[k], h5py.Group):
                    dataset = np.array(item[k][()])
                    if isinstance(dataset, np.ndarray):
                        if dataset.size != 0:
                            if isinstance(dataset[0], np.bytes_):
                                dataset = [a.decode('ascii') for a in dataset]
                    data.update({k: dataset})
            yield data
        else:  # test for group (go down)
            yield from self.h5py_dataset_iterator(item, path)
Example #4
Source File: test_deprecations.py From anndata with BSD 3-Clause "New" or "Revised" License
def test_force_dense_deprecated(tmp_path):
    dense_pth = tmp_path / "dense.h5ad"
    adata = AnnData(X=sparse.random(10, 10, format="csr"))
    adata.raw = adata

    with pytest.warns(FutureWarning):
        adata.write_h5ad(dense_pth, force_dense=True)
    with h5py.File(dense_pth, "r") as f:
        assert isinstance(f["X"], h5py.Dataset)
        assert isinstance(f["raw/X"], h5py.Dataset)

    dense = ad.read_h5ad(dense_pth)
    assert isinstance(dense.X, np.ndarray)
    assert isinstance(dense.raw.X, np.ndarray)
    assert_equal(adata, dense)


#######################################
# Dealing with uns adj matrices
#######################################
Example #5
Source File: test_io_conversion.py From anndata with BSD 3-Clause "New" or "Revised" License
def test_sparse_to_dense_inplace(tmp_path, spmtx_format):
    pth = tmp_path / "adata.h5ad"
    orig = gen_adata((50, 50), spmtx_format)
    orig.raw = orig
    orig.write(pth)
    backed = ad.read_h5ad(pth, backed="r+")
    backed.write(as_dense=("X", "raw/X"))
    new = ad.read_h5ad(pth)

    assert_equal(orig, new)
    assert_equal(backed, new)
    assert isinstance(new.X, np.ndarray)
    assert isinstance(new.raw.X, np.ndarray)
    assert isinstance(orig.X, spmtx_format)
    assert isinstance(orig.raw.X, spmtx_format)
    assert isinstance(backed.X, h5py.Dataset)
    assert isinstance(backed.raw.X, h5py.Dataset)
Example #6
Source File: compartment_reader.py From sonata with BSD 3-Clause "New" or "Revised" License
def __init__(self, pop_grp, pop_name):
    self._data_grp = pop_grp['data']
    self._mapping = pop_grp['mapping']
    self._population = pop_name

    self._gid2data_table = {}
    if self._mapping is None:
        raise Exception('could not find /mapping group')

    gids_ds = self._mapping[self.node_ids_ds]  # ['node_ids']
    index_pointer_ds = self._mapping['index_pointer']
    for indx, gid in enumerate(gids_ds):
        self._gid2data_table[gid] = slice(index_pointer_ds[indx], index_pointer_ds[indx+1])

    time_ds = self._mapping['time']
    self._t_start = np.float(time_ds[0])
    self._t_stop = np.float(time_ds[1])
    self._dt = np.float(time_ds[2])
    self._n_steps = int((self._t_stop - self._t_start) / self._dt)

    self._custom_cols = {col: grp for col, grp in self._mapping.items()
                         if col not in self.sonata_columns and isinstance(grp, h5py.Dataset)}
Example #7
Source File: h5ad.py From anndata with BSD 3-Clause "New" or "Revised" License
def write_sparse_as_dense(f, key, value, dataset_kwargs=MappingProxyType({})):
    real_key = None  # Flag for if temporary key was used
    if key in f:
        if (
            isinstance(value, (h5py.Group, h5py.Dataset, SparseDataset))
            and value.file.filename == f.filename
        ):  # Write to temporary key before overwriting
            real_key = key
            # Transform key to temporary, e.g. raw/X -> raw/_X, or X -> _X
            key = re.sub(r"(.*)(\w(?!.*/))", r"\1_\2", key.rstrip("/"))
        else:
            del f[key]  # Wipe before write
    dset = f.create_dataset(key, shape=value.shape, dtype=value.dtype, **dataset_kwargs)
    compressed_axis = int(isinstance(value, sparse.csc_matrix))
    for idx in idx_chunks_along_axis(value.shape, compressed_axis, 1000):
        dset[idx] = value[idx].toarray()
    if real_key is not None:
        del f[real_key]
        f[real_key] = f[key]
        del f[key]
Example #8
Source File: h5ad.py From anndata with BSD 3-Clause "New" or "Revised" License
def read_dataset(dataset: h5py.Dataset):
    value = dataset[()]
    if not hasattr(value, "dtype"):
        return value
    elif isinstance(value.dtype, str):
        pass
    elif issubclass(value.dtype.type, np.string_):
        value = value.astype(str)
        # Backwards compat, old datasets have strings as one element 1d arrays
        if len(value) == 1:
            return value[0]
    elif len(value.dtype.descr) > 1:  # Compound dtype
        # For backwards compat, now strings are written as variable length
        value = _from_fixed_length_strings(value)
    if value.shape == ():
        value = value[()]
    return value
Example #9
Source File: test_dataset.py From GraphicDesignPatternByPython with MIT License
def test_float(self):
    """ Scaleoffset filter works for floating point data """

    scalefac = 4
    shape = (100, 300)
    range = 20*10**scalefac
    testdata = (np.random.rand(*shape)-0.5)*range

    dset = self.f.create_dataset('foo', shape, dtype=float, scaleoffset=scalefac)

    # Dataset reports that scaleoffset is in use
    assert dset.scaleoffset is not None

    # Dataset round-trips
    dset[...] = testdata
    filename = self.f.filename
    self.f.close()
    self.f = h5py.File(filename, 'r')
    readdata = self.f['foo'][...]

    # Test that data round-trips to requested precision
    self.assertArrayEqual(readdata, testdata, precision=10**(-scalefac))

    # Test that the filter is actually active (i.e. compression is lossy)
    assert not (readdata == testdata).all()
Example #10
Source File: save.py From chainer with MIT License
def save_parameters_as_hdf5(model, filename='model.h5'):
    # Save the model parameters into a HDF5 archive
    chainer.serializers.save_hdf5(filename, model)
    print('model.h5 saved!\n')

    # Load the saved HDF5 using h5py
    print('--- The list of saved params in model.h5 ---')
    f = h5py.File('model.h5', 'r')
    for param_key, param in f.items():
        msg = '{}:'.format(param_key)
        if isinstance(param, h5py.Dataset):
            msg += ' {}'.format(param.shape)
        print(msg)
        if isinstance(param, h5py.Group):
            for child_key, child in param.items():
                print('  {}:{}'.format(child_key, child.shape))
    print('---------------------------------------------\n')
Example #11
Source File: io.py From pwtools with BSD 3-Clause "New" or "Revised" License
def read_h5(fn):
    """Read h5 file into dict. Dict keys are the group + dataset names, e.g.
    '/a/b/c/dset'. All keys start with a leading slash even if written without
    (see :func:`write_h5`).

    Parameters
    ----------
    fn : str
        filename

    Examples
    --------
    >>> read_h5('foo.h5').keys()
    ['/a/b/d1', '/a/b/d2', '/a/c/d3', '/x/y/z']
    """
    fh = h5py.File(fn, mode='r')
    dct = {}

    def get(name, obj, dct=dct):
        if isinstance(obj, h5py.Dataset):
            _name = name if name.startswith('/') else '/'+name
            dct[_name] = obj[()]

    fh.visititems(get)
    fh.close()
    return dct
Example #12
Source File: hdf.py From wradlib with MIT License
def from_hdf5(fpath, dataset="data"):
    """Load data from an hdf5 file that was stored by
    :meth:`~wradlib.io.to_hdf5`

    Parameters
    ----------
    fpath : string
        path to the hdf5 file
    dataset : string
        name of the Dataset in which the data is stored
    """
    f = h5py.File(fpath, mode="r")
    # Check whether Dataset exists
    if dataset not in f.keys():
        raise KeyError(
            "WRADLIB: Cannot read Dataset <%s> from hdf5 file "
            "<%s>" % (dataset, f)
        )
    data = np.array(f[dataset][:])
    # get metadata
    metadata = {}
    for key in f[dataset].attrs.keys():
        metadata[key] = f[dataset].attrs[key]
    f.close()
    return data, metadata
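For context, here is a rough sketch of the counterpart writer (this is not wradlib's actual to_hdf5 implementation; the function name, file name, and metadata keys below are illustrative). It produces a dataset with attribute metadata in the layout from_hdf5 reads back:

import h5py
import numpy as np

def to_hdf5_sketch(fpath, data, dataset="data", metadata=None):
    # Store the array under `dataset` and attach metadata as HDF5
    # attributes, mirroring what from_hdf5 above reads back out.
    with h5py.File(fpath, mode="w") as f:
        dset = f.create_dataset(dataset, data=data)
        for key, value in (metadata or {}).items():
            dset.attrs[key] = value

to_hdf5_sketch("radar.h5", np.zeros((360, 128)), metadata={"unit": "dBZ"})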
Example #13
Source File: pyanitools.py From ANI1_dataset with MIT License
def h5py_dataset_iterator(self, g, prefix=''):
    for key in g.keys():
        item = g[key]
        path = '{}/{}'.format(prefix, key)
        keys = [i for i in item.keys()]
        if isinstance(item[keys[0]], h5py.Dataset):  # test for dataset
            data = {'path': path}
            for k in keys:
                if not isinstance(item[k], h5py.Group):
                    dataset = np.array(item[k].value)
                    if type(dataset) is np.ndarray:
                        if dataset.size != 0:
                            if type(dataset[0]) is np.bytes_:
                                dataset = [a.decode('ascii') for a in dataset]
                    data.update({k: dataset})
            yield data
        else:  # test for group (go down)
            yield from self.h5py_dataset_iterator(item, path)
Example #14
Source File: test_dataset.py From GraphicDesignPatternByPython with MIT License
def test_int_with_minbits_lossy(self):
    """ Scaleoffset filter works for integer data with specified precision """

    nbits = 12
    shape = (100, 300)
    testdata = np.random.randint(0, 2**(nbits+1)-1, size=shape)

    dset = self.f.create_dataset('foo', shape, dtype=int, scaleoffset=nbits)

    # Dataset reports scaleoffset enabled with correct precision
    self.assertTrue(dset.scaleoffset == 12)

    # Data can be written and read
    dset[...] = testdata
    filename = self.f.filename
    self.f.close()
    self.f = h5py.File(filename, 'r')
    readdata = self.f['foo'][...]

    # Compression is lossy
    assert not (readdata == testdata).all()
Example #15
Source File: hdf5_01.py From hangar-py with Apache License 2.0
def __init__(self, repo_path: Path, schema_shape: tuple, schema_dtype: np.dtype):
    self.path: Path = repo_path
    self.schema_shape: tuple = schema_shape
    self.schema_dtype: np.dtype = schema_dtype
    self._dflt_backend_opts: Optional[dict] = None

    self.rFp: HDF5_01_MapTypes = {}
    self.wFp: HDF5_01_MapTypes = {}
    self.Fp: HDF5_01_MapTypes = ChainMap(self.rFp, self.wFp)

    self.rDatasets = SizedDict(maxsize=100)
    self.wdset: h5py.Dataset = None

    self.mode: Optional[str] = None
    self.hIdx: Optional[int] = None
    self.w_uid: Optional[str] = None
    self.hMaxSize: Optional[int] = None
    self.hNextPath: Optional[int] = None
    self.hColsRemain: Optional[int] = None

    self.STAGEDIR: Path = Path(self.path, DIR_DATA_STAGE, _FmtCode)
    self.REMOTEDIR: Path = Path(self.path, DIR_DATA_REMOTE, _FmtCode)
    self.DATADIR: Path = Path(self.path, DIR_DATA, _FmtCode)
    self.STOREDIR: Path = Path(self.path, DIR_DATA_STORE, _FmtCode)
    self.DATADIR.mkdir(exist_ok=True)
Example #16
Source File: hdf5_00.py From hangar-py with Apache License 2.0
def __init__(self, repo_path: Path, schema_shape: tuple, schema_dtype: np.dtype):
    self.path: Path = repo_path
    self.schema_shape: tuple = schema_shape
    self.schema_dtype: np.dtype = schema_dtype
    self._dflt_backend_opts: Optional[dict] = None

    self.rFp: HDF5_00_MapTypes = {}
    self.wFp: HDF5_00_MapTypes = {}
    self.Fp: HDF5_00_MapTypes = ChainMap(self.rFp, self.wFp)

    self.rDatasets = SizedDict(maxsize=100)
    self.wdset: Optional[h5py.Dataset] = None

    self.mode: Optional[str] = None
    self.hIdx: Optional[int] = None
    self.w_uid: Optional[str] = None
    self.hMaxSize: Optional[int] = None
    self.hNextPath: Optional[int] = None
    self.hColsRemain: Optional[int] = None

    self.STAGEDIR: Path = Path(self.path, DIR_DATA_STAGE, _FmtCode)
    self.REMOTEDIR: Path = Path(self.path, DIR_DATA_REMOTE, _FmtCode)
    self.STOREDIR: Path = Path(self.path, DIR_DATA_STORE, _FmtCode)
    self.DATADIR: Path = Path(self.path, DIR_DATA, _FmtCode)
    self.DATADIR.mkdir(exist_ok=True)
Example #17
Source File: pyanitools.py From deepchem with MIT License
def h5py_dataset_iterator(self, g, prefix=''):
    for key in g.keys():
        item = g[key]
        path = '{}/{}'.format(prefix, key)
        keys = [i for i in item.keys()]
        if isinstance(item[keys[0]], h5py.Dataset):  # test for dataset
            data = {'path': path}
            for k in keys:
                if not isinstance(item[k], h5py.Group):
                    dataset = np.array(item[k].value)
                    if type(dataset) is np.ndarray:
                        if dataset.size != 0:
                            if type(dataset[0]) is np.bytes_:
                                dataset = [a.decode('ascii') for a in dataset]
                    data.update({k: dataset})
            yield data
        else:  # test for group (go down)
            for s in self.h5py_dataset_iterator(item, path):
                yield s
Example #18
Source File: omas_h5.py From omas with MIT License
def convertDataset(ods, data):
    '''
    Recursive utility function to map HDF5 structure to ODS

    :param ods: input ODS to be populated

    :param data: HDF5 dataset or group
    '''
    import h5py

    keys = data.keys()
    try:
        keys = sorted(list(map(int, keys)))
    except ValueError:
        pass
    for oitem in keys:
        item = str(oitem)
        if item.endswith('_error_upper'):
            continue
        if isinstance(data[item], h5py.Dataset):
            ods[item] = data[item][()]
            if item + '_error_upper' in data:
                ods[item] = uarray(ods[item], data[item + '_error_upper'][()])
        elif isinstance(data[item], h5py.Group):
            convertDataset(ods[oitem], data[item])
Example #19
Source File: test_dataset.py From GraphicDesignPatternByPython with MIT License
def test_int(self):
    """ Scaleoffset filter works for integer data with default precision """

    nbits = 12
    shape = (100, 300)
    testdata = np.random.randint(0, 2**nbits-1, size=shape)

    # Create dataset; note omission of nbits (for library-determined precision)
    dset = self.f.create_dataset('foo', shape, dtype=int, scaleoffset=True)

    # Dataset reports scaleoffset enabled
    assert dset.scaleoffset is not None

    # Data round-trips correctly and identically
    dset[...] = testdata
    filename = self.f.filename
    self.f.close()
    self.f = h5py.File(filename, 'r')
    readdata = self.f['foo'][...]
    self.assertArrayEqual(readdata, testdata)
Example #20
Source File: to_hdf5.py From mars with Apache License 2.0
def tohdf5(hdf5_file, x, group=None, dataset=None, **kwds):
    import h5py

    x = astensor(x)
    if isinstance(hdf5_file, h5py.Dataset):
        filename = hdf5_file.file.filename
        group = hdf5_file.parent.name
        dataset = hdf5_file.name.rsplit('/', 1)[1]
    elif isinstance(hdf5_file, h5py.File):
        filename = hdf5_file.filename
        if dataset is None:
            raise ValueError('`dataset` should be provided')
    elif isinstance(hdf5_file, str):
        filename = hdf5_file
        if dataset is None:
            raise ValueError('`dataset` should be provided')
    else:
        raise TypeError('`hdf5_file` passed has wrong type, '
                        'expect str, h5py.File or h5py.Dataset, '
                        'got {}'.format(type(hdf5_file)))

    op = TensorHDF5DataStore(filename=filename, group=group,
                             dataset=dataset, dataset_kwds=kwds)
    return op(x)
Example #21
Source File: hdsortsortingextractor.py From spikeextractors with MIT License
def _parse_units(file, _units):
    import h5py

    t_units = {}
    if isinstance(_units, h5py.Group):
        for name in _units.keys():
            value = _units[name]
            dict_val = []
            for val in value:
                if isinstance(file[val[0]], h5py.Dataset):
                    dict_val.append(file[val[0]][()])
                    t_units[name] = dict_val
                else:
                    break
        out = [dict(zip(t_units, col)) for col in zip(*t_units.values())]
    else:
        out = []
        for unit in _units:
            group = file[unit[()][0]]
            unit_dict = {}
            for k in group.keys():
                unit_dict[k] = group[k][()]
            out.append(unit_dict)
    return out
Example #22
Source File: test.py From ldpred with MIT License
def h5_node_walker(h5_node, key_prefix=''):
    """Generator function that walks an hdf5 File or Group object.

    Args:
        h5_node: an h5py.File or h5py.Group object
        key_prefix: the '/' delimited string representing the name path of
            the node within the .hdf5 file.

    Yields:
        (child_key, child_value)
    """
    for k, v in h5_node.items():
        v_type = type(v)
        v_path = key_prefix + '/' + k
        if v_type == h5py.Group:
            for nested_key, nested_value in h5_node_walker(v, v_path):
                yield nested_key, nested_value
        elif v_type == h5py.Dataset:
            yield v_path, v[...]
        else:
            assert False, 'Unexpected v_type: %s' % v_type
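For illustration, a minimal driver for this walker might look like the following (the file name weights.h5 is hypothetical). Each yielded value is already a NumPy array (v[...]), so .shape is available:

import h5py

with h5py.File('weights.h5', 'r') as f:
    for path, value in h5_node_walker(f):
        print(path, value.shape)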
Example #23
Source File: file_backing.py From anndata with BSD 3-Clause "New" or "Revised" License
def __getitem__(self, key: str) -> Union[h5py.Group, h5py.Dataset, SparseDataset]:
    return self._file[key]
Example #24
Source File: io.py From mxnet-lambda with Apache License 2.0
def _init_data(data, allow_empty, default_name):
    """Convert data into canonical form."""
    assert (data is not None) or allow_empty
    if data is None:
        data = []

    if isinstance(data, (np.ndarray, NDArray, h5py.Dataset)
                  if h5py else (np.ndarray, NDArray)):
        data = [data]
    if isinstance(data, list):
        if not allow_empty:
            assert(len(data) > 0)
        if len(data) == 1:
            data = OrderedDict([(default_name, data[0])])  # pylint: disable=redefined-variable-type
        else:
            data = OrderedDict(  # pylint: disable=redefined-variable-type
                [('_%d_%s' % (i, default_name), d) for i, d in enumerate(data)])
    if not isinstance(data, dict):
        raise TypeError("Input must be NDArray, numpy.ndarray, h5py.Dataset " + \
                        "a list of them or dict with them as values")
    for k, v in data.items():
        if not isinstance(v, (NDArray, h5py.Dataset) if h5py else NDArray):
            try:
                data[k] = array(v)
            except:
                raise TypeError(("Invalid type '%s' for %s, " % (type(v), k)) + \
                                "should be NDArray, numpy.ndarray or h5py.Dataset")
    return list(data.items())
Example #25
Source File: cosmodc2.py From gcr-catalogs with BSD 3-Clause "New" or "Revised" License
def _add_to_native_quantity_collector(name, obj, collector):
    if isinstance(obj, h5py.Dataset):
        collector.add(name)
Example #26
Source File: io.py From mxnet-lambda with Apache License 2.0
def __init__(self, data, label=None, batch_size=1, shuffle=False,
             last_batch_handle='pad', data_name='data',
             label_name='softmax_label'):
    super(NDArrayIter, self).__init__(batch_size)

    self.data = _init_data(data, allow_empty=False, default_name=data_name)
    self.label = _init_data(label, allow_empty=True, default_name=label_name)

    if isinstance(data, CSRNDArray) or isinstance(label, CSRNDArray):
        assert(shuffle is False), \
            "`NDArrayIter` only supports ``CSRNDArray`` with `shuffle` set to `False`"
        assert(last_batch_handle == 'discard'), \
            "`NDArrayIter` only supports ``CSRNDArray``" \
            " with `last_batch_handle` set to `discard`."

    self.idx = np.arange(self.data[0][1].shape[0])
    # shuffle data
    if shuffle:
        np.random.shuffle(self.idx)
        self.data = [(k, array(v.asnumpy()[self.idx], v.context))
                     if not (isinstance(v, h5py.Dataset) if h5py else False)
                     else (k, v) for k, v in self.data]
        self.label = [(k, array(v.asnumpy()[self.idx], v.context))
                      if not (isinstance(v, h5py.Dataset) if h5py else False)
                      else (k, v) for k, v in self.label]

    # batching
    if last_batch_handle == 'discard':
        new_n = self.data[0][1].shape[0] - self.data[0][1].shape[0] % batch_size
        self.idx = self.idx[:new_n]

    self.data_list = [x[1] for x in self.data] + [x[1] for x in self.label]
    self.num_source = len(self.data_list)
    self.num_data = self.idx.shape[0]
    assert self.num_data >= batch_size, \
        "batch_size needs to be smaller than data size."
    self.cursor = -batch_size
    self.batch_size = batch_size
    self.last_batch_handle = last_batch_handle
Example #27
Source File: importer.py From nnabla with Apache License 2.0
def load_parameters(self, filename):
    e = os.path.splitext(filename)[1].lower()
    if e == '.h5':
        import h5py
        with h5py.File(filename, 'r') as hd:
            keys = []

            def _get_keys(name):
                ds = hd[name]
                if not isinstance(ds, h5py.Dataset):
                    # Group
                    return
                # To preserve order of parameters
                keys.append((ds.attrs.get('index', None), name))

            hd.visit(_get_keys)
            for _, key in sorted(keys):
                ds = hd[key]
                parameter = self._nnp.parameter.add()
                parameter.variable_name = key
                parameter.shape.dim.extend(ds.shape)
                parameter.data.extend(ds[...].flatten())
                if ds.attrs['need_grad']:
                    parameter.need_grad = True
                else:
                    parameter.need_grad = False
    elif e == '.protobuf':
        with open(filename, 'rb') as f:
            self._nnp.MergeFromString(f.read())
Example #28
Source File: anndata.py From anndata with BSD 3-Clause "New" or "Revised" License
def X(self) -> Optional[Union[np.ndarray, sparse.spmatrix, ArrayView]]:
    """Data matrix of shape :attr:`n_obs` × :attr:`n_vars`."""
    if self.isbacked:
        if not self.file.is_open:
            self.file.open()
        X = self.file["X"]
        if isinstance(X, h5py.Group):
            X = SparseDataset(X)
        # TODO: This should get replaced/ handled elsewhere
        # This is so that we can index into a backed dense dataset with
        # indices that aren’t strictly increasing
        if self.is_view and isinstance(X, h5py.Dataset):
            ordered = [self._oidx, self._vidx]  # this will be mutated
            rev_order = [slice(None), slice(None)]
            for axis, axis_idx in enumerate(ordered.copy()):
                if isinstance(axis_idx, np.ndarray) and axis_idx.dtype.type != bool:
                    order = np.argsort(axis_idx)
                    ordered[axis] = axis_idx[order]
                    rev_order[axis] = np.argsort(order)
            # from hdf5, then to real order
            X = X[tuple(ordered)][tuple(rev_order)]
        elif self.is_view:
            X = X[self._oidx, self._vidx]
    elif self.is_view:
        X = as_view(
            _subset(self._adata_ref.X, (self._oidx, self._vidx)),
            ElementRef(self, "X"),
        )
    else:
        X = self._X
    return X
    # if self.n_obs == 1 and self.n_vars == 1:
    #     return X[0, 0]
    # elif self.n_obs == 1 or self.n_vars == 1:
    #     if issparse(X):
    #         X = X.toarray()
    #     return X.flatten()
    # else:
    #     return X
Example #29
Source File: file_backing.py From anndata with BSD 3-Clause "New" or "Revised" License
def __setitem__(
    self, key: str, value: Union[h5py.Group, h5py.Dataset, SparseDataset]
):
    self._file[key] = value
Example #30
Source File: sparse_dataset.py From anndata with BSD 3-Clause "New" or "Revised" License
def copy(self) -> ss.spmatrix:
    if isinstance(self.data, h5py.Dataset):
        return SparseDataset(self.data.parent).to_memory()
    else:
        return super().copy()