Python h5py.check_dtype() Examples
The following are 16 code examples of h5py.check_dtype().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module h5py, or try the search function.
Example #1
Source File: test_datatype.py From GraphicDesignPatternByPython with MIT License | 6 votes |
def test_vlen_enum(self):
    """Round-trip a vlen-of-enum dtype through a file and check that
    check_dtype() recovers the same enum mapping from both dtypes."""
    path = self.mktemp()
    expected = [[1], [1, 2]]
    vlen_enum = h5py.special_dtype(
        vlen=h5py.special_dtype(enum=('i', dict(foo=1, bar=2))))
    with h5py.File(path, 'w') as handle:
        dset = handle.create_dataset('test', (len(expected),), dtype=vlen_enum)
        dset[:] = np.array(expected)
    with h5py.File(path, 'r') as handle:
        stored = handle['test']
        stored_dtype = stored.dtype
        actual = [row.tolist() for row in stored[:]]
    self.assertEqual(expected, actual)
    self.assertEqual(
        h5py.check_dtype(enum=h5py.check_dtype(vlen=vlen_enum)),
        h5py.check_dtype(enum=h5py.check_dtype(vlen=stored_dtype)))
Example #2
Source File: core.py From h5netcdf with BSD 3-Clause "New" or "Revised" License | 6 votes |
def _check_valid_netcdf_dtype(self, dtype, stacklevel=3):
    """Report (via _invalid_netcdf_feature) any dtype that a valid
    netCDF file cannot represent: booleans, complex numbers, enums,
    references, and non-string variable-length types."""
    dtype = np.dtype(dtype)
    description = None
    if dtype == bool:
        description = 'boolean'
    elif dtype == complex:
        description = 'complex'
    elif h5py.check_dtype(enum=dtype) is not None:
        description = 'enum'
    elif h5py.check_dtype(ref=dtype) is not None:
        description = 'reference'
    elif h5py.check_dtype(vlen=dtype) not in {None, unicode, bytes}:
        # vlen of str/bytes is fine; any other vlen base type is not
        description = 'non-string variable length'
    if description is None:
        return
    _invalid_netcdf_feature('{} dtypes'.format(description),
                            allow=self.invalid_netcdf,
                            file=self,
                            stacklevel=stacklevel + 1)
Example #3
Source File: dask.py From cooler with BSD 3-Clause "New" or "Revised" License | 6 votes |
def _get_group_info(path, grouppath, keys):
    """Open the HDF5 group at *grouppath* and return
    (nrows, keys, meta, categoricals), where *meta* is an empty
    DataFrame serving as a compound "dtype" for the group's columns."""
    with h5py.File(path, "r") as f:
        grp = f[grouppath]
        if keys is None:
            keys = list(grp.keys())
        nrows = len(grp[keys[0]])
        # Columns whose dtype carries an HDF5 enum become ordered categoricals.
        enum_maps = {key: h5py.check_dtype(enum=grp[key].dtype) for key in keys}
        categoricals = {
            key: sorted(mapping, key=mapping.__getitem__)
            for key, mapping in enum_maps.items()
            if mapping is not None
        }
        meta = pd.DataFrame(
            {key: np.array([], dtype=grp[key].dtype) for key in keys},
            columns=keys,
        )
        for key in categoricals:
            meta[key] = pd.Categorical([], categories=categoricals[key], ordered=True)
    return nrows, keys, meta, categoricals
Example #4
Source File: test_datatype.py From keras-lambda with MIT License | 6 votes |
def test_vlen_enum(self):
    """Write a vlen-of-enum dataset, read it back, and verify both the
    data and the nested enum mapping survive the round trip."""
    filename = self.mktemp()
    source_rows = [[1], [1, 2]]
    nested_dtype = h5py.special_dtype(
        vlen=h5py.special_dtype(enum=('i', dict(foo=1, bar=2))))
    with h5py.File(filename, 'w') as fobj:
        written = fobj.create_dataset('test', (len(source_rows),), dtype=nested_dtype)
        written[:] = np.array(source_rows)
    with h5py.File(filename, 'r') as fobj:
        readback = fobj['test']
        readback_dtype = readback.dtype
        result_rows = [item.tolist() for item in readback[:]]
    self.assertEqual(source_rows, result_rows)
    original_enum = h5py.check_dtype(enum=h5py.check_dtype(vlen=nested_dtype))
    recovered_enum = h5py.check_dtype(enum=h5py.check_dtype(vlen=readback_dtype))
    self.assertEqual(original_enum, recovered_enum)
Example #5
Source File: test_dataset.py From GraphicDesignPatternByPython with MIT License | 5 votes |
def test_create(self):
    """Enum datasets can be created and the type correctly round-trips."""
    enum_dtype = h5py.special_dtype(enum=('i', self.EDICT))
    dset = self.f.create_dataset('x', (100, 100), dtype=enum_dtype)
    recovered_mapping = h5py.check_dtype(enum=dset.dtype)
    self.assertEqual(recovered_mapping, self.EDICT)
Example #6
Source File: test_datatype.py From GraphicDesignPatternByPython with MIT License | 5 votes |
def test_compound(self):
    """A vlen-str field inside a compound dtype survives storage and is
    still recognised by check_dtype(vlen=...)."""
    compound = np.dtype([
        ('field_1', h5py.special_dtype(vlen=str)),
        ('field_2', np.int32),
    ])
    self.f['mytype'] = np.dtype(compound)
    stored_field = self.f['mytype'].dtype.fields['field_1'][0]
    self.assertEqual(h5py.check_dtype(vlen=stored_field), str)
Example #7
Source File: main.py From bulkvis with MIT License | 5 votes |
def get_annotations(path, fields, enum_field):
    """Collect labelled columns from an HDF5 compound dataset.

    Parameters
    ----------
    path : dataset-like object supporting access by field name
    fields : iterable of field names to load into the DataFrame
    enum_field : field whose dtype may carry an HDF5 enum mapping

    Returns
    -------
    (labels_df, data_dtypes) : the requested fields as a DataFrame, and a
    {value: name} mapping inverted from the enum definition ({} if the
    field has no enum dtype).
    """
    data_labels = {field: path[field] for field in fields}
    data_dtypes = {}
    # Call check_dtype once and reuse the result (the original evaluated
    # it twice on the same dtype).
    dataset_dtype = h5py.check_dtype(enum=path.dtype[enum_field])
    if dataset_dtype:
        # data_dtype may lose some dataset dtypes when there are duplicates of 'v'
        data_dtypes = {v: k for k, v in dataset_dtype.items()}
    labels_df = pd.DataFrame(data=data_labels)
    return labels_df, data_dtypes
Example #8
Source File: set_config.py From bulkvis with MIT License | 5 votes |
def get_annotations(path, enum_field):
    """Return the HDF5 enum mapping attached to *enum_field* of *path*'s
    dtype, or {} when the field carries no enum.
    """
    # Single check_dtype call (the original evaluated it twice); a falsy
    # result (None) collapses to the empty-dict default.
    return h5py.check_dtype(enum=path.dtype[enum_field]) or {}
Example #9
Source File: whale_plot.py From bulkvis with MIT License | 5 votes |
def get_annotations(path, fields, enum_field):
    """Collect labelled columns from an HDF5 compound dataset.

    Parameters
    ----------
    path : dataset-like object supporting access by field name
    fields : iterable of field names to load into the DataFrame
    enum_field : field whose dtype may carry an HDF5 enum mapping

    Returns
    -------
    (labels_df, data_dtypes) : the requested fields as a DataFrame, and a
    {value: name} mapping inverted from the enum definition ({} if the
    field has no enum dtype).
    """
    data_labels = {field: path[field] for field in fields}
    data_dtypes = {}
    # Hoist the duplicate check_dtype call: the original evaluated it
    # once for the truthiness test and again for the assignment.
    dataset_dtype = h5py.check_dtype(enum=path.dtype[enum_field])
    if dataset_dtype:
        # data_dtype may lose some dataset dtypes when there are duplicates of 'v'
        data_dtypes = {v: k for k, v in dataset_dtype.items()}
    labels_df = pd.DataFrame(data=data_labels)
    return labels_df, data_dtypes
Example #10
Source File: pod_plot.py From bulkvis with MIT License | 5 votes |
def get_annotations(path, fields, enum_field):
    """Collect labelled columns from an HDF5 compound dataset.

    Parameters
    ----------
    path : dataset-like object supporting access by field name
    fields : iterable of field names to load into the DataFrame
    enum_field : field whose dtype may carry an HDF5 enum mapping

    Returns
    -------
    (labels_df, data_dtypes) : the requested fields as a DataFrame, and a
    {value: name} mapping inverted from the enum definition ({} if the
    field has no enum dtype).
    """
    data_labels = {field: path[field] for field in fields}
    data_dtypes = {}
    # One check_dtype call instead of the original's two identical calls.
    dataset_dtype = h5py.check_dtype(enum=path.dtype[enum_field])
    if dataset_dtype:
        # data_dtype may lose some dataset dtypes when there are duplicates of 'v'
        data_dtypes = {v: k for k, v in dataset_dtype.items()}
    labels_df = pd.DataFrame(data=data_labels)
    return labels_df, data_dtypes
Example #11
Source File: legacyapi.py From h5netcdf with BSD 3-Clause "New" or "Revised" License | 5 votes |
def dtype(self):
    """dtype of the underlying h5py dataset; vlen-unicode datasets are
    reported as ``str``."""
    hdf5_dtype = self._h5ds.dtype
    return str if h5py.check_dtype(vlen=hdf5_dtype) is unicode else hdf5_dtype
Example #12
Source File: test_core.py From cooler with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_put(): f = make_hdf5_table('a') # append df = pd.DataFrame({ 'chrom': ['chr3', 'chr3'], 'start': [0, 20], 'end': [20, 40], 'value': [4.0, 5.0], }) core.put(f['table'], df, lo=5) f.flush() out = core.get(f['table']) assert len(out) == 7 # insert a categorical column s = pd.Series(pd.Categorical(out['chrom'], ordered=True), index=out.index) s.name = 'chrom_enum' core.put(f['table'], s) assert h5py.check_dtype(enum=f['table/chrom_enum'].dtype) out = core.get(f['table']) assert len(out.columns) == 5 assert pd.api.types.is_categorical_dtype(out['chrom_enum'].dtype) out = core.get(f['table'], convert_enum=False) assert len(out.columns) == 5 assert pd.api.types.is_integer_dtype(out['chrom_enum'].dtype) # don't convert categorical to enum s.name = 'chrom_string' core.put(f['table'], s, store_categories=False) out = core.get(f['table']) assert len(out.columns) == 6 assert not pd.api.types.is_categorical_dtype(out['chrom_string'].dtype) # scalar input core.put(f['table'], {'foo': 42}) out = core.get(f['table']) assert len(out.columns) == 7 assert (out['foo'] == 42).all()
Example #13
Source File: test_dataset.py From keras-lambda with MIT License | 5 votes |
def test_create(self):
    """Enum datasets can be created and the type correctly round-trips."""
    created = self.f.create_dataset(
        'x', (100, 100), dtype=h5py.special_dtype(enum=('i', self.EDICT)))
    self.assertEqual(h5py.check_dtype(enum=created.dtype), self.EDICT)
Example #14
Source File: test_datatype.py From keras-lambda with MIT License | 5 votes |
def test_compound(self):
    """check_dtype(vlen=...) still identifies a vlen-str member after a
    compound dtype is written to the file."""
    member_specs = [
        ('field_1', h5py.special_dtype(vlen=str)),
        ('field_2', np.int32),
    ]
    compound_dtype = np.dtype(member_specs)
    self.f['mytype'] = np.dtype(compound_dtype)
    first_member = self.f['mytype'].dtype.fields['field_1'][0]
    self.assertEqual(h5py.check_dtype(vlen=first_member), str)
Example #15
Source File: dataset_view.py From QCPortal with BSD 3-Clause "New" or "Revised" License | 4 votes |
def get_values(
    self, queries: List[Dict[str, Union[str, bool]]], subset: Optional[List[str]] = None
) -> Tuple[pd.DataFrame, Dict[str, str]]:
    """Read queried value columns out of the view's HDF5 file.

    Parameters
    ----------
    subset
    queries: List[Dict[str, Union[str, bool]]]
        List of queries. Fields actually used are native, name, driver

    Returns a (DataFrame of values indexed by entry, {column: units}) pair.
    """
    import h5py

    units = {}
    entries = self.get_index(subset)
    indexes = entries._h5idx  # HDF5 row positions for the selected entries
    with self._read_file() as f:
        ret = pd.DataFrame(index=entries["index"])
        for query in queries:
            # native results live under value/, others under contributed_value/
            dataset_name = "value/" if query["native"] else "contributed_value/"
            dataset_name += self._normalize_hdf5_name(query["name"])
            driver = query["driver"]
            dataset = f[dataset_name]
            if not h5py.check_dtype(vlen=dataset.dtype):
                # fixed-size dtype: take rows as stored
                data = [dataset[i] for i in indexes]
            else:
                # variable-length data: reshape per driver where the
                # target shape is known
                if driver.lower() == "gradient":
                    data = [np.reshape(dataset[i], (-1, 3)) for i in indexes]
                elif driver.lower() == "hessian":
                    # flat length n^2 -> square (n, n) matrix
                    data = []
                    for i in indexes:
                        n2 = len(dataset[i])
                        n = int(round(np.sqrt(n2)))
                        data.append(np.reshape(dataset[i], (n, n)))
                else:
                    warnings.warn(
                        f"Variable length data type not understood, returning flat array "
                        f"(driver = {driver}).",
                        RuntimeWarning,
                    )
                    # fall back to flat arrays; keep raw rows if the
                    # elements are ragged and np.array refuses them
                    try:
                        data = [np.array(dataset[i]) for i in indexes]
                    except ValueError:
                        data = [dataset[i] for i in indexes]
            column_name = query["name"]
            column_units = self._deserialize_field(dataset.attrs["units"])
            ret[column_name] = data
            units[column_name] = column_units
    return ret, units
Example #16
Source File: dataset_view.py From QCFractal with BSD 3-Clause "New" or "Revised" License | 4 votes |
def get_values(
    self, queries: List[Dict[str, Union[str, bool]]], subset: Optional[List[str]] = None
) -> Tuple[pd.DataFrame, Dict[str, str]]:
    """Read queried value columns out of the view's HDF5 file.

    Parameters
    ----------
    subset
    queries: List[Dict[str, Union[str, bool]]]
        List of queries. Fields actually used are native, name, driver

    Returns a (DataFrame of values indexed by entry, {column: units}) pair.
    """
    import h5py

    units = {}
    entries = self.get_index(subset)
    indexes = entries._h5idx  # HDF5 row positions for the selected entries
    with self._read_file() as f:
        ret = pd.DataFrame(index=entries["index"])
        for query in queries:
            # native results live under value/, others under contributed_value/
            dataset_name = "value/" if query["native"] else "contributed_value/"
            dataset_name += self._normalize_hdf5_name(query["name"])
            driver = query["driver"]
            dataset = f[dataset_name]
            if not h5py.check_dtype(vlen=dataset.dtype):
                # fixed-size dtype: take rows as stored
                data = [dataset[i] for i in indexes]
            else:
                # variable-length data: reshape per driver where the
                # target shape is known
                if driver.lower() == "gradient":
                    data = [np.reshape(dataset[i], (-1, 3)) for i in indexes]
                elif driver.lower() == "hessian":
                    # flat length n^2 -> square (n, n) matrix
                    data = []
                    for i in indexes:
                        n2 = len(dataset[i])
                        n = int(round(np.sqrt(n2)))
                        data.append(np.reshape(dataset[i], (n, n)))
                else:
                    warnings.warn(
                        f"Variable length data type not understood, returning flat array "
                        f"(driver = {driver}).",
                        RuntimeWarning,
                    )
                    # fall back to flat arrays; keep raw rows if the
                    # elements are ragged and np.array refuses them
                    try:
                        data = [np.array(dataset[i]) for i in indexes]
                    except ValueError:
                        data = [dataset[i] for i in indexes]
            column_name = query["name"]
            column_units = self._deserialize_field(dataset.attrs["units"])
            ret[column_name] = data
            units[column_name] = column_units
    return ret, units