Python h5py.check_dtype() Examples

The following are 16 code examples of h5py.check_dtype(), collected from open-source projects. The source file and originating project are listed above each example. You may also want to check out all available functions and classes of the h5py module.
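h5py.check_dtype() inspects a NumPy dtype for h5py-specific metadata: given one keyword argument (vlen=, enum=, or ref=) and a dtype, it returns the associated special-type information, or None if the dtype carries none. A minimal, self-contained sketch of the API (not taken from any of the projects below):

import h5py
import numpy as np

# A variable-length string dtype: check_dtype(vlen=...) recovers the base type
vlen_dt = h5py.special_dtype(vlen=str)
assert h5py.check_dtype(vlen=vlen_dt) is str

# An enum dtype: check_dtype(enum=...) recovers the name -> value mapping
enum_dt = h5py.special_dtype(enum=('i', dict(foo=1, bar=2)))
assert h5py.check_dtype(enum=enum_dt) == dict(foo=1, bar=2)

# A plain dtype carries no special metadata, so every check returns None
assert h5py.check_dtype(vlen=np.dtype('i4')) is None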
Example #1
Source File: test_datatype.py    From GraphicDesignPatternByPython with MIT License
def test_vlen_enum(self):
        fname = self.mktemp()
        arr1 = [[1],[1,2]]
        dt1 = h5py.special_dtype(vlen=h5py.special_dtype(
            enum=('i', dict(foo=1, bar=2))))

        with h5py.File(fname,'w') as f:
            df1 = f.create_dataset('test', (len(arr1),), dtype=dt1)
            df1[:] = np.array(arr1)

        with h5py.File(fname,'r') as f:
            df2  = f['test']
            dt2  = df2.dtype
            arr2 = [e.tolist() for e in df2[:]]

        self.assertEqual(arr1, arr2)
        self.assertEqual(h5py.check_dtype(enum=h5py.check_dtype(vlen=dt1)),
                         h5py.check_dtype(enum=h5py.check_dtype(vlen=dt2))) 
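The final assertion works because check_dtype() unwraps one layer at a time: vlen= on the outer dtype returns the inner enum dtype, and enum= on that returns the mapping. Spelled out for dt1 above:

inner = h5py.check_dtype(vlen=dt1)        # the enum dtype wrapped inside the vlen
assert h5py.check_dtype(enum=inner) == dict(foo=1, bar=2)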
Example #2
Source File: core.py    From h5netcdf with BSD 3-Clause "New" or "Revised" License
def _check_valid_netcdf_dtype(self, dtype, stacklevel=3):
        dtype = np.dtype(dtype)

        if dtype == bool:
            description = 'boolean'
        elif dtype == complex:
            description = 'complex'
        elif h5py.check_dtype(enum=dtype) is not None:
            description = 'enum'
        elif h5py.check_dtype(ref=dtype) is not None:
            description = 'reference'
        # NOTE: `unicode` comes from the module's py2/py3 compat shim (str on Python 3)
        elif h5py.check_dtype(vlen=dtype) not in {None, unicode, bytes}:
            description = 'non-string variable length'
        else:
            description = None

        if description is not None:
            _invalid_netcdf_feature('{} dtypes'.format(description),
                                    allow=self.invalid_netcdf,
                                    file=self,
                                    stacklevel=stacklevel + 1) 
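The ref= branch above can be exercised with h5py's object-reference dtype; a short, self-contained sketch:

import h5py

ref_dt = h5py.special_dtype(ref=h5py.Reference)
# For a reference dtype, check_dtype(ref=...) returns the reference class...
assert h5py.check_dtype(ref=ref_dt) is h5py.Reference
# ...while the other checks return None
assert h5py.check_dtype(enum=ref_dt) is None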
Example #3
Source File: dask.py    From cooler with BSD 3-Clause "New" or "Revised" License
def _get_group_info(path, grouppath, keys):
    with h5py.File(path, "r") as f:
        grp = f[grouppath]

        if keys is None:
            keys = list(grp.keys())

        nrows = len(grp[keys[0]])

        categoricals = {}
        for key in keys:
            dt = h5py.check_dtype(enum=grp[key].dtype)
            if dt is not None:
                # sort enum member names by their integer values
                categoricals[key] = sorted(dt, key=dt.__getitem__)

        # Meta is an empty dataframe that serves as a compound "dtype"
        meta = pd.DataFrame(
            {key: np.array([], dtype=grp[key].dtype) for key in keys}, columns=keys
        )

        for key in categoricals:
            meta[key] = pd.Categorical([], categories=categoricals[key], ordered=True)

    return nrows, keys, meta, categoricals 
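To see how the recovered enum mapping feeds into pandas, here is a hedged, self-contained sketch that writes an enum-typed dataset and rebuilds an ordered Categorical from it (the file and dataset names are invented):

import h5py
import numpy as np
import pandas as pd

mapping = {'chr1': 0, 'chr2': 1}                   # name -> integer code
dt = h5py.special_dtype(enum=('i1', mapping))

with h5py.File('example.h5', 'w') as f:
    f.create_dataset('chrom', data=np.array([0, 0, 1], dtype='i1'), dtype=dt)

with h5py.File('example.h5', 'r') as f:
    ds = f['chrom']
    enum_map = h5py.check_dtype(enum=ds.dtype)     # {'chr1': 0, 'chr2': 1}
    categories = sorted(enum_map, key=enum_map.__getitem__)
    col = pd.Categorical.from_codes(ds[:], categories=categories, ordered=True)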
Example #4
Source File: test_datatype.py    From keras-lambda with MIT License
def test_vlen_enum(self):
        fname = self.mktemp()
        arr1 = [[1],[1,2]]
        dt1 = h5py.special_dtype(vlen=h5py.special_dtype(
            enum=('i', dict(foo=1, bar=2))))

        with h5py.File(fname,'w') as f:
            df1 = f.create_dataset('test', (len(arr1),), dtype=dt1)
            df1[:] = np.array(arr1)

        with h5py.File(fname,'r') as f:
            df2  = f['test']
            dt2  = df2.dtype
            arr2 = [e.tolist() for e in df2[:]]

        self.assertEqual(arr1, arr2)
        self.assertEqual(h5py.check_dtype(enum=h5py.check_dtype(vlen=dt1)),
                         h5py.check_dtype(enum=h5py.check_dtype(vlen=dt2))) 
Example #5
Source File: test_dataset.py    From GraphicDesignPatternByPython with MIT License
def test_create(self):
        """ Enum datasets can be created and type correctly round-trips """
        dt = h5py.special_dtype(enum=('i', self.EDICT))
        ds = self.f.create_dataset('x', (100, 100), dtype=dt)
        dt2 = ds.dtype
        dict2 = h5py.check_dtype(enum=dt2)
        self.assertEqual(dict2, self.EDICT) 
Example #6
Source File: test_datatype.py    From GraphicDesignPatternByPython with MIT License
def test_compound(self):

        fields = []
        fields.append(('field_1', h5py.special_dtype(vlen=str)))
        fields.append(('field_2', np.int32))
        dt = np.dtype(fields)
        self.f['mytype'] = np.dtype(dt)
        dt_out = self.f['mytype'].dtype.fields['field_1'][0]
        self.assertEqual(h5py.check_dtype(vlen=dt_out), str) 
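check_dtype() operates on a single dtype, so for a compound type each field has to be inspected individually; a small sketch:

import h5py
import numpy as np

dt = np.dtype([('field_1', h5py.special_dtype(vlen=str)),
               ('field_2', np.int32)])

for name in dt.names:
    field_dt = dt.fields[name][0]                  # (dtype, offset) -> dtype
    print(name, h5py.check_dtype(vlen=field_dt))   # str for field_1, None for field_2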
Example #7
Source File: main.py    From bulkvis with MIT License
def get_annotations(path, fields, enum_field):
    data_labels = {}
    for field in fields:
        data_labels[field] = path[field]
    data_dtypes = {}
    if h5py.check_dtype(enum=path.dtype[enum_field]):
        dataset_dtype = h5py.check_dtype(enum=path.dtype[enum_field])
        # data_dtypes may lose entries if the enum mapping contains duplicate values
        data_dtypes = {v: k for k, v in dataset_dtype.items()}
    labels_df = pd.DataFrame(data=data_labels)
    return labels_df, data_dtypes 
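As the comment warns, the {v: k for k, v in ...} inversion silently keeps only one name per duplicate value. A duplicate-preserving variant collects all names per value (a sketch with made-up labels, not bulkvis code):

from collections import defaultdict

enum_map = {'pore': 0, 'unclassified': 0, 'good_single': 1}   # made-up labels

inverted = defaultdict(list)
for name, value in enum_map.items():
    inverted[value].append(name)
# inverted == {0: ['pore', 'unclassified'], 1: ['good_single']}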
Example #8
Source File: set_config.py    From bulkvis with MIT License
def get_annotations(path, enum_field):
    data_dtypes = {}
    if h5py.check_dtype(enum=path.dtype[enum_field]):
        data_dtypes = h5py.check_dtype(enum=path.dtype[enum_field])
    return data_dtypes 
Example #9
Source File: whale_plot.py    From bulkvis with MIT License
def get_annotations(path, fields, enum_field):
    data_labels = {}
    for field in fields:
        data_labels[field] = path[field]
    data_dtypes = {}
    if h5py.check_dtype(enum=path.dtype[enum_field]):
        dataset_dtype = h5py.check_dtype(enum=path.dtype[enum_field])
        # data_dtypes may lose entries if the enum mapping contains duplicate values
        data_dtypes = {v: k for k, v in dataset_dtype.items()}
    labels_df = pd.DataFrame(data=data_labels)
    return labels_df, data_dtypes 
Example #10
Source File: pod_plot.py    From bulkvis with MIT License
def get_annotations(path, fields, enum_field):
    data_labels = {}
    for field in fields:
        data_labels[field] = path[field]
    data_dtypes = {}
    if h5py.check_dtype(enum=path.dtype[enum_field]):
        dataset_dtype = h5py.check_dtype(enum=path.dtype[enum_field])
        # data_dtypes may lose entries if the enum mapping contains duplicate values
        data_dtypes = {v: k for k, v in dataset_dtype.items()}
    labels_df = pd.DataFrame(data=data_labels)
    return labels_df, data_dtypes 
Example #11
Source File: legacyapi.py    From h5netcdf with BSD 3-Clause "New" or "Revised" License
def dtype(self):
        dt = self._h5ds.dtype
        # `unicode` is a py2/py3 compat alias defined elsewhere in h5netcdf (str on Python 3)
        if h5py.check_dtype(vlen=dt) is unicode:
            return str
        return dt 
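Both variable-length string flavours round-trip through check_dtype(), which is what the comparison above relies on; under Python 3 the unicode check is simply a check against str:

import h5py

str_dt = h5py.special_dtype(vlen=str)
bytes_dt = h5py.special_dtype(vlen=bytes)
assert h5py.check_dtype(vlen=str_dt) is str
assert h5py.check_dtype(vlen=bytes_dt) is bytes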
Example #12
Source File: test_core.py    From cooler with BSD 3-Clause "New" or "Revised" License
def test_put():
    f = make_hdf5_table('a')

    # append
    df = pd.DataFrame({
        'chrom': ['chr3', 'chr3'],
        'start': [0, 20],
        'end': [20, 40],
        'value': [4.0, 5.0],
    })
    core.put(f['table'], df, lo=5)
    f.flush()
    out = core.get(f['table'])
    assert len(out) == 7

    # insert a categorical column
    s = pd.Series(pd.Categorical(out['chrom'], ordered=True), index=out.index)
    s.name = 'chrom_enum'
    core.put(f['table'], s)
    assert h5py.check_dtype(enum=f['table/chrom_enum'].dtype)
    out = core.get(f['table'])
    assert len(out.columns) == 5
    assert pd.api.types.is_categorical_dtype(out['chrom_enum'].dtype)
    out = core.get(f['table'], convert_enum=False)
    assert len(out.columns) == 5
    assert pd.api.types.is_integer_dtype(out['chrom_enum'].dtype)

    # don't convert categorical to enum
    s.name = 'chrom_string'
    core.put(f['table'], s, store_categories=False)
    out = core.get(f['table'])
    assert len(out.columns) == 6
    assert not pd.api.types.is_categorical_dtype(out['chrom_string'].dtype)

    # scalar input
    core.put(f['table'], {'foo': 42})
    out = core.get(f['table'])
    assert len(out.columns) == 7
    assert (out['foo'] == 42).all() 
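Going the other direction (conceptually what store_categories=True does): a pandas Categorical can be stored as an HDF5 enum by building the dtype from its categories. A hedged sketch, not cooler's implementation:

import h5py
import numpy as np
import pandas as pd

s = pd.Series(pd.Categorical(['chr1', 'chr2', 'chr1'], ordered=True))
mapping = {cat: code for code, cat in enumerate(s.cat.categories)}
enum_dt = h5py.special_dtype(enum=('i1', mapping))

with h5py.File('table.h5', 'w') as f:
    f.create_dataset('chrom_enum', data=s.cat.codes.values.astype('i1'),
                     dtype=enum_dt)
    assert h5py.check_dtype(enum=f['chrom_enum'].dtype) == mapping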
Example #13
Source File: test_dataset.py    From keras-lambda with MIT License
def test_create(self):
        """ Enum datasets can be created and type correctly round-trips """
        dt = h5py.special_dtype(enum=('i', self.EDICT))
        ds = self.f.create_dataset('x', (100, 100), dtype=dt)
        dt2 = ds.dtype
        dict2 = h5py.check_dtype(enum=dt2)
        self.assertEqual(dict2, self.EDICT) 
Example #14
Source File: test_datatype.py    From keras-lambda with MIT License
def test_compound(self):

        fields = []
        fields.append(('field_1', h5py.special_dtype(vlen=str)))
        fields.append(('field_2', np.int32))
        dt = np.dtype(fields)
        self.f['mytype'] = np.dtype(dt)
        dt_out = self.f['mytype'].dtype.fields['field_1'][0]
        self.assertEqual(h5py.check_dtype(vlen=dt_out), str) 
Example #15
Source File: dataset_view.py    From QCPortal with BSD 3-Clause "New" or "Revised" License
def get_values(
        self, queries: List[Dict[str, Union[str, bool]]], subset: Optional[List[str]] = None
    ) -> Tuple[pd.DataFrame, Dict[str, str]]:
        """
        Parameters
        ----------
        subset
        queries: List[Dict[str, Union[str, bool]]]
            List of queries. Fields actually used are native, name, driver
        """
        import h5py

        units = {}
        entries = self.get_index(subset)
        indexes = entries._h5idx
        with self._read_file() as f:
            ret = pd.DataFrame(index=entries["index"])

            for query in queries:
                dataset_name = "value/" if query["native"] else "contributed_value/"
                dataset_name += self._normalize_hdf5_name(query["name"])
                driver = query["driver"]

                dataset = f[dataset_name]
                if not h5py.check_dtype(vlen=dataset.dtype):
                    data = [dataset[i] for i in indexes]
                else:
                    if driver.lower() == "gradient":
                        data = [np.reshape(dataset[i], (-1, 3)) for i in indexes]
                    elif driver.lower() == "hessian":
                        data = []
                        for i in indexes:
                            n2 = len(dataset[i])
                            n = int(round(np.sqrt(n2)))
                            data.append(np.reshape(dataset[i], (n, n)))
                    else:
                        warnings.warn(
                            f"Variable length data type not understood, "
                            f"returning flat array (driver = {driver}).",
                            RuntimeWarning,
                        )
                        try:
                            data = [np.array(dataset[i]) for i in indexes]
                        except ValueError:
                            data = [dataset[i] for i in indexes]
                column_name = query["name"]
                column_units = self._deserialize_field(dataset.attrs["units"])
                ret[column_name] = data
                units[column_name] = column_units

        return ret, units 
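The vlen branch works because h5py returns each variable-length element as a flat 1-D array; a hedged, self-contained sketch of writing and reshaping such data (file and dataset names are invented):

import h5py
import numpy as np

vlen_dt = h5py.special_dtype(vlen=np.float64)
with h5py.File('values.h5', 'w') as f:
    ds = f.create_dataset('value/gradient', (2,), dtype=vlen_dt)
    ds[0] = np.arange(6.0)                         # a flattened (2, 3) gradient
    ds[1] = np.arange(9.0)                         # a flattened (3, 3) gradient

with h5py.File('values.h5', 'r') as f:
    ds = f['value/gradient']
    if h5py.check_dtype(vlen=ds.dtype):
        gradients = [np.reshape(ds[i], (-1, 3)) for i in range(len(ds))]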
Example #16
Source File: dataset_view.py    From QCFractal with BSD 3-Clause "New" or "Revised" License
def get_values(
        self, queries: List[Dict[str, Union[str, bool]]], subset: Optional[List[str]] = None
    ) -> Tuple[pd.DataFrame, Dict[str, str]]:
        """
        Parameters
        ----------
        subset
        queries: List[Dict[str, Union[str, bool]]]
            List of queries. Fields actually used are native, name, driver
        """
        import h5py

        units = {}
        entries = self.get_index(subset)
        indexes = entries._h5idx
        with self._read_file() as f:
            ret = pd.DataFrame(index=entries["index"])

            for query in queries:
                dataset_name = "value/" if query["native"] else "contributed_value/"
                dataset_name += self._normalize_hdf5_name(query["name"])
                driver = query["driver"]

                dataset = f[dataset_name]
                if not h5py.check_dtype(vlen=dataset.dtype):
                    data = [dataset[i] for i in indexes]
                else:
                    if driver.lower() == "gradient":
                        data = [np.reshape(dataset[i], (-1, 3)) for i in indexes]
                    elif driver.lower() == "hessian":
                        data = []
                        for i in indexes:
                            n2 = len(dataset[i])
                            n = int(round(np.sqrt(n2)))
                            data.append(np.reshape(dataset[i], (n, n)))
                    else:
                        warnings.warn(
                            f"Variable length data type not understood, "
                            f"returning flat array (driver = {driver}).",
                            RuntimeWarning,
                        )
                        try:
                            data = [np.array(dataset[i]) for i in indexes]
                        except ValueError:
                            data = [dataset[i] for i in indexes]
                column_name = query["name"]
                column_units = self._deserialize_field(dataset.attrs["units"])
                ret[column_name] = data
                units[column_name] = column_units

        return ret, units