Python h5py.Group() Examples

The following are 30 code examples of h5py.Group(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module h5py, or try the search function.

Example #1

Source File: m0_1_0_newDbFormat.py From armi with Apache License 2.0

6 votes

def _visit(newDB, preCollection, name, dataset):
    """Process one visited HDF5 object and log what was done with it.

    Objects whose path starts with ``inputs`` are never passed to an updater.
    Paths whose second component is ``layout`` go to ``_updateLayout``; other
    three-component paths go to ``_updateParams``. Anything that was not
    updated is copied into ``newDB`` under its own name, unless it is a group,
    in which case it is skipped.
    """
    parts = name.split("/")
    wasUpdated = False
    if parts[0] != "inputs":
        if len(parts) > 1 and parts[1] == "layout":
            wasUpdated = _updateLayout(newDB, preCollection, name, dataset)
        elif len(parts) == 3:
            wasUpdated = _updateParams(newDB, preCollection, name, dataset)

    if wasUpdated:
        msg = "Updated"
    elif isinstance(dataset, h5py.Group):
        # Groups are created implicitly when the datasets below them are copied.
        msg = "Skipped"
    else:
        newDB.copy(dataset, dataset.name)
        msg = "Copied"

    runLog.important(f"{msg} Dataset {name}")

Example #2

Source File: save.py From chainer with MIT License

6 votes

def save_parameters_as_hdf5(model, filename='model.h5'):
    """Save ``model`` to an HDF5 archive and print the entries it contains.

    Args:
        model: Object handed to ``chainer.serializers.save_hdf5``.
        filename: Path of the archive to write and then list.

    The archive that was just written (``filename``) is the one that is
    re-opened and reported, and it is closed again once the listing is done.
    """
    # Save the model parameters into a HDF5 archive
    chainer.serializers.save_hdf5(filename, model)
    print('{} saved!\n'.format(filename))

    # Load the saved HDF5 using h5py
    print('--- The list of saved params in {} ---'.format(filename))
    with h5py.File(filename, 'r') as f:
        for param_key, param in f.items():
            msg = '{}:'.format(param_key)
            if isinstance(param, h5py.Dataset):
                msg += ' {}'.format(param.shape)
            print(msg)
            if isinstance(param, h5py.Group):
                # Only one level of nesting is listed; children are expected
                # to expose ``shape``.
                for child_key, child in param.items():
                    print('  {}:{}'.format(child_key, child.shape))
    print('---------------------------------------------\n')

Example #3

Source File: h5ad.py From anndata with BSD 3-Clause "New" or "Revised" License

6 votes

def write_sparse_as_dense(f, key, value, dataset_kwargs=MappingProxyType({})):
    """Write ``value`` into ``f[key]`` as a dense dataset, chunk by chunk.

    If ``key`` already exists and ``value`` is itself stored in the same file
    as ``f``, the data is first written under a temporary key (last path
    component prefixed with ``_``) and moved into place afterwards. Otherwise
    an existing ``f[key]`` is deleted before writing.
    """
    final_key = None  # set only when a temporary key is in use
    if key in f:
        reads_from_same_file = (
            isinstance(value, (h5py.Group, h5py.Dataset, SparseDataset))
            and value.file.filename == f.filename
        )
        if reads_from_same_file:
            final_key = key
            # e.g. raw/X -> raw/_X, or X -> _X
            key = re.sub(r"(.*)(\w(?!.*/))", r"\1_\2", key.rstrip("/"))
        else:
            del f[key]  # nothing depends on the old entry, so replace it

    dense = f.create_dataset(
        key, shape=value.shape, dtype=value.dtype, **dataset_kwargs
    )
    # 1 for csc_matrix input, 0 for everything else.
    axis = int(isinstance(value, sparse.csc_matrix))
    for chunk in idx_chunks_along_axis(value.shape, axis, 1000):
        dense[chunk] = value[chunk].toarray()

    if final_key is None:
        return
    # Swap the temporary dataset into the requested location.
    del f[final_key]
    f[final_key] = f[key]
    del f[key]

Example #4

Source File: file_reading.py From Deep_MRI_brain_extraction with MIT License

6 votes

def load_h5(fname, key=None):
    """Load one dataset from an HDF5 file and return it as a numpy array.

    Args:
        fname: Path of the HDF5 file.
        key: Name of the entry to read. When omitted, the first entry of the
            file is used.

    If the selected entry is a group, the member named ``key`` is read when a
    key was given, otherwise the group's first member.

    Raises:
        AssertionError: If the file cannot be opened or contains no entry to
            pick. (Raised explicitly, so it also fires under ``python -O``.)
    """
    try:
        hfile = h5py.File(fname, 'r')
    except Exception:
        raise AssertionError(
            "\nload_h5()::ERROR: Cannot open <<" + str(fname) + ">>\n")

    empty_msg = ("\nload_h5()::ERROR: File is not h5 / is empty  <<"
                 + str(fname) + ">>\n")
    # The context manager closes the file on every exit path.
    with hfile:
        key_given = key is not None
        if not key_given:
            # keys() is a non-indexable view on Python 3, so take the first
            # name through the iterator protocol instead of ``keys()[0]``.
            key = next(iter(hfile), None)
            if key is None:
                raise AssertionError(empty_msg)
        xx = hfile[key]
        if isinstance(xx, h5py.Group):
            member = key if key_given else next(iter(xx), None)
            if member is None:
                raise AssertionError(empty_msg)
            xx = xx[member]
        # Materialise the data while the file is still open.
        return np.asarray(xx, dtype=xx.dtype)

Example #5

Source File: hdsortsortingextractor.py From spikeextractors with MIT License

6 votes

def _parse_units(file, _units):
    """Convert the ``_units`` entry of ``file`` into a list of per-unit dicts.

    Two layouts are handled:

    * ``_units`` is a group: each member holds rows whose first element is a
      key into ``file``; the referenced datasets are collected per member and
      then transposed into one dict per unit.
    * otherwise: each element of ``_units`` leads to a group in ``file``
      whose members are read into one dict.
    """
    import h5py

    if not isinstance(_units, h5py.Group):
        units = []
        for unit_ref in _units:
            unit_group = file[unit_ref[()][0]]
            units.append(
                {field: unit_group[field][()] for field in unit_group.keys()}
            )
        return units

    columns = {}
    for field in _units.keys():
        collected = []
        for ref_row in _units[field]:
            target = file[ref_row[0]]
            if not isinstance(target, h5py.Dataset):
                # Stop at the first non-dataset; the field is only kept if
                # something was collected before this point.
                break
            collected.append(target[()])
            columns[field] = collected
    return [dict(zip(columns, row)) for row in zip(*columns.values())]

Example #6

Source File: hdf5.py From Pyslvs-UI with GNU Affero General Public License v3.0

6 votes

def _h5py_load(f: Group) -> Mapping[str, Any]:
    """Recursively read the group ``f`` into a plain dictionary.

    Sub-groups become nested dictionaries. Dataset values are read whole;
    ``void`` values are passed through ``_decompress`` and then decoded
    according to their first byte (``s``: UTF-8 text, ``!``: evaluated
    expression). Members that are neither ``Group`` nor ``Dataset`` are
    ignored.
    """
    loaded = {}
    for name, node in f.items():  # type: str, Union[Group, Dataset]
        node_type = type(node)
        if node_type is Group:
            loaded[name] = _h5py_load(node)
            continue
        if node_type is not Dataset:
            continue
        payload = node[()]
        if type(payload) is void:
            payload = _decompress(payload)
            if payload.startswith(b's'):
                payload = payload.decode('utf-8')[1:]
            elif payload.startswith(b'!'):
                # NOTE(review): eval() runs whatever expression the file
                # contains; only safe when the file comes from a trusted source.
                payload = eval(payload.decode('utf-8')[1:])
        loaded[name] = payload
    return loaded

Example #7

Source File: pyanitools.py From deepchem with MIT License

6 votes

def get_data(self, path, prefix=''):
    """Return the non-group members stored under ``self.store[path]``.

    Args:
        path: Key of the entry to read from ``self.store``.
        prefix: Text placed before ``path`` (joined with ``/``) in the
            returned ``'path'`` value.

    Returns:
        dict: ``{'path': '<prefix>/<path>', <name>: <data>, ...}``. Each value
        is a numpy array, except non-empty arrays of bytes, which are decoded
        to a list of ASCII strings.
    """
    item = self.store[path]
    path = '{}/{}'.format(prefix, path)
    data = {'path': path}
    for k in list(item.keys()):
      if isinstance(item[k], h5py.Group):
        continue
      # ``Dataset.value`` was removed in h5py 3.0; ``[()]`` reads everything.
      dataset = np.array(item[k][()])

      # ndim guard: a scalar dataset cannot be indexed with [0].
      if dataset.ndim != 0 and dataset.size != 0:
        if isinstance(dataset[0], np.bytes_):
          dataset = [a.decode('ascii') for a in dataset]

      data[k] = dataset
    return data

Example #8

Source File: pyanitools.py From deepchem with MIT License

6 votes

def h5py_dataset_iterator(self, g, prefix=''):
    """Walk the group ``g`` recursively and yield one dict per leaf group.

    A member of ``g`` whose first child is a dataset is treated as a leaf:
    its non-group children are read and yielded as
    ``{'path': ..., <name>: <data>, ...}``. Any other member is descended
    into, with its path used as the new ``prefix``.

    Non-empty arrays of bytes are decoded to lists of ASCII strings.
    """
    for key in g.keys():
      item = g[key]
      path = '{}/{}'.format(prefix, key)
      keys = list(item.keys())
      if isinstance(item[keys[0]], h5py.Dataset):  # test for dataset
        data = {'path': path}
        for k in keys:
          if not isinstance(item[k], h5py.Group):
            # ``Dataset.value`` was removed in h5py 3.0; ``[()]`` reads
            # the full dataset.
            dataset = np.array(item[k][()])

            # ndim guard: a scalar dataset cannot be indexed with [0].
            if dataset.ndim != 0 and dataset.size != 0:
              if isinstance(dataset[0], np.bytes_):
                dataset = [a.decode('ascii') for a in dataset]

            data[k] = dataset

        yield data
      else:  # test for group (go down)
        for s in self.h5py_dataset_iterator(item, path):
          yield s

Example #9

Source File: _pyanitools.py From torchani with MIT License

6 votes

def get_data(self, path, prefix=''):
        """Read every non-group member of ``self.store[path]`` into a dict.

        The result always carries ``'path'`` (``prefix`` and ``path`` joined
        by ``/``). Non-empty arrays whose first element is ``np.bytes_`` are
        returned as lists of ASCII-decoded strings.
        """
        group = self.store[path]
        result = {'path': '{}/{}'.format(prefix, path)}
        for name in list(group.keys()):
            member = group[name]
            if isinstance(member, h5py.Group):
                continue
            values = np.array(member[()])
            has_elements = isinstance(values, np.ndarray) and values.size != 0
            if has_elements and isinstance(values[0], np.bytes_):
                values = [raw.decode('ascii') for raw in values]
            result[name] = values
        return result

Example #10

Source File: addons.py From pyscf with Apache License 2.0

6 votes

def __enter__(self):
        """Resolve ``self.eri`` to something indexable and return it.

        * ``str``: opened read-only as an HDF5 file (kept in ``self.feri``).
        * ``h5py.Group``: used as is.
        * object with a string ``name`` attribute: ``name`` is opened
          read-only as an HDF5 file (kept in ``self.feri``).
        * ``numpy.ndarray``: returned directly; ``self.dataname`` is ignored.

        For the file/group cases the container itself is returned when
        ``self.dataname`` is ``None``, otherwise the entry of that name.

        Raises:
            RuntimeError: If ``self.eri`` is none of the supported types.
        """
        if isinstance(self.eri, str):
            feri = self.feri = h5py.File(self.eri, 'r')
        elif isinstance(self.eri, h5py.Group):
            feri = self.eri
        elif isinstance(getattr(self.eri, 'name', None), str):
            feri = self.feri = h5py.File(self.eri.name, 'r')
        elif isinstance(self.eri, numpy.ndarray):
            return self.eri
        else:
            # Format the message; passing the type as a second argument would
            # leave the ``%s`` placeholder unexpanded.
            raise RuntimeError('Unknown eri type %s' % (type(self.eri),))

        if self.dataname is None:
            return feri
        else:
            return feri[self.dataname]

Example #11

Source File: _pyanitools.py From torchani with MIT License

6 votes

def h5py_dataset_iterator(self, g, prefix=''):
        """Recursively yield the datasets below ``g`` as dictionaries.

        Each member of ``g`` whose first child is a dataset produces one dict
        holding ``'path'`` plus every non-group child; all other members are
        walked recursively with their own path as prefix.
        """
        for name in g.keys():
            node = g[name]
            node_path = '{}/{}'.format(prefix, name)
            members = list(node.keys())
            if not isinstance(node[members[0]], h5py.Dataset):
                # Not a leaf: descend one level.
                yield from self.h5py_dataset_iterator(node, node_path)
                continue

            record = {'path': node_path}
            for member in members:
                if isinstance(node[member], h5py.Group):
                    continue
                values = np.array(node[member][()])
                if isinstance(values, np.ndarray) and values.size != 0:
                    if isinstance(values[0], np.bytes_):
                        values = [raw.decode('ascii') for raw in values]
                record[member] = values
            yield record

Example #12

Source File: h5ad.py From anndata with BSD 3-Clause "New" or "Revised" License

6 votes

def read_series(dataset) -> Union[np.ndarray, pd.Categorical]:
    """Read ``dataset`` as an array, or as a categorical if it is encoded so.

    A dataset without a ``"categories"`` attribute is returned as read. If the
    attribute is an ``h5py.Reference``, the categories and their ``ordered``
    flag are read from the referenced dataset. Any other attribute value is
    the legacy encoding: it is used as the categories directly, a
    ``FutureWarning`` is issued, and the result is unordered.
    """
    if "categories" not in dataset.attrs:
        return dataset[...]

    categories = dataset.attrs["categories"]
    if isinstance(categories, h5py.Reference):
        categories_dset = dataset.parent[dataset.attrs["categories"]]
        categories = categories_dset[...]
        ordered = bool(categories_dset.attrs.get("ordered", False))
    else:
        # The legacy encoding carries no ordering information; without this
        # default the call below fails with UnboundLocalError.
        ordered = False
        # TODO: remove this code at some point post 0.7
        # TODO: Add tests for this
        warn(
            f"Your file {str(dataset.file.name)!r} has invalid categorical "
            "encodings due to being written from a development version of "
            "AnnData. Rewrite the file ensure you can read it in the future.",
            FutureWarning,
        )
    return pd.Categorical.from_codes(dataset[...], categories, ordered=ordered)


# @report_read_key_on_error
# def read_sparse_dataset_backed(group: h5py.Group) -> sparse.spmatrix:
#     return SparseDataset(group)

Example #13

Source File: population.py From sonata with BSD 3-Clause "New" or "Revised" License

6 votes

def __init__(self, pop_name, pop_group, types_table):
        """Store the population's name, group and types table and set up caches.

        :param pop_name: name of the population
        :param pop_group: container indexed with the ``type_ids_column``,
            ``group_id_column`` and ``group_index_column`` attributes
            (presumably the population's h5py.Group; confirm against caller)
        :param types_table: types table kept for later use
        """
        self._pop_name = pop_name
        self._pop_group = pop_group
        self._types_table = types_table
        self._nrows = 0

        # For storing individual groups
        self._group_map = {}  # grp-id --> h5py.Group object
        # Called after _group_map exists and before _group_cache is created;
        # keep this order (presumably it fills _group_map; verify in
        # _find_groups).
        self._find_groups()
        self._group_cache = {}  # grp-id --> sonata.io.Group() object

        # References to most of the population's primary datasets
        self._type_id_ds = pop_group[self.type_ids_column]
        self._group_id_ds = pop_group[self.group_id_column]
        self._group_index_ds = pop_group[self.group_index_column]

        self._group_indicies = {}  # grp-id --> list of row indices
        # Starts False; nothing in this constructor builds the index cache.
        self._group_indicies_cache_built = False

Example #14

Source File: raw.py From anndata with BSD 3-Clause "New" or "Revised" License

6 votes

def X(self) -> Union[SparseDataset, np.ndarray, sparse.spmatrix]:
        """Return the raw ``X`` matrix, reading it from the backing file if needed.

        Without a backing file the in-memory ``self._X`` is returned. Otherwise
        the backing file is opened if necessary and ``raw/X`` (or the older
        ``raw.X``) is looked up; a group is wrapped in ``SparseDataset``. For a
        view, only the rows selected by ``_oidx`` are returned.

        Raises:
            AttributeError: If neither key exists in the backing file.
        """
        # TODO: Handle unsorted array of integer indices for h5py.Datasets
        adata = self._adata
        if not adata.isbacked:
            return self._X
        if not adata.file.is_open:
            adata.file.open()

        # "raw/X" is the current location, "raw.X" the backwards-compatible one.
        for location in ("raw/X", "raw.X"):
            if location in adata.file:
                stored = adata.file[location]
                break
        else:
            raise AttributeError(
                f"Could not find dataset for raw X in file: "
                f"{self._adata.file.filename}."
            )

        if isinstance(stored, h5py.Group):
            stored = SparseDataset(stored)
        if not adata.is_view:
            return stored
        # TODO: As noted above, implement views of raw
        #       so we can know if we need to subset by var
        return stored[adata._oidx, slice(None)]

Example #15

Source File: file_root.py From sonata with BSD 3-Clause "New" or "Revised" License

6 votes

def __init__(self, root_name, h5_files, h5_mode, csv_files):
        """
        Load the h5 and csv files, build the types table, register the
        population groups and finally validate the result with check_format().

        :param root_name: should either be 'nodes' or 'edges'
        :param h5_files: file (or list of files) containing nodes/edges
        :param h5_mode: currently only supporting 'r' mode in h5py
        :param csv_files: file (or list of files) containing node/edge types
        """
        self._root_name = root_name
        # utils.listify() lets a single file and a list of files share one code path.
        self._h5_handles = [utils.load_h5(f, h5_mode) for f in utils.listify(h5_files)]
        # Each csv entry keeps the originating file next to its loaded content.
        self._csv_handles = [(f, utils.load_csv(f)) for f in utils.listify(csv_files)]

        # merge and create a table of the types table(s)
        self._types_table = None
        self._build_types_table()

        # population_name->h5py.Group table (won't instantiate the population)
        self._populations_groups = {}
        self._store_groups()

        # A map between population_name -> Population object. Population objects aren't created until called, in the
        # case user wants to split populations among MPI nodes (instantiation will create node/edge indices and other
        # overhead).
        self._populations_cache = {}

        # Runs last, after every attribute above has been set.
        self.check_format()

Example #16

Source File: network_components.py From TensorNetwork with Apache License 2.0

6 votes

def _load_edge(cls, edge_data: h5py.Group, nodes_dict: Dict[Text,
                                                              AbstractNode]):
    """Rebuild an edge from its hdf5 representation and attach it to its nodes.

    Args:
      edge_data: h5py group that contains the serialized edge data
        (``node1``, ``axis1``, ``name`` and optionally ``node2``, ``axis2``).
      nodes_dict: dictionary mapping a node's name to the node.

    Returns:
      The added edge.
    """
    first_node = nodes_dict[edge_data["node1"][()]]
    first_axis = int(edge_data["axis1"][()])

    # Without a stored "node2" the edge has no second endpoint.
    second_node = second_axis = None
    if "node2" in edge_data.keys():
      second_node = nodes_dict[edge_data["node2"][()]]
      second_axis = int(edge_data["axis2"][()])

    edge_name = edge_data["name"][()]
    edge = cls(
        node1=first_node,
        axis1=first_axis,
        node2=second_node,
        axis2=second_axis,
        name=edge_name)
    first_node.add_edge(edge, first_axis)
    if second_node is not None:
      second_node.add_edge(edge, second_axis)
    return edge

Example #17

Source File: fileio_backends.py From scqubits with BSD 3-Clause "New" or "Revised" License

6 votes

def write_attributes(self, h5file_group):
        """
        Write ``self.io_data.attributes`` and the type name into ``h5file_group``.

         1. the type name is stored as attribute ``__type``
         2. dict values are written via `io.write` under `<h5py.Group>/__dicts/<name>`
         3. list and tuple values are written via `io.write` under `<h5py.Group>/__lists/<name>`
         4. every other value is stored directly in `h5py.Group.attrs`

        Parameters
        ----------
        h5file_group: h5py.Group
        """
        # Record the type of the current class instance
        h5file_group.attrs.create("__type", self.io_data.typename)

        for attr_name, attr_value in self.io_data.attributes.items():
            # h5py does not serialize dicts automatically, so containers are
            # written into their own sub-group instead.
            if isinstance(attr_value, dict):
                container_prefix = "__dicts/"
            elif isinstance(attr_value, (list, tuple)):
                container_prefix = "__lists/"
            else:
                h5file_group.attrs[attr_name] = attr_value
                continue

            group_name = container_prefix + attr_name
            h5file_group.create_group(group_name)
            io.write(attr_value, self.filename, file_handle=h5file_group[group_name])

Example #18

Source File: network_components.py From TensorNetwork with Apache License 2.0

6 votes

def _load_node(cls, node_data: h5py.Group) -> "CopyNode":
    """Recreate a ``CopyNode`` from its hdf5 representation.

    Args:
      node_data: h5py group that contains the serialized node data

    Returns:
      The loaded node. Its rank is the length of the stored shape and its
      dimension is the first entry of that shape.
    """
    name, shape, axis_names, backend = cls._load_node_data(node_data)
    stored_dtype = np.dtype(node_data['copy_node_dtype'][()])
    return CopyNode(
        rank=len(shape),
        dimension=shape[0],
        name=name,
        axis_names=list(axis_names),
        backend=backend,
        dtype=stored_dtype)

Example #19

Source File: network_components.py From TensorNetwork with Apache License 2.0

6 votes

def _load_node(cls, node_data: h5py.Group) -> "AbstractNode":
    """Recreate a ``Node`` from its hdf5 representation.

    Args:
      node_data: h5py group that contains the serialized node data

    Returns:
      The loaded node, built from the stored ``tensor`` dataset; the stored
      shape is not used.
    """
    name, _, axis_names, backend = cls._load_node_data(node_data)
    stored_tensor = node_data['tensor'][()]
    return Node(
        stored_tensor,
        name=name,
        axis_names=list(axis_names),
        backend=backend)

Example #20

Source File: fileio_backends.py From scqubits with BSD 3-Clause "New" or "Revised" License

6 votes

def to_file(self, io_data, file_handle=None):
        """
        Takes the serialized IOData and writes it either to a new h5 file with file name given by `self.filename` or to
        the given h5py.Group of an open h5 file.

        A file opened here is closed again before returning, even if writing fails. A handle supplied by the caller is
        left open; closing it remains the caller's responsibility.

        Parameters
        ----------
        io_data: IOData
        file_handle: h5py.Group, optional
        """
        self.io_data = io_data
        owns_file = file_handle is None
        if owns_file:
            h5file_group = h5py.File(self.filename, 'w')
        else:
            h5file_group = file_handle

        try:
            self.write_attributes(h5file_group)
            self.write_ndarrays(h5file_group)
            self.write_objects(h5file_group)
        finally:
            if owns_file:
                h5file_group.close()
Example #21

Source File: network_components.py From TensorNetwork with Apache License 2.0

6 votes

def _save_node(self, node_group: h5py.Group) -> None:
    """Write the properties shared by all node types into ``node_group``.

    Stores the class name, backend name, node name, shape, axis names and
    edge names as datasets. Subclasses are expected to add their own data.

    Args:
      node_group: h5py group where data is saved
    """
    node_group.create_dataset('type', data=type(self).__name__)
    node_group.create_dataset('backend', data=self.backend.name)
    node_group.create_dataset('name', data=self.name)
    node_group.create_dataset('shape', data=self.shape)

    if not self.axis_names:
      # No documented way to store None was found, so this integer stands in
      # for "no axis names".
      node_group.create_dataset('axis_names', dtype='i', data=123456789)
    else:
      axis_name_array = np.array(self.axis_names, dtype=object)
      node_group.create_dataset(
          'axis_names', dtype=string_type, data=axis_name_array)

    edge_name_array = np.array([edge.name for edge in self.edges], dtype=object)
    node_group.create_dataset('edges', dtype=string_type, data=edge_name_array)

Example #22

Source File: fileio_backends.py From scqubits with BSD 3-Clause "New" or "Revised" License

6 votes

def read_objects(self, h5file_group):
        """
        Read every entry of the ``__objects`` sub-group of the given h5 file group via `io.read`.

        Parameters
        ----------
        h5file_group: h5py.Group

        Returns
        -------
        dict [str, IOData]
            maps each entry name in ``__objects`` to what `io.read` returned for it
        """
        objects_group = h5file_group["__objects"]
        return {
            obj_name: io.read(self.filename, objects_group[obj_name])
            for obj_name in objects_group
        }

Example #23

Source File: fileio_backends.py From scqubits with BSD 3-Clause "New" or "Revised" License

6 votes

def from_file(self, filename, file_handle=None):
        """
        Read attributes, ndarrays and inner objects and bundle them into an IOData instance. The data comes from the
        given h5py.Group handle when one is supplied, otherwise from `filename`, which is opened for reading.

        Parameters
        ----------
        filename: str
        file_handle: h5py.Group, optional

        Returns
        -------
        IOData
        """
        source = h5py.File(filename, 'r') if file_handle is None else file_handle

        attributes = self.read_attributes(source)
        # The type name travels with the attributes but is passed separately.
        typename = attributes.pop('__type')
        ndarrays = self.read_ndarrays(source)
        inner_objects = self.read_objects(source)
        return io.IOData(typename, attributes, ndarrays, inner_objects)

Example #24

Source File: io_utils.py From GraphicDesignPatternByPython with MIT License

6 votes

def __init__(self, path, mode='a'):
        """Wrap an h5py group, an HDF5 file path or a plain dict.

        A ``str`` is opened as an HDF5 file with ``mode``. A ``dict`` is used
        in place (emptied first when ``mode`` is ``'w'``) and tagged with
        ``'_is_group'``. ``read_only`` is true only for mode ``'r'``.

        Raises:
            TypeError: If ``path`` is not a Group, str or dict.
        """
        if isinstance(path, h5py.Group):
            backing, opened_here = path, False
        elif isinstance(path, str):
            backing, opened_here = h5py.File(path, mode=mode), True
        elif isinstance(path, dict):
            if mode == 'w':
                path.clear()
            # Flag to check if a dict is user defined data or a sub group:
            path['_is_group'] = True
            backing, opened_here = path, False
        else:
            raise TypeError('Required Group, str or dict. '
                            'Received: {}.'.format(type(path)))
        self.data = backing
        self._is_file = opened_here
        self.read_only = mode == 'r'

Example #25

Source File: pyanitools.py From ANI1_dataset with MIT License

6 votes

def get_data(self, path, prefix=''):
        """Return the non-group members stored under ``self.store[path]``.

        Args:
            path: Key of the entry to read from ``self.store``.
            prefix: Text placed before ``path`` (joined with ``/``) in the
                returned ``'path'`` value.

        Returns:
            dict: ``{'path': '<prefix>/<path>', <name>: <data>, ...}``. Values
            are numpy arrays, except non-empty arrays of bytes, which are
            decoded to lists of ASCII strings.
        """
        item = self.store[path]
        path = '{}/{}'.format(prefix, path)
        data = {'path': path}
        for k in list(item.keys()):
            if isinstance(item[k], h5py.Group):
                continue
            # ``Dataset.value`` was removed in h5py 3.0; ``[()]`` reads the
            # full dataset.
            dataset = np.array(item[k][()])

            # ndim guard: a scalar dataset cannot be indexed with [0].
            if dataset.ndim != 0 and dataset.size != 0:
                if isinstance(dataset[0], np.bytes_):
                    dataset = [a.decode('ascii') for a in dataset]

            data[k] = dataset
        return data

Example #26

Source File: pyanitools.py From ANI1_dataset with MIT License

6 votes

def h5py_dataset_iterator(self,g, prefix=''):
        """Walk the group ``g`` recursively and yield one dict per leaf group.

        A member of ``g`` whose first child is a dataset is treated as a leaf:
        its non-group children are read and yielded as
        ``{'path': ..., <name>: <data>, ...}``. Any other member is descended
        into, with its path used as the new ``prefix``.

        Non-empty arrays of bytes are decoded to lists of ASCII strings.
        """
        for key in g.keys():
            item = g[key]
            path = '{}/{}'.format(prefix, key)
            keys = list(item.keys())
            if isinstance(item[keys[0]], h5py.Dataset): # test for dataset
                data = {'path':path}
                for k in keys:
                    if not isinstance(item[k], h5py.Group):
                        # ``Dataset.value`` was removed in h5py 3.0; ``[()]``
                        # reads the full dataset.
                        dataset = np.array(item[k][()])

                        # ndim guard: a scalar dataset cannot be indexed with [0].
                        if dataset.ndim != 0 and dataset.size != 0:
                            if isinstance(dataset[0], np.bytes_):
                                dataset = [a.decode('ascii') for a in dataset]

                        data[k] = dataset

                yield data
            else: # test for group (go down)
                yield from self.h5py_dataset_iterator(item, path)

Example #27

Source File: mparray.py From mpnum with BSD 3-Clause "New" or "Revised" License

6 votes

def dump(self, target):
        """Serializes MPArray to :code:`h5py.Group`. Recover using
        :func:`~load`.

        A string ``target`` is opened as a new h5 file (mode ``'w'``) and the
        array is written to its root; the file is closed afterwards.

        :param target: :code:`h5py.Group` the instance should be saved to or
            path to h5 file (it's then serialized to /)

        """
        if isinstance(target, str):
            import h5py
            with h5py.File(target, 'w') as outfile:
                return self.dump(outfile)

        attrs = target.attrs
        # Stored as text for convenience only.
        attrs['ranks'] = str(self.ranks)
        attrs['shape'] = str(self.shape)

        # These are actually used in MPArray.load.
        attrs['len'] = len(self)
        attrs['canonical_form'] = self.canonical_form

        # One entry per site, keyed by the site index as a string.
        for index, local_tensor in enumerate(self._lt):
            target[str(index)] = local_tensor

Example #28

Source File: mparray.py From mpnum with BSD 3-Clause "New" or "Revised" License

6 votes

def group_sites(self, sites_per_group):
        """Group several MPA sites into one site.

        The resulting MPA has length ``len(self) // sites_per_group``; each
        new site is built from ``sites_per_group`` consecutive local tensors.
        With ``sites_per_group == 1`` the array itself is returned.

        :param int sites_per_group: Number of sites to be grouped into one
        :returns: An MPA with ``len(self) // sites_per_group`` sites
        :raises ValueError: if ``len(self)`` is not a multiple of
            ``sites_per_group``

        """
        nsites = len(self)
        if nsites % sites_per_group != 0:
            raise ValueError('Cannot group: {} not a multiple of {}'
                             .format(nsites, sites_per_group))
        if sites_per_group == 1:
            return self

        grouped = []
        for start in range(0, nsites, sites_per_group):
            block = self._lt[start:start + sites_per_group]
            grouped.append(_ltens_to_array(block))
        return MPArray(grouped)

Example #29

Source File: df.py From pyscf with Apache License 2.0

6 votes

def get_naoaux(self):
        '''The dimension of auxiliary basis at gamma point'''
        # naoaux is determined from self._cderi, because a DF object may be
        # used as a CD object when self._cderi is provided.
        if self._cderi is None:
            self.build()

        def _leading_dim(entry):
            # A group keeps its data under member '0'; anything else is
            # expected to expose ``shape`` itself.
            if isinstance(entry, h5py.Group):
                return entry['0'].shape[0]
            return entry.shape[0]

        # self._cderi['j3c/k_id/seg_id']
        with addons.load(self._cderi, 'j3c/0') as feri:
            naux = _leading_dim(feri)

        cell = self.cell
        reads_extra_block = (
            cell.dimension == 2 and cell.low_dim_ft_type != 'inf_vacuum' and
            not isinstance(self._cderi, numpy.ndarray))
        if not reads_extra_block:
            return naux

        # When present, the leading dimension of 'j3c-/0' is added on top.
        with h5py.File(self._cderi, 'r') as feri:
            if 'j3c-/0' in feri:
                naux += _leading_dim(feri['j3c-/0'])
        return naux

Example #30

Source File: database.py From PointNetGPD with MIT License

5 votes

def objects(self):
        """ :obj:`h5py.Group` : Data containing handles of objects.
        Acts like a dictionary mapping object keys to object data.

        Looked up in ``self.data_`` under ``OBJECTS_KEY``.
        """
        object_handles = self.data_[OBJECTS_KEY]
        return object_handles