Python h5py.Dataset() Examples

The following are 30 code examples of h5py.Dataset(), drawn from open-source projects. The original project, source file, and license are listed above each example.
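Before working through the examples, here is a minimal, self-contained sketch (the file name example.h5 is arbitrary) showing where h5py.Dataset objects come from: datasets are created through a file or group handle, and objects read back from a file can be told apart from groups with an isinstance check, the pattern most of the examples below rely on.

import h5py
import numpy as np

# Write a small dataset, then reopen the file and inspect what comes back.
with h5py.File('example.h5', 'w') as f:
    f.create_dataset('x', data=np.arange(10))

with h5py.File('example.h5', 'r') as f:
    obj = f['x']
    assert isinstance(obj, h5py.Dataset)  # a Dataset, not a Group
    data = obj[()]  # read the whole dataset into a NumPy array
    print(obj.shape, obj.dtype, data)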
Example #1
Source File: h5ad.py    From anndata with BSD 3-Clause "New" or "Revised" License
def _read_raw(
    f: Union[h5py.File, AnnDataFileManager],
    as_sparse: Collection[str] = (),
    rdasp: Callable[[h5py.Dataset], sparse.spmatrix] = None,
    *,
    attrs: Collection[str] = ("X", "var", "varm"),
):
    if as_sparse:
        assert rdasp is not None, "must supply rdasp if as_sparse is supplied"
    raw = {}
    if "X" in attrs and "raw/X" in f:
        read_x = rdasp if "raw/X" in as_sparse else read_attribute
        raw["X"] = read_x(f["raw/X"])
    for v in ("var", "varm"):
        if v in attrs and f"raw/{v}" in f:
            raw[v] = read_attribute(f[f"raw/{v}"])
    return _read_legacy_raw(f, raw, read_dataframe, read_attribute, attrs=attrs) 
Example #2
Source File: hdf5.py    From Pyslvs-UI with GNU Affero General Public License v3.0
def _h5py_load(f: Group) -> Mapping[str, Any]:
    """Load function for h5py."""
    data = {}
    for k, v in f.items():  # type: str, Union[Group, Dataset]
        if type(v) is Group:
            data[k] = _h5py_load(v)
        elif type(v) is Dataset:
            value = v[()]
            if type(value) is void:
                value = _decompress(value)
                if value.startswith(b's'):
                    value = value.decode('utf-8')[1:]
                elif value.startswith(b'!'):
                    value = eval(value.decode('utf-8')[1:])
            data[k] = value
    return data 
Example #3
Source File: _pyanitools.py    From torchani with MIT License
def h5py_dataset_iterator(self, g, prefix=''):
        """Group recursive iterator

        Iterate through all groups in all branches and return datasets in dicts.
        """
        for key in g.keys():
            item = g[key]
            path = '{}/{}'.format(prefix, key)
            keys = [i for i in item.keys()]
            if isinstance(item[keys[0]], h5py.Dataset):  # test for dataset
                data = {'path': path}
                for k in keys:
                    if not isinstance(item[k], h5py.Group):
                        dataset = np.array(item[k][()])

                        if isinstance(dataset, np.ndarray):
                            if dataset.size != 0:
                                if isinstance(dataset[0], np.bytes_):
                                    dataset = [a.decode('ascii')
                                               for a in dataset]
                        data.update({k: dataset})
                yield data
            else:  # test for group (go down)
                yield from self.h5py_dataset_iterator(item, path) 
Example #4
Source File: test_deprecations.py    From anndata with BSD 3-Clause "New" or "Revised" License
def test_force_dense_deprecated(tmp_path):
    dense_pth = tmp_path / "dense.h5ad"
    adata = AnnData(X=sparse.random(10, 10, format="csr"))
    adata.raw = adata

    with pytest.warns(FutureWarning):
        adata.write_h5ad(dense_pth, force_dense=True)
    with h5py.File(dense_pth, "r") as f:
        assert isinstance(f["X"], h5py.Dataset)
        assert isinstance(f["raw/X"], h5py.Dataset)

    dense = ad.read_h5ad(dense_pth)

    assert isinstance(dense.X, np.ndarray)
    assert isinstance(dense.raw.X, np.ndarray)
    assert_equal(adata, dense)


#######################################
# Dealing with uns adj matrices
####################################### 
Example #5
Source File: test_io_conversion.py    From anndata with BSD 3-Clause "New" or "Revised" License
def test_sparse_to_dense_inplace(tmp_path, spmtx_format):
    pth = tmp_path / "adata.h5ad"
    orig = gen_adata((50, 50), spmtx_format)
    orig.raw = orig
    orig.write(pth)
    backed = ad.read_h5ad(pth, backed="r+")
    backed.write(as_dense=("X", "raw/X"))
    new = ad.read_h5ad(pth)

    assert_equal(orig, new)
    assert_equal(backed, new)

    assert isinstance(new.X, np.ndarray)
    assert isinstance(new.raw.X, np.ndarray)
    assert isinstance(orig.X, spmtx_format)
    assert isinstance(orig.raw.X, spmtx_format)
    assert isinstance(backed.X, h5py.Dataset)
    assert isinstance(backed.raw.X, h5py.Dataset) 
Example #6
Source File: compartment_reader.py    From sonata with BSD 3-Clause "New" or "Revised" License
def __init__(self, pop_grp, pop_name):
        self._data_grp = pop_grp['data']
        self._mapping = pop_grp['mapping']
        self._population = pop_name

        self._gid2data_table = {}
        if self._mapping is None:
            raise Exception('could not find /mapping group')

        gids_ds = self._mapping[self.node_ids_ds]  # ['node_ids']
        index_pointer_ds = self._mapping['index_pointer']
        for indx, gid in enumerate(gids_ds):
            self._gid2data_table[gid] = slice(index_pointer_ds[indx], index_pointer_ds[indx+1])

        time_ds = self._mapping['time']
        self._t_start = np.float(time_ds[0])
        self._t_stop = np.float(time_ds[1])
        self._dt = np.float(time_ds[2])
        self._n_steps = int((self._t_stop - self._t_start) / self._dt)

        self._custom_cols = {col: grp for col, grp in self._mapping.items() if
                             col not in self.sonata_columns and isinstance(grp, h5py.Dataset)} 
Example #7
Source File: h5ad.py    From anndata with BSD 3-Clause "New" or "Revised" License
def write_sparse_as_dense(f, key, value, dataset_kwargs=MappingProxyType({})):
    real_key = None  # Flag for if temporary key was used
    if key in f:
        if (
            isinstance(value, (h5py.Group, h5py.Dataset, SparseDataset))
            and value.file.filename == f.filename
        ):  # Write to temporary key before overwriting
            real_key = key
            # Transform key to temporary, e.g. raw/X -> raw/_X, or X -> _X
            key = re.sub(r"(.*)(\w(?!.*/))", r"\1_\2", key.rstrip("/"))
        else:
            del f[key]  # Wipe before write
    dset = f.create_dataset(key, shape=value.shape, dtype=value.dtype, **dataset_kwargs)
    compressed_axis = int(isinstance(value, sparse.csc_matrix))
    for idx in idx_chunks_along_axis(value.shape, compressed_axis, 1000):
        dset[idx] = value[idx].toarray()
    if real_key is not None:
        del f[real_key]
        f[real_key] = f[key]
        del f[key] 
Example #8
Source File: h5ad.py    From anndata with BSD 3-Clause "New" or "Revised" License
def read_dataset(dataset: h5py.Dataset):
    value = dataset[()]
    if not hasattr(value, "dtype"):
        return value
    elif isinstance(value.dtype, str):
        pass
    elif issubclass(value.dtype.type, np.string_):
        value = value.astype(str)
        # Backwards compat, old datasets have strings as one element 1d arrays
        if len(value) == 1:
            return value[0]
    elif len(value.dtype.descr) > 1:  # Compound dtype
        # For backwards compat, now strings are written as variable length
        value = _from_fixed_length_strings(value)
    if value.shape == ():
        value = value[()]
    return value 
Example #9
Source File: test_dataset.py    From GraphicDesignPatternByPython with MIT License
def test_float(self):
        """ Scaleoffset filter works for floating point data """

        scalefac = 4
        shape = (100, 300)
        range = 20*10**scalefac
        testdata = (np.random.rand(*shape)-0.5)*range

        dset = self.f.create_dataset('foo', shape, dtype=float, scaleoffset=scalefac)

        # Dataset reports that scaleoffset is in use
        assert dset.scaleoffset is not None

        # Dataset round-trips
        dset[...] = testdata
        filename = self.f.filename
        self.f.close()
        self.f = h5py.File(filename, 'r')
        readdata = self.f['foo'][...]

        # Test that data round-trips to requested precision
        self.assertArrayEqual(readdata, testdata, precision=10**(-scalefac))

        # Test that the filter is actually active (i.e. compression is lossy)
        assert not (readdata == testdata).all() 
Example #10
Source File: save.py    From chainer with MIT License
def save_parameters_as_hdf5(model, filename='model.h5'):
    # Save the model parameters into a HDF5 archive
    chainer.serializers.save_hdf5(filename, model)
    print('model.h5 saved!\n')

    # Load the saved HDF5 using h5py
    print('--- The list of saved params in model.h5 ---')
    f = h5py.File('model.h5', 'r')
    for param_key, param in f.items():
        msg = '{}:'.format(param_key)
        if isinstance(param, h5py.Dataset):
            msg += ' {}'.format(param.shape)
        print(msg)
        if isinstance(param, h5py.Group):
            for child_key, child in param.items():
                print('  {}:{}'.format(child_key, child.shape))
    print('---------------------------------------------\n') 
Example #11
Source File: io.py    From pwtools with BSD 3-Clause "New" or "Revised" License
def read_h5(fn):
    """Read h5 file into dict.

    Dict keys are the group + dataset names, e.g. '/a/b/c/dset'. All keys start
    with a leading slash even if written without (see :func:`write_h5`).

    Parameters
    ----------
    fn : str
        filename

    Examples
    --------
    >>> read_h5('foo.h5').keys()
    ['/a/b/d1', '/a/b/d2', '/a/c/d3', '/x/y/z']
    """
    fh = h5py.File(fn, mode='r')
    dct = {}
    def get(name, obj, dct=dct):
        if isinstance(obj, h5py.Dataset):
            _name = name if name.startswith('/') else '/'+name
            dct[_name] = obj[()]
    fh.visititems(get)
    fh.close()
    return dct 
Example #12
Source File: hdf.py    From wradlib with MIT License
def from_hdf5(fpath, dataset="data"):
    """Loading data from hdf5 files that was stored by \
    :meth:`~wradlib.io.to_hdf5`

    Parameters
    ----------
    fpath : string
        path to the hdf5 file
    dataset : string
        name of the Dataset in which the data is stored
    """
    f = h5py.File(fpath, mode="r")
    # Check whether Dataset exists
    if dataset not in f.keys():
        raise KeyError(
            "WRADLIB: Cannot read Dataset <%s> from hdf5 file " "<%s>" % (dataset, f)
        )
    data = np.array(f[dataset][:])
    # get metadata
    metadata = {}
    for key in f[dataset].attrs.keys():
        metadata[key] = f[dataset].attrs[key]
    f.close()
    return data, metadata 
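A short usage sketch for from_hdf5 above; the file path is hypothetical and assumes the file was written by wradlib.io.to_hdf5 with the default dataset name "data".

data, metadata = from_hdf5('rainfall.h5')
print(data.shape, metadata)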
Example #13
Source File: pyanitools.py    From ANI1_dataset with MIT License
def h5py_dataset_iterator(self,g, prefix=''):
        for key in g.keys():
            item = g[key]
            path = '{}/{}'.format(prefix, key)
            keys = [i for i in item.keys()]
            if isinstance(item[keys[0]], h5py.Dataset): # test for dataset
                data = {'path':path}
                for k in keys:
                    if not isinstance(item[k], h5py.Group):
                        dataset = np.array(item[k].value)

                        if type(dataset) is np.ndarray:
                            if dataset.size != 0:
                                if type(dataset[0]) is np.bytes_:
                                    dataset = [a.decode('ascii') for a in dataset]

                        data.update({k:dataset})

                yield data
            else: # test for group (go down)
                yield from self.h5py_dataset_iterator(item, path) 
Example #14
Source File: test_dataset.py    From GraphicDesignPatternByPython with MIT License
def test_int_with_minbits_lossy(self):
        """ Scaleoffset filter works for integer data with specified precision """

        nbits = 12
        shape = (100, 300)
        testdata = np.random.randint(0, 2**(nbits+1)-1, size=shape)

        dset = self.f.create_dataset('foo', shape, dtype=int, scaleoffset=nbits)

        # Dataset reports scaleoffset enabled with correct precision
        self.assertTrue(dset.scaleoffset == 12)

        # Data can be written and read
        dset[...] = testdata
        filename = self.f.filename
        self.f.close()
        self.f = h5py.File(filename, 'r')
        readdata = self.f['foo'][...]

        # Compression is lossy
        assert not (readdata == testdata).all() 
Example #15
Source File: hdf5_01.py    From hangar-py with Apache License 2.0
def __init__(self, repo_path: Path, schema_shape: tuple, schema_dtype: np.dtype):
        self.path: Path = repo_path
        self.schema_shape: tuple = schema_shape
        self.schema_dtype: np.dtype = schema_dtype
        self._dflt_backend_opts: Optional[dict] = None

        self.rFp: HDF5_01_MapTypes = {}
        self.wFp: HDF5_01_MapTypes = {}
        self.Fp: HDF5_01_MapTypes = ChainMap(self.rFp, self.wFp)
        self.rDatasets = SizedDict(maxsize=100)
        self.wdset: h5py.Dataset = None

        self.mode: Optional[str] = None
        self.hIdx: Optional[int] = None
        self.w_uid: Optional[str] = None
        self.hMaxSize: Optional[int] = None
        self.hNextPath: Optional[int] = None
        self.hColsRemain: Optional[int] = None

        self.STAGEDIR: Path = Path(self.path, DIR_DATA_STAGE, _FmtCode)
        self.REMOTEDIR: Path = Path(self.path, DIR_DATA_REMOTE, _FmtCode)
        self.DATADIR: Path = Path(self.path, DIR_DATA, _FmtCode)
        self.STOREDIR: Path = Path(self.path, DIR_DATA_STORE, _FmtCode)
        self.DATADIR.mkdir(exist_ok=True) 
Example #16
Source File: hdf5_00.py    From hangar-py with Apache License 2.0
def __init__(self, repo_path: Path, schema_shape: tuple, schema_dtype: np.dtype):
        self.path: Path = repo_path
        self.schema_shape: tuple = schema_shape
        self.schema_dtype: np.dtype = schema_dtype
        self._dflt_backend_opts: Optional[dict] = None

        self.rFp: HDF5_00_MapTypes = {}
        self.wFp: HDF5_00_MapTypes = {}
        self.Fp: HDF5_00_MapTypes = ChainMap(self.rFp, self.wFp)
        self.rDatasets = SizedDict(maxsize=100)
        self.wdset: Optional[h5py.Dataset] = None

        self.mode: Optional[str] = None
        self.hIdx: Optional[int] = None
        self.w_uid: Optional[str] = None
        self.hMaxSize: Optional[int] = None
        self.hNextPath: Optional[int] = None
        self.hColsRemain: Optional[int] = None

        self.STAGEDIR: Path = Path(self.path, DIR_DATA_STAGE, _FmtCode)
        self.REMOTEDIR: Path = Path(self.path, DIR_DATA_REMOTE, _FmtCode)
        self.STOREDIR: Path = Path(self.path, DIR_DATA_STORE, _FmtCode)
        self.DATADIR: Path = Path(self.path, DIR_DATA, _FmtCode)
        self.DATADIR.mkdir(exist_ok=True) 
Example #17
Source File: pyanitools.py    From deepchem with MIT License
def h5py_dataset_iterator(self, g, prefix=''):
    for key in g.keys():
      item = g[key]
      path = '{}/{}'.format(prefix, key)
      keys = [i for i in item.keys()]
      if isinstance(item[keys[0]], h5py.Dataset):  # test for dataset
        data = {'path': path}
        for k in keys:
          if not isinstance(item[k], h5py.Group):
            dataset = np.array(item[k].value)

            if type(dataset) is np.ndarray:
              if dataset.size != 0:
                if type(dataset[0]) is np.bytes_:
                  dataset = [a.decode('ascii') for a in dataset]

            data.update({k: dataset})

        yield data
      else:  # test for group (go down)
        for s in self.h5py_dataset_iterator(item, path):
          yield s 
Example #18
Source File: omas_h5.py    From omas with MIT License
def convertDataset(ods, data):
    '''
    Recursive utility function to map HDF5 structure to ODS

    :param ods: input ODS to be populated

    :param data: HDF5 dataset or group
    '''
    import h5py
    keys = data.keys()
    try:
        keys = sorted(list(map(int, keys)))
    except ValueError:
        pass
    for oitem in keys:
        item = str(oitem)
        if item.endswith('_error_upper'):
            continue
        if isinstance(data[item], h5py.Dataset):
            ods[item] = data[item][()]
            if item + '_error_upper' in data:
                ods[item] = uarray(ods[item], data[item + '_error_upper'][()])
        elif isinstance(data[item], h5py.Group):
            convertDataset(ods[oitem], data[item]) 
Example #19
Source File: test_dataset.py    From GraphicDesignPatternByPython with MIT License
def test_int(self):
        """ Scaleoffset filter works for integer data with default precision """

        nbits = 12
        shape = (100, 300)
        testdata = np.random.randint(0, 2**nbits-1, size=shape)

        # Create dataset; note omission of nbits (for library-determined precision)
        dset = self.f.create_dataset('foo', shape, dtype=int, scaleoffset=True)

        # Dataset reports scaleoffset enabled
        assert dset.scaleoffset is not None

        # Data round-trips correctly and identically
        dset[...] = testdata
        filename = self.f.filename
        self.f.close()
        self.f = h5py.File(filename, 'r')
        readdata = self.f['foo'][...]
        self.assertArrayEqual(readdata, testdata) 
Example #20
Source File: to_hdf5.py    From mars with Apache License 2.0
def tohdf5(hdf5_file, x, group=None, dataset=None, **kwds):
    import h5py

    x = astensor(x)
    if isinstance(hdf5_file, h5py.Dataset):
        filename = hdf5_file.file.filename
        group = hdf5_file.parent.name
        dataset = hdf5_file.name.rsplit('/', 1)[1]
    elif isinstance(hdf5_file, h5py.File):
        filename = hdf5_file.filename
        if dataset is None:
            raise ValueError('`dataset` should be provided')
    elif isinstance(hdf5_file, str):
        filename = hdf5_file
        if dataset is None:
            raise ValueError('`dataset` should be provided')
    else:
        raise TypeError('`hdf5_file` passed has wrong type, '
                        'expect str, h5py.File or h5py.Dataset, '
                        'got {}'.format(type(hdf5_file)))

    op = TensorHDF5DataStore(filename=filename, group=group, dataset=dataset,
                             dataset_kwds=kwds)
    return op(x) 
Example #21
Source File: hdsortsortingextractor.py    From spikeextractors with MIT License
def _parse_units(file, _units):
    import h5py

    t_units = {}
    if isinstance(_units, h5py.Group):
        for name in _units.keys():
            value = _units[name]
            dict_val = []
            for val in value:
                if isinstance(file[val[0]], h5py.Dataset):
                    dict_val.append(file[val[0]][()])
                    t_units[name] = dict_val
                else:
                    break
        out = [dict(zip(t_units, col)) for col in zip(*t_units.values())]
    else:
        out = []
        for unit in _units:
            group = file[unit[()][0]]
            unit_dict = {}
            for k in group.keys():
                unit_dict[k] = group[k][()]
            out.append(unit_dict)

    return out 
Example #22
Source File: test.py    From ldpred with MIT License
def h5_node_walker(h5_node, key_prefix=''):
    """Generator function that walks an hdf5 File or Group object.
    
    Args:
      h5_node: an h5py.File or h5py.Group object
      key_prefix: the '/' delimited string representing the name path of the
         node within the .hdf5 file.
    
    Yields:
      (child_key, child_value)
    """
    for k, v in h5_node.items():
        v_type = type(v)
        v_path = key_prefix + '/' + k
        if v_type == h5py.Group:
            for nested_key, nested_value in h5_node_walker(v, v_path):
                yield nested_key, nested_value
        elif v_type == h5py.Dataset:
            yield v_path, v[...]
        else:
            assert False, 'Unexpected v_type: %s' % v_type 
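A brief usage sketch for the walker above (the file name is hypothetical). Since each yielded value is the dataset read with v[...], every item arrives as a NumPy array:

with h5py.File('weights.hdf5', 'r') as f:
    for path, arr in h5_node_walker(f):
        print(path, arr.shape, arr.dtype)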
Example #23
Source File: file_backing.py    From anndata with BSD 3-Clause "New" or "Revised" License
def __getitem__(self, key: str) -> Union[h5py.Group, h5py.Dataset, SparseDataset]:
        return self._file[key] 
Example #24
Source File: io.py    From mxnet-lambda with Apache License 2.0
def _init_data(data, allow_empty, default_name):
    """Convert data into canonical form."""
    assert (data is not None) or allow_empty
    if data is None:
        data = []

    if isinstance(data, (np.ndarray, NDArray, h5py.Dataset)
                  if h5py else (np.ndarray, NDArray)):
        data = [data]
    if isinstance(data, list):
        if not allow_empty:
            assert(len(data) > 0)
        if len(data) == 1:
            data = OrderedDict([(default_name, data[0])]) # pylint: disable=redefined-variable-type
        else:
            data = OrderedDict( # pylint: disable=redefined-variable-type
                [('_%d_%s' % (i, default_name), d) for i, d in enumerate(data)])
    if not isinstance(data, dict):
        raise TypeError("Input must be NDArray, numpy.ndarray, h5py.Dataset " + \
                "a list of them or dict with them as values")
    for k, v in data.items():
        if not isinstance(v, (NDArray, h5py.Dataset) if h5py else NDArray):
            try:
                data[k] = array(v)
            except:
                raise TypeError(("Invalid type '%s' for %s, "  % (type(v), k)) + \
                                "should be NDArray, numpy.ndarray or h5py.Dataset")

    return list(data.items()) 
Example #25
Source File: cosmodc2.py    From gcr-catalogs with BSD 3-Clause "New" or "Revised" License
def _add_to_native_quantity_collector(name, obj, collector):
    if isinstance(obj, h5py.Dataset):
        collector.add(name) 
Example #26
Source File: io.py    From mxnet-lambda with Apache License 2.0
def __init__(self, data, label=None, batch_size=1, shuffle=False,
                 last_batch_handle='pad', data_name='data',
                 label_name='softmax_label'):
        super(NDArrayIter, self).__init__(batch_size)

        self.data = _init_data(data, allow_empty=False, default_name=data_name)
        self.label = _init_data(label, allow_empty=True, default_name=label_name)
        if isinstance(data, CSRNDArray) or isinstance(label, CSRNDArray):
            assert(shuffle is False), \
                  "`NDArrayIter` only supports ``CSRNDArray`` with `shuffle` set to `False`"
            assert(last_batch_handle == 'discard'), "`NDArrayIter` only supports ``CSRNDArray``" \
                                                    " with `last_batch_handle` set to `discard`."

        self.idx = np.arange(self.data[0][1].shape[0])
        # shuffle data
        if shuffle:
            np.random.shuffle(self.idx)
            self.data = [(k, array(v.asnumpy()[self.idx], v.context))
                         if not (isinstance(v, h5py.Dataset)
                                 if h5py else False) else (k, v)
                         for k, v in self.data]
            self.label = [(k, array(v.asnumpy()[self.idx], v.context))
                          if not (isinstance(v, h5py.Dataset)
                                  if h5py else False) else (k, v)
                          for k, v in self.label]

        # batching
        if last_batch_handle == 'discard':
            new_n = self.data[0][1].shape[0] - self.data[0][1].shape[0] % batch_size
            self.idx = self.idx[:new_n]

        self.data_list = [x[1] for x in self.data] + [x[1] for x in self.label]
        self.num_source = len(self.data_list)
        self.num_data = self.idx.shape[0]
        assert self.num_data >= batch_size, \
            "batch_size needs to be smaller than data size."
        self.cursor = -batch_size
        self.batch_size = batch_size
        self.last_batch_handle = last_batch_handle 
Example #27
Source File: importer.py    From nnabla with Apache License 2.0
def load_parameters(self, filename):
        e = os.path.splitext(filename)[1].lower()
        if e == '.h5':
            import h5py
            with h5py.File(filename, 'r') as hd:
                keys = []

                def _get_keys(name):
                    ds = hd[name]
                    if not isinstance(ds, h5py.Dataset):
                        # Group
                        return
                    # To preserve order of parameters
                    keys.append((ds.attrs.get('index', None), name))
                hd.visit(_get_keys)
                for _, key in sorted(keys):
                    ds = hd[key]
                    parameter = self._nnp.parameter.add()
                    parameter.variable_name = key
                    parameter.shape.dim.extend(ds.shape)
                    parameter.data.extend(ds[...].flatten())
                    if ds.attrs['need_grad']:
                        parameter.need_grad = True
                    else:
                        parameter.need_grad = False

        elif e == '.protobuf':
            with open(filename, 'rb') as f:
                self._nnp.MergeFromString(f.read()) 
Example #28
Source File: anndata.py    From anndata with BSD 3-Clause "New" or "Revised" License
def X(self) -> Optional[Union[np.ndarray, sparse.spmatrix, ArrayView]]:
        """Data matrix of shape :attr:`n_obs` × :attr:`n_vars`."""
        if self.isbacked:
            if not self.file.is_open:
                self.file.open()
            X = self.file["X"]
            if isinstance(X, h5py.Group):
                X = SparseDataset(X)
            # TODO: This should get replaced/ handled elsewhere
            # This is so that we can index into a backed dense dataset with
            # indices that aren’t strictly increasing
            if self.is_view and isinstance(X, h5py.Dataset):
                ordered = [self._oidx, self._vidx]  # this will be mutated
                rev_order = [slice(None), slice(None)]
                for axis, axis_idx in enumerate(ordered.copy()):
                    if isinstance(axis_idx, np.ndarray) and axis_idx.dtype.type != bool:
                        order = np.argsort(axis_idx)
                        ordered[axis] = axis_idx[order]
                        rev_order[axis] = np.argsort(order)
                # from hdf5, then to real order
                X = X[tuple(ordered)][tuple(rev_order)]
            elif self.is_view:
                X = X[self._oidx, self._vidx]
        elif self.is_view:
            X = as_view(
                _subset(self._adata_ref.X, (self._oidx, self._vidx)),
                ElementRef(self, "X"),
            )
        else:
            X = self._X
        return X
        # if self.n_obs == 1 and self.n_vars == 1:
        #     return X[0, 0]
        # elif self.n_obs == 1 or self.n_vars == 1:
        #     if issparse(X): X = X.toarray()
        #     return X.flatten()
        # else:
        #     return X 
Example #29
Source File: file_backing.py    From anndata with BSD 3-Clause "New" or "Revised" License
def __setitem__(
        self, key: str, value: Union[h5py.Group, h5py.Dataset, SparseDataset]
    ):
        self._file[key] = value 
Example #30
Source File: sparse_dataset.py    From anndata with BSD 3-Clause "New" or "Revised" License
def copy(self) -> ss.spmatrix:
        if isinstance(self.data, h5py.Dataset):
            return SparseDataset(self.data.parent).to_memory()
        else:
            return super().copy()