Python h5py.special_dtype() Examples

The following are 30 code examples showing how to use h5py.special_dtype(). These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example.

You may check out the related API usage on the sidebar.

You may also want to check out all available functions/classes of the module h5py, or try the search function.

Example 1
def set_predicted_description(self, split, data_key, sentence):
        '''
        Store the predicted sentence tokens for `data_key` in `split`,
        creating the 'predicted_description' dataset if necessary, or
        replacing the current value if it already exists.

        Raises RuntimeError when the underlying HDF5 file was not
        opened writable ("r+").
        '''

        if self.openmode != "r+":
            # forcefully quit when trying to write to a read-only file
            raise RuntimeError("Dataset is read-only, try again with --h5_writable")

        dataset_key = 'predicted_description'
        group = self.dataset[split][data_key]
        # `str` (rather than the Python-2-only `unicode` builtin) gives a
        # variable-length unicode string dtype under Python 3.
        vlen_str = h5py.special_dtype(vlen=str)

        try:
            predicted_text = group.create_dataset(dataset_key, (1,), dtype=vlen_str)
        except RuntimeError:
            # the dataset already exists, erase it and create an empty space
            del group[dataset_key]
            predicted_text = group.create_dataset(dataset_key, (1,), dtype=vlen_str)

        predicted_text[0] = " ".join(sentence)
Example 2
Project: calamari   Author: Calamari-OCR   File: hdf5_dataset_writer.py    License: Apache License 2.0 6 votes vote down vote up
def finish_chunck(self):
        """Flush the buffered text/image pairs into a new HDF5 chunk file."""
        # Nothing buffered yet: nothing to flush.
        if not self.text:
            return

        codec = self.compute_codec()

        # Chunk files are named <output>_<NNN><hdf5-extension>.
        filename = "{}_{:03d}{}".format(self.output_filename,
                                        self.current_chunk,
                                        DataSetType.gt_extension(DataSetType.HDF5))
        self.files.append(filename)

        n_samples = len(self.text)
        int32_vlen = h5py.special_dtype(vlen=np.dtype('int32'))
        uint8_vlen = h5py.special_dtype(vlen=np.dtype('uint8'))

        out = h5py.File(filename, 'w')
        out.create_dataset('transcripts', (n_samples,), dtype=int32_vlen, compression='gzip')
        out.create_dataset('images_dims', data=[img.shape for img in self.data], dtype=int)
        out.create_dataset('images', (n_samples,), dtype=uint8_vlen, compression='gzip')
        out.create_dataset('codec', data=[ord(c) for c in codec])
        # transcripts are stored as per-character indices into the codec
        out['transcripts'][...] = [[codec.index(c) for c in txt] for txt in self.text]
        out['images'][...] = [img.reshape(-1) for img in self.data]
        out.close()

        # advance to the next chunk and reset the buffers
        self.current_chunk += 1
        self.data = []
        self.text = []
Example 3
Project: implicit   Author: benfred   File: movielens.py    License: MIT License 6 votes vote down vote up
def _hfd5_from_dataframe(ratings, movies, outputfilename):
    """Write the ratings matrix and movie titles to an HDF5 file.

    Parameters
    ----------
    ratings : DataFrame with 'rating', 'movieId' and 'userId' columns
    movies : DataFrame with 'movieId' and 'title' columns
    outputfilename : path of the HDF5 file to create (overwritten)
    """
    # transform ratings dataframe into a sparse (movie x user) CSR matrix
    m = coo_matrix((ratings['rating'].astype(np.float32),
                   (ratings['movieId'], ratings['userId']))).tocsr()

    with h5py.File(outputfilename, "w") as f:
        # write out the ratings matrix as its CSR components
        g = f.create_group('movie_user_ratings')
        g.create_dataset("data", data=m.data)
        g.create_dataset("indptr", data=m.indptr)
        g.create_dataset("indices", data=m.indices)

        # write out the titles as a numpy array indexed by movieId.
        # builtin `object` replaces the `np.object` alias (removed in
        # NumPy 1.24).
        titles = np.empty(shape=(movies.movieId.max()+1,), dtype=object)
        titles[movies.movieId] = movies.title
        dt = h5py.special_dtype(vlen=str)
        dset = f.create_dataset('movie', (len(titles),), dtype=dt)
        dset[:] = titles
Example 4
Project: implicit   Author: benfred   File: lastfm.py    License: MIT License 6 votes vote down vote up
def _hfd5_from_dataframe(data, outputfilename):
    """Dump the artist/user play counts and the category labels to HDF5."""
    # build the (artist x user) play-count matrix in CSR form
    artist_codes = data['artist'].cat.codes.copy()
    user_codes = data['user'].cat.codes.copy()
    plays = coo_matrix((data['plays'].astype(np.float32),
                       (artist_codes, user_codes))).tocsr()

    with h5py.File(outputfilename, "w") as f:
        g = f.create_group('artist_user_plays')
        for part in ("data", "indptr", "indices"):
            g.create_dataset(part, data=getattr(plays, part))

        # category labels are stored as variable-length strings
        dt = h5py.special_dtype(vlen=str)
        for dataset_name, column in (('artist', 'artist'), ('user', 'user')):
            labels = list(data[column].cat.categories)
            dset = f.create_dataset(dataset_name, (len(labels),), dtype=dt)
            dset[:] = labels
Example 5
Project: implicit   Author: benfred   File: sketchfab.py    License: MIT License 6 votes vote down vote up
def _hfd5_from_dataframe(data, outputfilename):
    """Write the item/user likes matrix and the id labels to HDF5."""
    item_codes = data['mid'].cat.codes.copy()
    user_codes = data['uid'].cat.codes.copy()
    ones = np.ones(len(item_codes)).astype(np.float32)

    # sparse (item x user) matrix with a 1.0 wherever a like exists
    likes = coo_matrix((ones, (item_codes, user_codes))).astype(np.float32).tocsr()

    with h5py.File(outputfilename, "w") as f:
        group = f.create_group('item_user_likes')
        group.create_dataset("data", data=likes.data)
        group.create_dataset("indptr", data=likes.indptr)
        group.create_dataset("indices", data=likes.indices)

        vlen_str = h5py.special_dtype(vlen=str)

        items = list(data['mid'].cat.categories)
        item_dset = f.create_dataset('item', (len(items),), dtype=vlen_str)
        item_dset[:] = items

        users = list(data['uid'].cat.categories)
        user_dset = f.create_dataset('user', (len(users),), dtype=vlen_str)
        user_dset[:] = users
Example 6
Project: implicit   Author: benfred   File: million_song_dataset.py    License: MIT License 6 votes vote down vote up
def _hfd5_from_dataframe(data, track_info, outputfilename):
    """Write the track/user play counts, track metadata and user ids to HDF5."""
    # (track x user) play-count matrix in CSR form
    plays = coo_matrix((data['plays'].astype(np.float32),
                       (data['track'].cat.codes.copy(),
                        data['user'].cat.codes.copy()))).tocsr()

    with h5py.File(outputfilename, "w") as f:
        matrix_group = f.create_group('track_user_plays')
        matrix_group.create_dataset("data", data=plays.data)
        matrix_group.create_dataset("indptr", data=plays.indptr)
        matrix_group.create_dataset("indices", data=plays.indices)

        vlen_str = h5py.special_dtype(vlen=str)

        # track metadata keeps the shape of the incoming array
        track_dset = f.create_dataset('track', track_info.shape, dtype=vlen_str)
        track_dset[:] = track_info

        usernames = list(data['user'].cat.categories)
        user_dset = f.create_dataset('user', (len(usernames),), dtype=vlen_str)
        user_dset[:] = usernames
Example 7
Project: keras-image-segmentation   Author: dhkim0225   File: h5_test.py    License: MIT License 6 votes vote down vote up
def write_data(h5py_file, mode, x_paths, y_paths):
    """Write images, labels and file names for one split into `h5py_file`."""
    num_data = len(x_paths)

    # variable-length dtypes: raw pixel bytes and file-name strings
    uint8_dt = h5py.special_dtype(vlen=np.uint8)
    string_dt = h5py.special_dtype(vlen=str)

    group = h5py_file.create_group(mode)
    h5_name = group.create_dataset('name', shape=(num_data,), dtype=string_dt)
    h5_image = group.create_dataset('image', shape=(num_data,), dtype=uint8_dt)
    h5_label = group.create_dataset('label', shape=(num_data,), dtype=uint8_dt)

    # record the (quarter-resolution) image/label sizes as attributes
    h5_image.attrs['size'] = [256, 512, 3]
    h5_label.attrs['size'] = [256, 512, 1]

    for idx in range(num_data):
        # color image bilinear; nearest-neighbour keeps label class ids intact
        color = cv2.imread(x_paths[idx], 1)
        label = cv2.imread(y_paths[idx], 0)
        color = cv2.resize(color, None, fx=0.25, fy=0.25,
                           interpolation=cv2.INTER_LINEAR)
        label = cv2.resize(label, None, fx=0.25, fy=0.25,
                           interpolation=cv2.INTER_NEAREST)

        h5_image[idx] = color.flatten()
        h5_label[idx] = label.flatten()
        h5_name[idx] = os.path.basename(x_paths[idx])
Example 8
Project: GraphicDesignPatternByPython   Author: Relph1119   File: test_dataset.py    License: MIT License 6 votes vote down vote up
def test_int(self):
        """Variable-length int datasets round-trip lists and arrays of ints."""
        dt = h5py.special_dtype(vlen=int)
        ds = self.f.create_dataset('vlen', (4,), dtype=dt)
        ds[0] = np.arange(3)
        ds[1] = np.arange(0)
        ds[2] = [1, 2, 3]
        ds[3] = np.arange(1)
        self.assertArrayEqual(ds[0], np.arange(3))
        self.assertArrayEqual(ds[1], np.arange(0))
        self.assertArrayEqual(ds[2], np.array([1, 2, 3]))
        # was a duplicated check of ds[1]; ds[3] was written but never verified
        self.assertArrayEqual(ds[3], np.arange(1))
        ds[0:2] = np.array([np.arange(5), np.arange(4)])
        self.assertArrayEqual(ds[0], np.arange(5))
        self.assertArrayEqual(ds[1], np.arange(4))
        ds[0:2] = np.array([np.arange(3), np.arange(3)])
        self.assertArrayEqual(ds[0], np.arange(3))
        self.assertArrayEqual(ds[1], np.arange(3))
Example 9
Project: GraphicDesignPatternByPython   Author: Relph1119   File: test_dataset.py    License: MIT License 6 votes vote down vote up
def test_convert(self):
        """Float input written to a vlen-int dataset is truncated to ints."""
        dt = h5py.special_dtype(vlen=int)
        ds = self.f.create_dataset('vlen', (3,), dtype=dt)
        ds[0] = np.array([1.4, 1.2])
        ds[1] = np.array([1.2])
        ds[2] = [1.2, 2, 3]
        for index, expected in enumerate(([1, 1], [1], [1, 2, 3])):
            self.assertArrayEqual(ds[index], np.array(expected))
        ds[0:2] = np.array([[0.1, 1.1, 2.1, 3.1, 4], np.arange(4)])
        self.assertArrayEqual(ds[0], np.arange(5))
        self.assertArrayEqual(ds[1], np.arange(4))
        ds[0:2] = np.array([np.array([0.1, 1.2, 2.2]),
                            np.array([0.2, 1.2, 2.2])])
        self.assertArrayEqual(ds[0], np.arange(3))
        self.assertArrayEqual(ds[1], np.arange(3))
Example 10
Project: GraphicDesignPatternByPython   Author: Relph1119   File: test_datatype.py    License: MIT License 6 votes vote down vote up
def test_compound_vlen_enum(self):
        """A compound type mixing two vlen fields and an enum round-trips."""
        eidt = h5py.special_dtype(enum=(np.uint8, {'OFF': 0, 'ON': 1}))
        vidt = h5py.special_dtype(vlen=np.uint8)

        def a(items):
            # shorthand for a uint8 array literal
            return np.array(items, dtype=np.uint8)

        f = self.f

        # compound layout: vlen, vlen, enum
        dt_vve = np.dtype([
            ('foo', vidt),
            ('bar', vidt),
            ('switch', eidt)])
        vve = f.create_dataset('dt_vve', shape=(2,), dtype=dt_vve)
        data = np.array([(a([1, 2, 3]), a([1, 2]), 1),
                         (a([]), a([2, 4, 6]), 0)],
                        dtype=dt_vve)
        vve[:] = data
        actual = vve[:]
        for field in ('foo', 'bar'):
            self.assertVlenArrayEqual(data[field], actual[field])
        self.assertArrayEqual(data['switch'], actual['switch'])
Example 11
Project: GraphicDesignPatternByPython   Author: Relph1119   File: test_datatype.py    License: MIT License 6 votes vote down vote up
def test_vlen_enum(self):
        """vlen-of-enum data survives a write/read round trip."""
        fname = self.mktemp()
        values = [[1], [1, 2]]
        enum_dt = h5py.special_dtype(enum=('i', dict(foo=1, bar=2)))
        dt1 = h5py.special_dtype(vlen=enum_dt)

        with h5py.File(fname, 'w') as f:
            dset = f.create_dataset('test', (len(values),), dtype=dt1)
            dset[:] = np.array(values)

        with h5py.File(fname, 'r') as f:
            readback = f['test']
            dt2 = readback.dtype
            roundtripped = [row.tolist() for row in readback[:]]

        self.assertEqual(values, roundtripped)
        # the enum mapping inside the vlen type must survive the round trip
        self.assertEqual(h5py.check_dtype(enum=h5py.check_dtype(vlen=dt1)),
                         h5py.check_dtype(enum=h5py.check_dtype(vlen=dt2)))
Example 12
Project: GraphicDesignPatternByPython   Author: Relph1119   File: test_datatype.py    License: MIT License 6 votes vote down vote up
def test_compound_vlen(self):
        # Compound dtypes mixing vlen and enum fields: verify that member
        # offsets computed by h5py.h5t.py_create agree with numpy's field
        # offsets, and that the aligned + logical combination is rejected.
        vidt = h5py.special_dtype(vlen=np.uint8)
        eidt = h5py.special_dtype(enum=(np.uint8, {'OFF': 0, 'ON': 1}))

        for np_align in (False, True):
            dt = np.dtype([
                ('a', eidt),
                ('foo', vidt),
                ('bar', vidt),
                ('switch', eidt)], align=np_align)
            # numpy's byte offset of each field, in declaration order
            np_offsets = [dt.fields[i][1] for i in dt.names]

            for logical in (False, True):
                if logical and np_align:
                    # Vlen types have different size in the numpy struct
                    self.assertRaises(TypeError, h5py.h5t.py_create, dt,
                            logical=logical)
                else:
                    ht = h5py.h5t.py_create(dt, logical=logical)
                    # offsets reported by the constructed HDF5 compound type
                    offsets = [ht.get_member_offset(i)
                               for i in range(ht.get_nmembers())]
                    if np_align:
                        self.assertEqual(np_offsets, offsets)
Example 13
Project: attention-lvcsr   Author: rizar   File: ilsvrc2010.py    License: MIT License 6 votes vote down vote up
def prepare_hdf5_file(hdf5_file, n_train, n_valid, n_test):
    """Create datasets within a given HDF5 file.

    Parameters
    ----------
    hdf5_file : :class:`h5py.File` instance
        HDF5 file handle to which to write.
    n_train : int
        The number of training set examples.
    n_valid : int
        The number of validation set examples.
    n_test : int
        The number of test set examples.

    """
    n_total = n_train + n_valid + n_test
    # record which slice of each dataset belongs to which split
    hdf5_file.attrs['split'] = H5PYDataset.create_split_array(
        create_splits(n_train, n_valid, n_test))
    # encoded image byte streams vary in length, hence the vlen uint8 dtype
    hdf5_file.create_dataset(
        'encoded_images', shape=(n_total,),
        dtype=h5py.special_dtype(vlen=numpy.dtype('uint8')))
    hdf5_file.create_dataset('targets', shape=(n_total, 1), dtype=numpy.int16)
    hdf5_file.create_dataset('filenames', shape=(n_total, 1), dtype='S32')
Example 14
Project: PyINT   Author: ymcmrs   File: _utils.py    License: GNU General Public License v3.0 6 votes vote down vote up
def write_h5(datasetDict, out_file, metadata=None, ref_file=None, compression=None):
    """Write a dict of name -> array datasets (plus optional metadata
    attributes) into a freshly created HDF5 file.

    Parameters
    ----------
    datasetDict : dict mapping dataset name to array-like data
    out_file : path of the HDF5 file to create (an existing file is removed)
    metadata : optional dict, written as string-valued file attributes
    ref_file : unused; kept for interface compatibility
    compression : compression filter passed to create_dataset (e.g. 'gzip')

    Returns
    -------
    out_file : the path that was written
    """
    if os.path.isfile(out_file):
        print('delete exsited file: {}'.format(out_file))
        os.remove(out_file)

    print('create HDF5 file: {} with w mode'.format(out_file))

    with h5py.File(out_file, 'w') as f:
        for dsName, data in datasetDict.items():
            f.create_dataset(dsName,
                             data=data,
                             compression=compression)

        # `metadata` defaults to None; the original unconditionally called
        # metadata.items() and crashed when it was omitted
        if metadata is not None:
            for key, value in metadata.items():
                f.attrs[key] = str(value)
                #print(key + ': ' +  value)
    print('finished writing to {}'.format(out_file))

    return out_file
    
###################################################################### 
Example 15
Project: costar_plan   Author: jhu-lcsr   File: h5f.py    License: Apache License 2.0 6 votes vote down vote up
def write(self, example, filename, image_types=None):
        '''
        Write an example out to disk.

        example: dict of key -> value datasets to store.
        filename: name of the file to create under self.name.
        image_types: optional list of (image_type, image_format) pairs;
            each is recorded as a "type_<name>" dataset.

        status: success, failure or error.failure
        '''
        # None sentinel instead of a mutable default: `image_types=[]`
        # would be shared across all calls of this method.
        if image_types is None:
            image_types = []
        filename = os.path.join(self.name, filename)
        f = h5f.File(filename, 'w')
        if image_types != []:
            for (img_type_str, img_format_str) in image_types:
                f.create_dataset("type_" + img_type_str, data=[img_format_str])
        for key, value in example.items():
            if self.verbose > 0:
                print('H5fDataset writing key: ' + str(key))
            f.create_dataset(key, data=value)
        f.close()
Example 16
Project: anndata   Author: theislab   File: h5ad.py    License: BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def write_series(group, key, series, dataset_kwargs=MappingProxyType({})):
    """Write a pandas Series/Index into the h5py `group` under `key`.

    Object dtype is stored as variable-length strings; categoricals are
    stored as codes plus a linked "__categories/<key>" dataset; anything
    else is written directly. (`group` must be an h5py group, otherwise
    categoricals won't write.)
    """
    if series.dtype == object:
        # object dtype is assumed to contain strings
        string_dtype = h5py.special_dtype(vlen=str)
        group.create_dataset(key, data=series.values, dtype=string_dtype,
                             **dataset_kwargs)
    elif is_categorical_dtype(series):
        # handles categorical Index and Series alike
        categorical: pd.Categorical = series.values
        category_key = f"__categories/{key}"

        write_array(group, category_key, categorical.categories.values,
                    dataset_kwargs=dataset_kwargs)
        write_array(group, key, categorical.codes,
                    dataset_kwargs=dataset_kwargs)

        # link the codes dataset back to its categories; record orderedness
        group[key].attrs["categories"] = group[category_key].ref
        group[category_key].attrs["ordered"] = categorical.ordered
    else:
        group[key] = series.values
Example 17
Project: h5netcdf   Author: shoyer   File: legacyapi.py    License: BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def createVariable(self, varname, datatype, dimensions=(), zlib=False,
                       complevel=4, shuffle=True, fletcher32=False,
                       chunksizes=None, fill_value=False):
        """Create a netCDF4-style variable in this group.

        `datatype` may be the builtin `str`, which maps to a
        variable-length unicode HDF5 dtype. Scalar variables (no
        dimensions) cannot be chunked, compressed or checksummed, so
        those options are stripped for consistency with netCDF4-python.
        """
        if len(dimensions) == 0:  # it's a scalar
            # rip off chunk and filter options for consistency with netCDF4-python

            chunksizes = None
            zlib = False
            fletcher32 = False
            shuffle = False

        if datatype is str:
            # `str` replaces the Python-2-only `unicode` builtin here;
            # it yields a vlen unicode dtype under Python 3
            datatype = h5py.special_dtype(vlen=str)

        kwds = {}
        if zlib:
            # only add compression related keyword arguments if relevant (h5py
            # chokes otherwise)
            kwds['compression'] = 'gzip'
            kwds['compression_opts'] = complevel
            kwds['shuffle'] = shuffle

        return super(Group, self).create_variable(
            varname, dimensions, dtype=datatype, fletcher32=fletcher32,
            chunks=chunksizes, fillvalue=fill_value, **kwds)
Example 18
Project: fuel   Author: mila-iqia   File: ilsvrc2010.py    License: MIT License 6 votes vote down vote up
def prepare_hdf5_file(hdf5_file, n_train, n_valid, n_test):
    """Create datasets within a given HDF5 file.

    Parameters
    ----------
    hdf5_file : :class:`h5py.File` instance
        HDF5 file handle to which to write.
    n_train : int
        The number of training set examples.
    n_valid : int
        The number of validation set examples.
    n_test : int
        The number of test set examples.

    """
    n_total = sum((n_train, n_valid, n_test))
    split_array = H5PYDataset.create_split_array(
        create_splits(n_train, n_valid, n_test))
    hdf5_file.attrs['split'] = split_array
    # images are stored as variable-length byte strings
    byte_dtype = h5py.special_dtype(vlen=numpy.dtype('uint8'))
    hdf5_file.create_dataset('encoded_images', shape=(n_total,),
                             dtype=byte_dtype)
    hdf5_file.create_dataset('targets', shape=(n_total, 1), dtype=numpy.int16)
    hdf5_file.create_dataset('filenames', shape=(n_total, 1), dtype='S32')
Example 19
Project: fuel   Author: mila-iqia   File: ilsvrc2012.py    License: MIT License 6 votes vote down vote up
def prepare_hdf5_file(hdf5_file, n_train, n_valid, n_test):
    """Create datasets within a given HDF5 file.

    Parameters
    ----------
    hdf5_file : :class:`h5py.File` instance
        HDF5 file handle to which to write.
    n_train : int
        The number of training set examples.
    n_valid : int
        The number of validation set examples.
    n_test : int
        The number of test set examples.

    """
    n_total = n_train + n_valid + n_test
    # only the train and valid examples carry labels
    n_labeled = n_total - n_test
    hdf5_file.attrs['split'] = H5PYDataset.create_split_array(
        create_splits(n_train, n_valid, n_test))
    # encoded images vary in length, hence the vlen uint8 dtype
    hdf5_file.create_dataset(
        'encoded_images', shape=(n_total,),
        dtype=h5py.special_dtype(vlen=numpy.dtype('uint8')))
    hdf5_file.create_dataset('targets', shape=(n_labeled, 1),
                             dtype=numpy.int16)
    hdf5_file.create_dataset('filenames', shape=(n_total, 1), dtype='S32')
Example 20
Project: pyPESTO   Author: ICB-DCM   File: hdf5.py    License: BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def write_string_array(f: h5py.Group,
                       path: str,
                       strings: Collection) -> None:
    """
    Write string array to hdf5

    Parameters
    -------------
    f:
        h5py.Group where dataset should be created
    path:
        path of the dataset to create
    strings:
        list of strings to be written to f
    """
    string_dtype = h5py.special_dtype(vlen=str)
    dataset = f.create_dataset(path, (len(strings),), dtype=string_dtype)
    # store UTF-8 encoded bytes into the vlen-string dataset
    dataset[:] = [entry.encode('utf8') for entry in strings]
Example 21
Project: text_embedding   Author: NLPrinceton   File: vectors.py    License: MIT License 6 votes vote down vote up
def text2hdf5(textfile, hdf5file, **kwargs):
  '''converts word embeddings file from text to HDF5 format
  Args:
      textfile: word embeddings file in format "word float ... float\n"
      hdf5file: output file ; will have keys 'words' and 'vectors'
      kwargs: passed to load
  Returns:
      None
  '''

  words, vectors = zip(*load(textfile, **kwargs))
  # open explicitly in write mode: h5py >= 3.0 defaults File(...) to
  # read-only, so the original bare h5py.File(hdf5file) would fail when
  # creating a new output file
  f = h5py.File(hdf5file, 'w')
  f.create_dataset('words', (len(words),), dtype=h5py.special_dtype(vlen=str))
  for i, word in enumerate(words):
      f['words'][i] = word
  f.create_dataset('vectors', data=np.vstack(vectors))
  f.close()
Example 22
Project: PySimulator   Author: PySimulator   File: pyMtsf.py    License: GNU Lesser General Public License v3.0 6 votes vote down vote up
def WriteUnits(self):
        """Write the unit-conversion table into the 'Units' dataset of the
        description group; a no-op when no units are defined."""
        if len(self.units) == 0:
            return

        #maxLenTypeName = self._getMaxLength([x.name for x in self.units])
        # Compound row layout: vlen string name, double factor/offset, and
        # an enum tagging how the unit is used.
        # (`str` replaces the Python-2-only `unicode` for the vlen dtype.)
        numpyDataType = numpy.dtype({'names': ['name', 'factor',
                                              'offset', 'mode'],
                               'formats': [h5py.special_dtype(vlen=str),
                                          'double',
                                          'double',
                                          h5py.special_dtype(enum=(numpy.uint8, {'BaseUnit':0, 'Unit':1, 'DefaultDisplayUnit':2}))]})  # 'uint8']})

        dataset = self.description.create_dataset('Units', (len(self.units), 1), dtype=numpyDataType, maxshape=(len(self.units), 1), compression='gzip')
        allData = []
        for unit in self.units:
            allData.append((unit.name, unit.factor, unit.offset, unit.mode))
        dataset[:, 0] = allData
Example 23
Project: calamari   Author: Calamari-OCR   File: dataset.py    License: Apache License 2.0 5 votes vote down vote up
def store(self, extension):
        """Write each predicted transcript set (plus its codec) to an HDF5
        file named after the source file with `extension` appended."""
        int32_vlen = h5py.special_dtype(vlen=np.dtype('int32'))
        for filename, data in self.prediction.items():
            texts = data['transcripts']
            codec = data['codec']
            basename, _ = split_all_ext(filename)
            with h5py.File(basename + extension, 'w') as out:
                transcripts = out.create_dataset('transcripts', (len(texts),),
                                                 dtype=int32_vlen)
                transcripts[...] = texts
                out.create_dataset('codec', data=[ord(c) for c in codec])
Example 24
Project: MSMARCO   Author: spacemanidol   File: checkpointing.py    License: MIT License 5 votes vote down vote up
def save_vocab(checkpoint, path, id_to_symb):
    """
    Save a vocab, that is id to symbol mapping, into a hp5y file.
    Used for checkpoints and such.
    """
    symbols = [id_to_symb[index] for index in range(len(id_to_symb))]
    vocab = np.array(symbols).astype(h5py.special_dtype(vlen=str))
    # replace any stale copy of the vocab at this path
    if path in checkpoint:
        del checkpoint[path]
    checkpoint.create_dataset(path, data=vocab, compression='gzip')
Example 25
Project: keras-image-segmentation   Author: dhkim0225   File: make_h5.py    License: MIT License 5 votes vote down vote up
def write_data(h5py_file, mode, x_paths, y_paths):
    """Store quarter-resolution RGB images and label maps for one split."""
    num_data = len(x_paths)

    # h5py variable-length dtype holding raw uint8 pixel data
    pixel_dtype = h5py.special_dtype(vlen=np.dtype('uint8'))

    # one group per split, with parallel 'x' (image) and 'y' (label) datasets
    group = h5py_file.create_group(mode)
    x_dset = group.create_dataset('x', shape=(num_data, ), dtype=pixel_dtype)
    y_dset = group.create_dataset('y', shape=(num_data, ), dtype=pixel_dtype)

    for i in range(num_data):
        # image: cubic resize, then BGR -> RGB
        image = cv2.imread(x_paths[i])
        image = cv2.resize(image, None, fx=0.25, fy=0.25,
                           interpolation=cv2.INTER_CUBIC)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # label: nearest-neighbour resize keeps class ids; use one channel
        label = cv2.imread(y_paths[i])
        label = cv2.resize(label, None, fx=0.25, fy=0.25,
                           interpolation=cv2.INTER_NEAREST)
        label = label[:, :, 0]

        x_dset[i] = image.flatten()
        y_dset[i] = label.flatten()

# Make h5 file. 
Example 26
Project: MSMARCO-Question-Answering   Author: microsoft   File: checkpointing.py    License: MIT License 5 votes vote down vote up
def save_vocab(checkpoint, path, id_to_symb):
    """
    Save a vocab, that is id to symbol mapping, into a hp5y file.
    Used for checkpoints and such.
    """
    ordered = [id_to_symb[i] for i in range(len(id_to_symb))]
    string_dt = h5py.special_dtype(vlen=str)
    vocab = np.array(ordered).astype(string_dt)
    # overwrite an existing dataset at the same path, if any
    if path in checkpoint:
        del checkpoint[path]
    checkpoint.create_dataset(path, data=vocab, compression='gzip')
    return
Example 27
Project: GraphicDesignPatternByPython   Author: Relph1119   File: test_group.py    License: MIT License 5 votes vote down vote up
def test_reference_numpyobj(self):
        """ Object can be opened by numpy.object_ containing object ref

        Test for issue 181, issue 202.
        """
        grp = self.f.create_group('test')

        # compound of an int field and an object-reference field
        ref_dtype = h5py.special_dtype(ref=h5py.Reference)
        compound = np.dtype([('a', 'i'), ('b', ref_dtype)])
        dset = self.f.create_dataset('test_dset', (1,), compound)

        dset[0] = (42, grp.ref)
        row = dset[0]
        self.assertEqual(self.f[row[1]], grp)
Example 28
Project: GraphicDesignPatternByPython   Author: Relph1119   File: test_dataset.py    License: MIT License 5 votes vote down vote up
def test_vlen_bytes(self):
        """ Vlen bytes dataset maps to vlen ascii in the file """
        vlen_bytes = h5py.special_dtype(vlen=bytes)
        dset = self.f.create_dataset('x', (100,), dtype=vlen_bytes)
        file_type = dset.id.get_type()
        self.assertEqual(type(file_type), h5py.h5t.TypeStringID)
        self.assertEqual(file_type.get_cset(), h5py.h5t.CSET_ASCII)
Example 29
Project: GraphicDesignPatternByPython   Author: Relph1119   File: test_dataset.py    License: MIT License 5 votes vote down vote up
def test_vlen_unicode(self):
        """ Vlen unicode dataset maps to vlen utf-8 in the file """
        text_dt = h5py.special_dtype(vlen=six.text_type)
        dset = self.f.create_dataset('x', (100,), dtype=text_dt)
        stored_type = dset.id.get_type()
        self.assertEqual(type(stored_type), h5py.h5t.TypeStringID)
        self.assertEqual(stored_type.get_cset(), h5py.h5t.CSET_UTF8)
Example 30
Project: GraphicDesignPatternByPython   Author: Relph1119   File: test_dataset.py    License: MIT License 5 votes vote down vote up
def test_roundtrip_vlen_bytes(self):
        """ writing and reading to vlen bytes dataset preserves type and content
        """
        payload = b"Hello\xef"
        dset = self.f.create_dataset(
            'x', (100,), dtype=h5py.special_dtype(vlen=bytes))
        dset[0] = payload
        result = dset[0]
        self.assertEqual(type(result), bytes)
        self.assertEqual(result, payload)