Python numpy.memmap() Examples

The following are 30 code examples of numpy.memmap(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module numpy, or try the search function.
Example #1
Source File: memmap.py    From Computable with MIT License 6 votes vote down vote up
def flush(self):
        """
        Push pending modifications of the array out to the file on disk.

        Nothing happens when the array has no ``base`` object, or when that
        base object does not provide a ``flush`` method.

        For further information, see `memmap`.

        See Also
        --------
        memmap

        """
        backing = self.base
        if backing is None:
            return
        if hasattr(backing, 'flush'):
            backing.flush()
Example #2
Source File: loader.py    From Automatic-Identification-and-Counting-of-Blood-Cells with GNU General Public License v3.0 6 votes vote down vote up
def walk(self, size):
        """Return the next ``size`` float32 values of the file as a memmap.

        Parameters
        ----------
        size : int
            Number of float32 values (4 bytes each) to map, starting at
            ``self.offset``.

        Returns
        -------
        numpy.memmap or None
            Read-only 1-D float32 view of the requested values, or ``None``
            when the end of the file was already reached.

        Raises
        ------
        AssertionError
            If the request would read past ``self.size``.
        """
        if self.eof:
            return None
        end_point = self.offset + 4 * size
        # Raised explicitly (instead of through ``assert``) so that the
        # bounds check is still performed when Python runs with -O.
        if end_point > self.size:
            raise AssertionError('Over-read {}'.format(self.path))

        # An explicit dtype/shape pair yields the 1-D float32 array directly
        # and does not depend on how the '(n)float32,' comma-string is parsed.
        float32_1D_array = np.memmap(
            self.path, dtype=np.float32, mode='r',
            offset=self.offset, shape=(size,)
        )

        self.offset = end_point
        if end_point == self.size:
            self.eof = True
        return float32_1D_array
Example #3
Source File: loader.py    From Traffic-Signs-and-Object-Detection with GNU General Public License v3.0 6 votes vote down vote up
def walk(self, size):
        """Map the next ``size`` float32 values of the file and advance.

        Parameters
        ----------
        size : int
            Number of float32 values (4 bytes each) to map, starting at
            ``self.offset``.

        Returns
        -------
        numpy.memmap or None
            Read-only 1-D float32 view of the requested values, or ``None``
            when the end of the file was already reached.

        Raises
        ------
        AssertionError
            If the request would read past ``self.size``.
        """
        if self.eof:
            return None
        end_point = self.offset + 4 * size
        # Explicit raise keeps the bounds check alive under ``python -O``,
        # where plain ``assert`` statements are removed.
        if end_point > self.size:
            raise AssertionError('Over-read {}'.format(self.path))

        # Explicit dtype and shape give the 1-D float32 array directly,
        # independent of '(n)float32,' comma-string dtype parsing.
        float32_1D_array = np.memmap(
            self.path, dtype=np.float32, mode='r',
            offset=self.offset, shape=(size,)
        )

        self.offset = end_point
        if end_point == self.size:
            self.eof = True
        return float32_1D_array
Example #4
Source File: loader.py    From Traffic_sign_detection_YOLO with MIT License 6 votes vote down vote up
def walk(self, size):
        """Hand out the next ``size`` float32 values of the file as a memmap.

        Parameters
        ----------
        size : int
            Number of float32 values (4 bytes each) to map, starting at
            ``self.offset``.

        Returns
        -------
        numpy.memmap or None
            Read-only 1-D float32 view of the requested values, or ``None``
            when the end of the file was already reached.

        Raises
        ------
        AssertionError
            If the request would read past ``self.size``.
        """
        if self.eof:
            return None
        end_point = self.offset + 4 * size
        # ``assert`` is stripped under -O; raise the same exception type
        # explicitly so over-reads are always detected.
        if end_point > self.size:
            raise AssertionError('Over-read {}'.format(self.path))

        # Request the 1-D float32 layout explicitly rather than through the
        # '(n)float32,' comma-string sub-array dtype.
        float32_1D_array = np.memmap(
            self.path, dtype=np.float32, mode='r',
            offset=self.offset, shape=(size,)
        )

        self.offset = end_point
        if end_point == self.size:
            self.eof = True
        return float32_1D_array
Example #5
Source File: memmap.py    From recruit with Apache License 2.0 6 votes vote down vote up
def flush(self):
        """
        Write any changes made to the array back to the file on disk.

        The call is a no-op when ``self.base`` is ``None`` or when the base
        object has no ``flush`` attribute.

        For further information, see `memmap`.

        See Also
        --------
        memmap

        """
        target = self.base
        if target is None or not hasattr(target, 'flush'):
            return
        target.flush()
Example #6
Source File: spikeglx.py    From ibllib with MIT License 6 votes vote down vote up
def __init__(self, sglx_file):
        """Open a SpikeGLX binary file together with its ``.meta`` companion.

        When no ``.meta`` file sits next to ``sglx_file`` the reader keeps
        only the file path and size, sets ``meta`` to ``None`` and the
        conversion factor to 1, logs a warning and returns without creating
        ``self.data``.  Otherwise the metadata is parsed and ``self.data``
        becomes either an ``mtscomp`` reader (compressed file) or a read-only
        int16 memmap of shape ``(self.ns, self.nc)``.

        ``self.is_mtscomp``, ``self.nc`` and ``self.ns`` are read from the
        instance; they are defined outside this block.
        """
        self.file_bin = Path(sglx_file)
        self.nbytes = self.file_bin.stat().st_size
        meta_path = Path(sglx_file).with_suffix('.meta')
        if meta_path.exists():
            # normal case: read and interpret the metadata file
            self.file_meta_data = meta_path
            self.meta = read_meta_data(meta_path)
            self.channel_conversion_sample2v = _conversion_sample2v_from_meta(self.meta)
            if not self.is_mtscomp:
                # uncompressed file: 2 bytes per int16 sample are expected
                expected_nbytes = self.nc * self.ns * 2
                if expected_nbytes != self.nbytes:
                    _logger.warning(str(sglx_file) + " : meta data and filesize do not checkout")
                self.data = np.memmap(sglx_file, dtype='int16', mode='r', shape=(self.ns, self.nc))
            else:
                # compressed file: delegate decoding to mtscomp
                self.data = mtscomp.Reader()
                self.data.open(self.file_bin, self.file_bin.with_suffix('.ch'))
            return
        # degraded case: no metadata available
        self.file_meta_data = None
        self.meta = None
        self.channel_conversion_sample2v = 1
        _logger.warning(str(sglx_file) + " : no metadata file found. Very limited support")
Example #7
Source File: MetaArray.py    From tf-pose with Apache License 2.0 6 votes vote down vote up
def _readData1(self, fd, meta, mmap=False, **kwds):
        """Read axis values and array data of a MetaArray v1 file.

        Parameters
        ----------
        fd : binary file object
            Read sequentially from its current position: first the values of
            every axis that declares ``values_len`` bytes, then the array.
        meta : dict
            Must provide ``'info'`` (list of per-axis dicts), ``'type'``
            (array dtype) and ``'shape'``.  Axis dicts are modified in
            place: ``values_len``/``values_type`` are replaced by ``values``.
        mmap : bool
            If true, the array is memory-mapped read-only from ``fd`` instead
            of being read into memory.
        **kwds
            ``readAllData=False`` stops after the axis information.

        The results are stored in ``self._info`` and ``self._data``.
        """
        ## read in axis values for any axis that specifies a length
        for ax in meta['info']:
            if 'values_len' in ax:
                # frombuffer replaces the deprecated binary mode of
                # np.fromstring; copy() keeps the result writable as before.
                raw = fd.read(ax['values_len'])
                ax['values'] = np.frombuffer(raw, dtype=ax['values_type']).copy()
                del ax['values_len']
                del ax['values_type']
        self._info = meta['info']
        if not kwds.get("readAllData", True):
            return
        ## the remaining data is the actual array
        if mmap:
            # np.memmap ignores the current position of a file object, so the
            # start of the array data has to be passed explicitly.
            subarr = np.memmap(fd, dtype=meta['type'], mode='r',
                               shape=meta['shape'], offset=fd.tell())
        else:
            subarr = np.frombuffer(fd.read(), dtype=meta['type']).copy()
            subarr = subarr.reshape(meta['shape'])
        self._data = subarr
Example #8
Source File: memmap.py    From lambda-packs with MIT License 6 votes vote down vote up
def flush(self):
        """
        Synchronise the array's changes with the file on disk.

        Does nothing if the array has no ``base`` object or if that object
        lacks a ``flush`` method.

        For further information, see `memmap`.

        See Also
        --------
        memmap

        """
        base_obj = self.base
        if base_obj is None:
            return
        if hasattr(base_obj, 'flush'):
            base_obj.flush()
Example #9
Source File: indexed_dataset.py    From fairseq with MIT License 6 votes vote down vote up
def __init__(self, path):
            """Parse the index header at ``path`` and map the file into memory.

            The header is read in order: a 9-byte magic that must equal
            ``self._HDR_MAGIC``, a little-endian uint64 version that must be
            1, a one-byte dtype code looked up in ``dtypes``, and a
            little-endian uint64 entry count.  The whole file is then
            memory-mapped read-only; ``_sizes`` (int32) and ``_pointers``
            (int64) are arrays built over the mapped buffer, located directly
            after the header.
            """
            with open(path, 'rb') as header:
                found_magic = header.read(9)
                assert self._HDR_MAGIC == found_magic, (
                    'Index file doesn\'t match expected format. '
                    'Make sure that --dataset-impl is configured properly.'
                )
                version_fields = struct.unpack('<Q', header.read(8))
                assert version_fields == (1,)

                dtype_code = struct.unpack('<B', header.read(1))[0]
                self._dtype = dtypes[dtype_code]
                self._dtype_size = self._dtype().itemsize

                (self._len,) = struct.unpack('<Q', header.read(8))
                # first byte after the header, where the sizes array starts
                sizes_start = header.tell()

            _warmup_mmap_file(path)

            self._bin_buffer_mmap = np.memmap(path, mode='r', order='C')
            self._bin_buffer = memoryview(self._bin_buffer_mmap)
            self._sizes = np.frombuffer(
                self._bin_buffer, dtype=np.int32, count=self._len, offset=sizes_start)
            # the pointers array follows immediately after the sizes array
            pointers_start = sizes_start + self._sizes.nbytes
            self._pointers = np.frombuffer(
                self._bin_buffer, dtype=np.int64, count=self._len, offset=pointers_start)
Example #10
Source File: memmap.py    From auto-alt-text-lambda-api with MIT License 6 votes vote down vote up
def flush(self):
        """
        Commit in-memory changes of the array to the file on disk.

        When ``self.base`` is ``None`` or does not expose ``flush``, the call
        returns without doing anything.

        For further information, see `memmap`.

        See Also
        --------
        memmap

        """
        owner = self.base
        if owner is None or not hasattr(owner, 'flush'):
            return
        owner.flush()
Example #11
Source File: test_extractors.py    From spikeextractors with MIT License 6 votes vote down vote up
def test_allocate_arrays(self):
        """Check ``allocate_array`` on ``self.RX`` and then on ``self.SX``.

        For each extractor an in-memory and a memmap-backed array are
        requested; both must be ndarrays (the second one a ``np.memmap``)
        with the requested shape and dtype.
        """
        shape = (30, 1000)
        dtype = 'int16'

        for attr in ('RX', 'SX'):
            extractor = getattr(self, attr)
            in_memory = extractor.allocate_array(shape=shape, dtype=dtype, memmap=False)
            mapped = extractor.allocate_array(shape=shape, dtype=dtype, memmap=True)

            assert isinstance(in_memory, np.ndarray)
            assert isinstance(mapped, np.memmap)
            assert in_memory.shape == shape
            assert mapped.shape == shape
            assert in_memory.dtype == dtype
            assert mapped.dtype == dtype
Example #12
Source File: readSGLX.py    From spikeextractors with MIT License 6 votes vote down vote up
def makeMemMapRaw(binFullPath, meta):
    """Memory-map a binary file as a read-only int16 array.

    Parameters
    ----------
    binFullPath : path-like
        Binary file to map.
    meta : mapping
        Must provide ``'nSavedChans'`` and ``'fileSizeBytes'`` as values
        accepted by ``int()``.

    Returns
    -------
    numpy.memmap
        Array of shape ``(nChan, nFileSamp)`` in Fortran order, so
        consecutive int16 values in the file run along the first axis.
    """
    nChan = int(meta['nSavedChans'])
    # Each sample is 2 bytes (int16). Floor division keeps the computation
    # in exact integer arithmetic instead of going through a float.
    nFileSamp = int(meta['fileSizeBytes']) // (2 * nChan)
    print("nChan: %d, nFileSamp: %d" % (nChan, nFileSamp))
    rawData = np.memmap(binFullPath, dtype='int16', mode='r',
                        shape=(nChan, nFileSamp), offset=0, order='F')
    return rawData


# Return an array [lines X timepoints] of uint8 values for a
# specified set of digital lines.
#
# - dwReq is the zero-based index into the saved file of the
#    16-bit word that contains the digital lines of interest.
# - dLineList is a zero-based list of one or more lines/bits
#    to scan from word dwReq.
# 
Example #13
Source File: data.py    From translate with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def load(self, path, num_examples_limit: Optional[int] = None):
        """Load ``buffer`` and ``offsets`` from an ``.npz`` archive.

        Parameters
        ----------
        path
            Location of the archive, opened through ``PathManager``.
        num_examples_limit : int, optional
            When given and smaller than ``len(self.offsets)``, only the first
            ``num_examples_limit`` examples are kept.

        Sets ``self.buffer``, ``self.offsets`` and ``self.sizes``.  A buffer
        larger than ``ARRAY_SIZE_LIMIT_FOR_MEMORY`` bytes is copied into a
        float32 memmap backed by a temporary file, which this method does
        not delete.
        """
        with PathManager.open(path, "rb") as f:
            npz = np.load(f)

            # Each ``npz[...]`` lookup reads the array from the archive
            # again, so fetch the buffer exactly once.
            buffer = npz["buffer"]

            # For big input data, we don't want the cpu to OOM.
            # Therefore, we are loading the huge buffer array into disc
            # and reading it from disc instead of memory.
            if buffer.nbytes > ARRAY_SIZE_LIMIT_FOR_MEMORY:
                # delete=False keeps the reserved file on disk, so the memmap
                # reuses it instead of re-creating a name that was released.
                with tempfile.NamedTemporaryFile(delete=False) as backing:
                    backing_name = backing.name
                self.buffer = np.memmap(
                    backing_name,
                    dtype="float32",
                    mode="w+",
                    shape=buffer.shape,
                )
                self.buffer[:] = buffer
            else:
                self.buffer = buffer
            self.offsets = npz["offsets"]

        if num_examples_limit is not None and len(self.offsets) > num_examples_limit:
            # offsets has one more entry than there are examples
            self.offsets = self.offsets[: num_examples_limit + 1]
            self.buffer = self.buffer[: self.offsets[-1]]
        self.sizes = self.offsets[1:] - self.offsets[:-1]
Example #14
Source File: hashing.py    From estimators with MIT License 6 votes vote down vote up
def __init__(self, hash_name='md5', coerce_mmap=False):
        """
            Set up a hasher that is aware of numpy arrays.

            Parameters
            ----------
            hash_name: string
                The hash algorithm to be used
            coerce_mmap: boolean
                Make no difference between np.memmap and np.ndarray
                objects.
        """
        self.coerce_mmap = coerce_mmap
        Hasher.__init__(self, hash_name=hash_name)
        # numpy is imported lazily, to avoid tight coupling
        import numpy as np
        self.np = np
        # use np.getbuffer when this numpy provides it, else memoryview
        self._getbuffer = np.getbuffer if hasattr(np, 'getbuffer') else memoryview
Example #15
Source File: hashing.py    From estimators with MIT License 6 votes vote down vote up
def hash(obj, hash_name='md5', coerce_mmap=False):
    """ Compute a hash that identifies a Python object, including objects
        that contain numpy arrays.

        A ``NumpyHasher`` is used when numpy has already been imported
        somewhere in the process, a plain ``Hasher`` otherwise.

        Parameters
        -----------
        hash_name: 'md5' or 'sha1'
            Hashing algorithm used. sha1 is supposedly safer, but md5 is
            faster.
        coerce_mmap: boolean
            Make no difference between np.memmap and np.ndarray
    """
    if 'numpy' not in sys.modules:
        return Hasher(hash_name=hash_name).hash(obj)
    numpy_hasher = NumpyHasher(hash_name=hash_name, coerce_mmap=coerce_mmap)
    return numpy_hasher.hash(obj)
Example #16
Source File: memmap.py    From vnpy_crypto with MIT License 6 votes vote down vote up
def flush(self):
        """
        Write out to disk whatever has changed in the array.

        If the array has no ``base``, or the base offers no ``flush``
        method, the call has no effect.

        For further information, see `memmap`.

        See Also
        --------
        memmap

        """
        mapped = self.base
        if mapped is None:
            return
        if hasattr(mapped, 'flush'):
            mapped.flush()
Example #17
Source File: backend.py    From mlens with MIT License 6 votes vote down vote up
def _gen_prediction_array(self, task, job, threading):
        """Generate prediction array either in-memory or persist to disk.

        The array shape comes from ``task.shape(job)`` and its dtype from
        ``_dtype(task)``.  With ``threading`` true a zero-filled in-memory
        array is stored on ``self.job.predict_out``; otherwise a memmap is
        created (mode ``'w+'``) in ``self.job.dir`` under the name
        ``<task.name>_out_array.mmap``.

        Raises
        ------
        OSError
            If the memmap cannot be created.
        """
        shape = task.shape(job)
        if threading:
            self.job.predict_out = np.zeros(shape, dtype=_dtype(task))
        else:
            f = os.path.join(self.job.dir, '%s_out_array.mmap' % task.name)
            try:
                self.job.predict_out = np.memmap(
                    filename=f, dtype=_dtype(task), mode='w+', shape=shape)
            except Exception as exc:
                # Build the message for any number of dimensions, so that a
                # non-2-D shape cannot raise here and hide the real error.
                raw_dims = shape if isinstance(shape, (tuple, list)) else (shape,)
                dims = tuple(int(dim) for dim in raw_dims)
                n_elements = 1
                for dim in dims:
                    n_elements *= dim
                # size estimate assumes 8 bytes per element
                raise OSError(
                    "Cannot create prediction matrix of shape "
                    "%s, size %i MBs, for %s.\n Details:\n%r" %
                    (dims, 8 * n_elements / (1024 ** 2), task.name, exc))
Example #18
Source File: hashing.py    From mlens with MIT License 6 votes vote down vote up
def __init__(self, hash_name='md5', coerce_mmap=False):
        """
            Initialise the numpy-aware hasher.

            Parameters
            ----------
            hash_name: string
                The hash algorithm to be used
            coerce_mmap: boolean
                Make no difference between np.memmap and np.ndarray
                objects.
        """
        self.coerce_mmap = coerce_mmap
        Hasher.__init__(self, hash_name=hash_name)
        # delayed numpy import keeps the module loosely coupled to numpy
        import numpy as np
        self.np = np
        # fall back to memoryview when numpy offers no getbuffer
        self._getbuffer = getattr(np, 'getbuffer', memoryview)
Example #19
Source File: elemwise.py    From D-VAE with MIT License 6 votes vote down vote up
def perform(self, node, inp, out):
        """Transpose, reshape and optionally copy the single input array.

        ``inp`` holds exactly one array and ``out`` exactly one storage
        cell; the result is written to ``cell[0]``.  The input must be
        exactly a ``numpy.ndarray`` or ``numpy.memmap`` (subclasses are
        rejected with ``TypeError``).  Axes are ordered as
        ``self.shuffle + self.drop``, the leading ``len(self.shuffle)``
        extents are kept, and an axis of length 1 is inserted at each
        position listed in ``self.augment``.
        """
        (arr,) = inp
        (cell,) = out
        if type(arr) not in (numpy.ndarray, numpy.memmap):
            raise TypeError(arr)

        # move the kept axes to the front, the dropped ones to the back
        arr = arr.transpose(self.shuffle + self.drop)

        # keep the leading extents and insert the new unit axes
        kept = len(self.shuffle)
        new_shape = list(arr.shape[:kept])
        for position in self.augment:
            new_shape.insert(position, 1)
        arr = arr.reshape(new_shape)

        # detach from the input unless the op works in place
        if not self.inplace:
            arr = numpy.copy(arr)

        cell[0] = numpy.asarray(arr)  # asarray puts scalars back into array
Example #20
Source File: sequentialfile.py    From baseband with GNU General Public License v3.0 5 votes vote down vote up
def open(files, mode='rb', file_size=None, opener=None):
    """Open a sequence of files so they behave like one contiguous file.

    Parameters
    ----------
    files : list, tuple, or other iterable of str, filehandle
        Names of the underlying files to combine, ordered in time.  If not
        a list or tuple, it should support indexing with positive indices
        and raise `IndexError` when these are out of range.
    mode : str, optional
        Mode in which the files are opened (default: 'rb').  A mode
        containing 'r' gives a reader, otherwise one containing 'w' gives a
        writer.
    file_size : int, optional
        Writing only: maximum size of a single file, beyond which a new
        file is opened.  Default: `None`, meaning no limit, so only a single
        file is written.
    opener : callable, optional
        Function used to open a single file (default: `io.open`).

    Raises
    ------
    TypeError
        If ``file_size`` is passed while opening for reading.
    ValueError
        If ``mode`` contains neither 'r' nor 'w'.

    Notes
    -----
    The returned reader/writer has a ``memmap`` method that maps part of the
    files to memory (like `~numpy.memmap`), provided the part does not span
    files and the underlying files are regular ones.  For writing, this
    requires read-write mode (i.e., 'w+b').

    Methods other than ``read``, ``write``, ``seek``, ``tell``, and
    ``close`` are tried on the underlying file.  So, e.g., ``readline``
    works, as long as the line does not span multiple files.

    The reader assumes the sequence of files is **contiguous in time**, ie.
    with no gaps in the data.
    """
    wants_read = 'r' in mode
    if not wants_read and 'w' not in mode:
        raise ValueError("invalid mode '{0}'".format(mode))

    if not wants_read:
        return SequentialFileWriter(files, mode, file_size=file_size,
                                    opener=opener)

    if file_size is not None:
        raise TypeError("cannot pass in 'file_size' for reading.")
    return SequentialFileReader(files, mode, opener=opener)
Example #21
Source File: data.py    From translate with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def __del__(self):
        """Remove the memmap's backing file, or defer to the parent class.

        For npz-based instances the file behind ``self.buffer`` is removed
        when the buffer is a ``np.memmap``; all other instances fall back to
        the parent ``__del__``.
        """
        if not self.is_npz:
            super().__del__()
            return
        buffer = self.buffer
        if isinstance(buffer, np.memmap):
            os.remove(buffer.filename)
Example #22
Source File: extraction_tools.py    From spikeextractors with MIT License 5 votes vote down vote up
def read_binary(file, numchan, dtype, time_axis=0, offset=0):
    '''
    Memory-map a binary .bin or .dat file (read-only).

    Parameters
    ----------
    file: str
        File name
    numchan: int
        Number of channels
    dtype: dtype
        dtype of the file
    time_axis: 0 (default) or 1
        If 0, the file is mapped as (nb_sample, nb_channel) and the
        transposed view is returned.
        If 1, the file is mapped as (nb_channel, nb_sample) and returned
        unchanged.
    offset: int
        number of offset bytes

    Returns
    -------
    samples: np.memmap
        Array of shape (nb_channel, nb_sample) in both cases.
    '''
    numchan = int(numchan)
    payload_bytes = Path(file).stat().st_size - offset
    sample_dtype = np.dtype(dtype)
    # incomplete trailing frames are ignored by the floor division
    nsamples = payload_bytes // (numchan * sample_dtype.itemsize)
    if time_axis != 0:
        return np.memmap(file, sample_dtype, mode='r', offset=offset,
                         shape=(numchan, nsamples))
    by_sample = np.memmap(file, sample_dtype, mode='r', offset=offset,
                          shape=(nsamples, numchan))
    return by_sample.T
Example #23
Source File: memmap.py    From vnpy_crypto with MIT License 5 votes vote down vote up
def __getitem__(self, index):
        """Index the array; results that are a ``memmap`` without an
        underlying ``_mmap`` are returned as plain ``ndarray`` views."""
        item = super(memmap, self).__getitem__(index)
        if type(item) is not memmap or item._mmap is not None:
            return item
        return item.view(type=ndarray)
Example #24
Source File: test_tools.py    From spikeextractors with MIT License 5 votes vote down vote up
def test_write_dat_file(self):
        """Write ``self.RX`` to ``rec.dat`` in several layouts and read it back.

        Every case writes the recording as float32 with the given
        ``time_axis`` and chunking option, memory-maps the file read-only and
        compares it with ``self.RX.get_traces()``.
        """
        nb_sample = self.RX.get_num_frames()
        nb_chan = self.RX.get_num_channels()
        dat_path = self.test_dir + 'rec.dat'

        # (time_axis, chunking keyword arguments)
        cases = (
            (0, {'chunk_size': None}),
            (1, {'chunk_size': None}),
            (0, {'chunk_size': 99}),
            (0, {'chunk_mb': 2}),
            (1, {'chunk_mb': 2}),
        )
        for time_axis, chunk_kwargs in cases:
            self.RX.write_to_binary_dat_format(dat_path, time_axis=time_axis, dtype='float32', **chunk_kwargs)
            # Pass the path, not an open() handle: np.memmap then opens and
            # closes the file itself, so no file object is left unclosed.
            if time_axis == 0:
                data = np.memmap(dat_path, dtype='float32', mode='r', shape=(nb_sample, nb_chan)).T
            else:
                data = np.memmap(dat_path, dtype='float32', mode='r', shape=(nb_chan, nb_sample))
            assert np.allclose(data, self.RX.get_traces())
            del data  # drop the mapping before the file is rewritten
Example #25
Source File: sequentialfile.py    From baseband with GNU General Public License v3.0 5 votes vote down vote up
def memmap(self, dtype=np.uint8, mode=None, offset=None, shape=None,
               order='C'):
        """Map part of the file in memory.

        Note that the map cannot span multiple underlying files.
        Parameters are as for `~numpy.memmap`.  When ``mode`` is omitted it
        is taken from ``self.mode`` with the 'b' removed; when ``shape`` is
        omitted all remaining data is mapped.  After the call the underlying
        file handle is positioned just past the mapped region.

        Raises
        ------
        ValueError
            If the file is closed, if the remaining data is not a multiple
            of the item size, or if the map would extend beyond the current
            underlying file.
        """
        if self.closed:
            raise ValueError('memmap of closed file.')

        dtype = np.dtype(dtype)
        mode = self.mode.replace('b', '') if mode is None else mode

        if offset is not None and offset != self.tell():
            # seek will fail for SequentialFileWriter, so we try to avoid it.
            self.seek(offset)
        elif self.fh.tell() == self._file_sizes[self.file_nr]:
            # at the very end of the current file: move on to the next one
            self._open(self.file_nr + 1)

        if shape is not None:
            shape = shape if isinstance(shape, tuple) else (shape,)
            # number of bytes covered by the requested shape
            count = dtype.itemsize
            for extent in shape:
                count *= extent
        else:
            count = self.size - self.tell()
            if count % dtype.itemsize:
                raise ValueError("size of available data is not a "
                                 "multiple of the data-type size.")
            shape = (count // dtype.itemsize,)

        file_offset = self.fh.tell()
        if file_offset + count > self._file_sizes[self.file_nr]:
            raise ValueError('mmap length exceeds individual file size')

        mapped = np.memmap(self.fh, dtype=dtype, mode=mode,
                           offset=file_offset, shape=shape, order=order)
        # leave the handle right after the mapped region
        self.fh.seek(file_offset + count)
        return mapped
Example #26
Source File: parrec.py    From me-ica with GNU Lesser General Public License v2.1 5 votes vote down vote up
def raw_data_from_fileobj(self, fileobj):
        """Return the raw, unscaled image data as a memmap array.

        Array axes correspond to x,y,z,t.

        The dtype and shape come from ``self.get_data_dtype()`` and
        ``self.get_data_shape_in_file()``.
        """
        # the data is guaranteed to be uncompressed and all properties are
        # known, so it can be memory-mapped directly
        mapping_options = dict(
            dtype=self.get_data_dtype(),
            mode='c',  # copy-on-write
            shape=self.get_data_shape_in_file(),
            order='F',  # Fortran order puts the spatial axes first
        )
        return np.memmap(fileobj, **mapping_options)
Example #27
Source File: _base_functions.py    From mlens with MIT License 5 votes vote down vote up
def slice_array(x, y, idx, r=0):
    """Build training array index and slice data.

    Parameters
    ----------
    x : array-like
        Input data to slice along its first axis.
    y : array-like or None
        Targets, sliced the same way as ``x`` when given.
    idx : 'all', None, tuple
        ``'all'`` or ``None`` keeps the data as is.  ``(start, stop)`` or
        ``((start, stop),)`` selects one contiguous range with basic
        slicing.  A tuple of several ``(start, stop)`` pairs selects their
        concatenation through ``_safe_slice``.
    r : int
        Offset subtracted from every index bound.

    Returns
    -------
    x, y
        The sliced inputs; ``np.memmap`` instances are returned as plain
        ``ndarray`` views.
    """
    if idx == 'all':
        idx = None

    if idx:
        # Check if the idx is a tuple and if so, whether it can be made
        # into a simple slice
        if isinstance(idx[0], tuple):
            # Count the (start, stop) pairs, not the items of the first
            # pair: a single pair must reach the simple-slice branch below.
            if len(idx) > 1:
                # Advanced indexing is required. This will trigger a copy
                # of the slice in question to be made
                simple_slice = False
                idx = np.hstack([np.arange(t0 - r, t1 - r) for t0, t1 in idx])
                x = _safe_slice(x, idx)
                y = _safe_slice(y, idx)
            else:
                # The tuple is of the form ((a, b),) and can be made
                # into a simple (a, b) tuple for which basic slicing applies
                # which allows a view to be returned instead of a copy
                simple_slice = True
                idx = idx[0]
        else:
            # Index tuples of the form (a, b) allows simple slicing
            simple_slice = True

        if simple_slice:
            x = x[slice(idx[0] - r, idx[1] - r)]
            y = y[slice(idx[0] - r, idx[1] - r)] if y is not None else y

    # Cast as ndarray to avoid passing memmaps to estimators
    if y is not None and isinstance(y, np.memmap):
        y = y.view(type=np.ndarray)
    if not issparse(x) and isinstance(x, np.memmap):
        x = x.view(type=np.ndarray)

    return x, y
Example #28
Source File: backend.py    From mlens with MIT License 5 votes vote down vote up
def _initialize(self, job, X, y=None, path=None, **kwargs):
        """Create a job instance for estimation.

        See :func:`~mlens.parallel.backend.BaseProcess.initialize` for
        further details.

        In threading mode the inputs are used directly (string inputs are
        first loaded with ``_load``); otherwise each input is dumped into the
        job directory and loaded back with ``_load_mmap``.  ``X`` ends up on
        ``job.predict_in`` and ``y`` on ``job.targets``.
        """
        job = Job(job, **kwargs)
        job = _set_path(job, path, self.__threading__)

        # --- Prepare inputs
        for name, arr in (('X', X), ('y', y)):
            if arr is None:
                continue

            if self.__threading__:
                # No need to memmap: keep the array itself
                if isinstance(arr, str):
                    arr = _load(arr)
                stored = arr
            else:
                # Dump data in cache and read it back as a memmap
                stored = _load_mmap(dump_array(arr, name, job.dir))

            # Store data for processing
            if name == 'y' and arr is not None:
                job.targets = stored
            elif name == 'X':
                job.predict_in = stored

        self.job = job
        self.__initialized__ = 1
        gc.collect()
        return self
Example #29
Source File: memmap.py    From vnpy_crypto with MIT License 5 votes vote down vote up
def __array_wrap__(self, arr, context=None):
        """Wrap a ufunc result, demoting fresh results of a plain memmap.

        The parent's wrapped result is returned unchanged when it is
        ``self`` or when ``self`` is not exactly a ``memmap``; otherwise
        0-d results become scalars and all others plain ``ndarray`` views.
        """
        wrapped = super(memmap, self).__array_wrap__(arr, context)

        # Only a plain memmap whose result is a new array is demoted; a
        # memmap given as the ufunc output and memmap subclasses are kept.
        if type(self) is memmap and self is not wrapped:
            if wrapped.shape == ():
                # Return scalar instead of 0d memmap, e.g. for np.sum with
                # axis=None
                return wrapped[()]
            # Return ndarray otherwise
            return wrapped.view(np.ndarray)
        return wrapped
Example #30
Source File: loader.py    From Automatic-Identification-and-Counting-of-Blood-Cells with GNU General Public License v3.0 5 votes vote down vote up
def __init__(self, path):
        """Prepare sequential reading of the binary file at ``path``.

        With ``path`` set to ``None`` the walker is immediately at
        end-of-file.  Otherwise the file size is recorded and the first four
        int32 values of the file are read as ``major, minor, revision,
        seen``.  ``self.transpose`` is true when ``major`` or ``minor``
        exceeds 1000, and reading continues at byte 16, directly after those
        four values.
        """
        self.eof = False  # end of file
        self.path = path  # save the path
        if path is None:
            self.eof = True
            return
        self.size = os.path.getsize(path)
        # Explicit dtype and shape give the four int32 header values
        # directly, independent of '(4)i4,' comma-string dtype parsing.
        header = np.memmap(path, dtype=np.int32, mode='r', offset=0, shape=(4,))
        major, minor, _revision, _seen = header
        self.transpose = major > 1000 or minor > 1000
        self.offset = 16  # current pos: just past the 4 * 4-byte header