Python numpy.memmap() Examples
The following are 30 code examples of numpy.memmap(), collected from open-source projects. You can go to the original project or source file by following the link above each example. You may also want to check out the other available functions and classes of the numpy module.
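Before diving into the project examples, here is a minimal, self-contained sketch of the basic numpy.memmap workflow. The file name, dtype, and shape are illustrative assumptions, not taken from any project below:

import numpy as np

# Create a writable array backed by a file on disk (mode 'w+' creates
# or overwrites the file and allows both reading and writing).
mm = np.memmap('data.bin', dtype='float32', mode='w+', shape=(1000, 4))
mm[:] = 1.0        # assignments write through to the mapped file
mm.flush()         # push any pending changes out to disk
del mm             # drop the reference to close the map

# Re-open read-only; shape and dtype are not stored in the file,
# so they must be supplied again.
ro = np.memmap('data.bin', dtype='float32', mode='r', shape=(1000, 4))
print(ro.mean())   # -> 1.0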
Example #1
Source File: memmap.py From Computable with MIT License
def flush(self):
    """
    Write any changes in the array to the file on disk.

    For further information, see `memmap`.

    Parameters
    ----------
    None

    See Also
    --------
    memmap
    """
    if self.base is not None and hasattr(self.base, 'flush'):
        self.base.flush()
Example #2
Source File: loader.py From Automatic-Identification-and-Counting-of-Blood-Cells with GNU General Public License v3.0
def walk(self, size):
    if self.eof:
        return None
    end_point = self.offset + 4 * size
    assert end_point <= self.size, \
        'Over-read {}'.format(self.path)
    float32_1D_array = np.memmap(
        self.path, shape=(), mode='r',
        offset=self.offset,
        dtype='({})float32,'.format(size)
    )
    self.offset = end_point
    if end_point == self.size:
        self.eof = True
    return float32_1D_array
Example #3
Source File: loader.py From Traffic-Signs-and-Object-Detection with GNU General Public License v3.0
def walk(self, size):
    if self.eof:
        return None
    end_point = self.offset + 4 * size
    assert end_point <= self.size, \
        'Over-read {}'.format(self.path)
    float32_1D_array = np.memmap(
        self.path, shape=(), mode='r',
        offset=self.offset,
        dtype='({})float32,'.format(size)
    )
    self.offset = end_point
    if end_point == self.size:
        self.eof = True
    return float32_1D_array
Example #4
Source File: loader.py From Traffic_sign_detection_YOLO with MIT License
def walk(self, size):
    if self.eof:
        return None
    end_point = self.offset + 4 * size
    assert end_point <= self.size, \
        'Over-read {}'.format(self.path)
    float32_1D_array = np.memmap(
        self.path, shape=(), mode='r',
        offset=self.offset,
        dtype='({})float32,'.format(size)
    )
    self.offset = end_point
    if end_point == self.size:
        self.eof = True
    return float32_1D_array
Example #5
Source File: memmap.py From recruit with Apache License 2.0
def flush(self):
    """
    Write any changes in the array to the file on disk.

    For further information, see `memmap`.

    Parameters
    ----------
    None

    See Also
    --------
    memmap
    """
    if self.base is not None and hasattr(self.base, 'flush'):
        self.base.flush()
Example #6
Source File: spikeglx.py From ibllib with MIT License
def __init__(self, sglx_file):
    self.file_bin = Path(sglx_file)
    self.nbytes = self.file_bin.stat().st_size
    file_meta_data = Path(sglx_file).with_suffix('.meta')
    if not file_meta_data.exists():
        self.file_meta_data = None
        self.meta = None
        self.channel_conversion_sample2v = 1
        _logger.warning(str(sglx_file) + " : no metadata file found. Very limited support")
        return
    # normal case we continue reading and interpreting the metadata file
    self.file_meta_data = file_meta_data
    self.meta = read_meta_data(file_meta_data)
    self.channel_conversion_sample2v = _conversion_sample2v_from_meta(self.meta)
    # if we are not looking at a compressed file, use a memmap, otherwise instantiate mtscomp
    if self.is_mtscomp:
        self.data = mtscomp.Reader()
        self.data.open(self.file_bin, self.file_bin.with_suffix('.ch'))
    else:
        if self.nc * self.ns * 2 != self.nbytes:
            _logger.warning(str(sglx_file) + " : meta data and filesize do not checkout")
        self.data = np.memmap(sglx_file, dtype='int16', mode='r', shape=(self.ns, self.nc))
Example #7
Source File: MetaArray.py From tf-pose with Apache License 2.0
def _readData1(self, fd, meta, mmap=False, **kwds):
    ## Read array data from the file descriptor for MetaArray v1 files
    ## read in axis values for any axis that specifies a length
    frameSize = 1
    for ax in meta['info']:
        if 'values_len' in ax:
            ax['values'] = np.fromstring(fd.read(ax['values_len']), dtype=ax['values_type'])
            frameSize *= ax['values_len']
            del ax['values_len']
            del ax['values_type']
    self._info = meta['info']
    if not kwds.get("readAllData", True):
        return
    ## the remaining data is the actual array
    if mmap:
        subarr = np.memmap(fd, dtype=meta['type'], mode='r', shape=meta['shape'])
    else:
        subarr = np.fromstring(fd.read(), dtype=meta['type'])
        subarr.shape = meta['shape']
    self._data = subarr
Example #8
Source File: memmap.py From lambda-packs with MIT License
def flush(self):
    """
    Write any changes in the array to the file on disk.

    For further information, see `memmap`.

    Parameters
    ----------
    None

    See Also
    --------
    memmap
    """
    if self.base is not None and hasattr(self.base, 'flush'):
        self.base.flush()
Example #9
Source File: indexed_dataset.py From fairseq with MIT License
def __init__(self, path):
    with open(path, 'rb') as stream:
        magic_test = stream.read(9)
        assert self._HDR_MAGIC == magic_test, (
            'Index file doesn\'t match expected format. '
            'Make sure that --dataset-impl is configured properly.'
        )
        version = struct.unpack('<Q', stream.read(8))
        assert (1,) == version

        dtype_code, = struct.unpack('<B', stream.read(1))
        self._dtype = dtypes[dtype_code]
        self._dtype_size = self._dtype().itemsize

        self._len = struct.unpack('<Q', stream.read(8))[0]
        offset = stream.tell()

    _warmup_mmap_file(path)

    self._bin_buffer_mmap = np.memmap(path, mode='r', order='C')
    self._bin_buffer = memoryview(self._bin_buffer_mmap)
    self._sizes = np.frombuffer(self._bin_buffer, dtype=np.int32,
                                count=self._len, offset=offset)
    self._pointers = np.frombuffer(self._bin_buffer, dtype=np.int64,
                                   count=self._len,
                                   offset=offset + self._sizes.nbytes)
Example #10
Source File: memmap.py From auto-alt-text-lambda-api with MIT License
def flush(self):
    """
    Write any changes in the array to the file on disk.

    For further information, see `memmap`.

    Parameters
    ----------
    None

    See Also
    --------
    memmap
    """
    if self.base is not None and hasattr(self.base, 'flush'):
        self.base.flush()
Example #11
Source File: test_extractors.py From spikeextractors with MIT License
def test_allocate_arrays(self):
    shape = (30, 1000)
    dtype = 'int16'

    arr_in_memory = self.RX.allocate_array(shape=shape, dtype=dtype, memmap=False)
    arr_memmap = self.RX.allocate_array(shape=shape, dtype=dtype, memmap=True)

    assert isinstance(arr_in_memory, np.ndarray)
    assert isinstance(arr_memmap, np.memmap)
    assert arr_in_memory.shape == shape
    assert arr_memmap.shape == shape
    assert arr_in_memory.dtype == dtype
    assert arr_memmap.dtype == dtype

    arr_in_memory = self.SX.allocate_array(shape=shape, dtype=dtype, memmap=False)
    arr_memmap = self.SX.allocate_array(shape=shape, dtype=dtype, memmap=True)

    assert isinstance(arr_in_memory, np.ndarray)
    assert isinstance(arr_memmap, np.memmap)
    assert arr_in_memory.shape == shape
    assert arr_memmap.shape == shape
    assert arr_in_memory.dtype == dtype
    assert arr_memmap.dtype == dtype
Example #12
Source File: readSGLX.py From spikeextractors with MIT License
def makeMemMapRaw(binFullPath, meta):
    nChan = int(meta['nSavedChans'])
    nFileSamp = int(int(meta['fileSizeBytes'])/(2*nChan))
    print("nChan: %d, nFileSamp: %d" % (nChan, nFileSamp))
    rawData = np.memmap(binFullPath, dtype='int16', mode='r',
                        shape=(nChan, nFileSamp), offset=0, order='F')
    return(rawData)


# Return an array [lines X timepoints] of uint8 values for a
# specified set of digital lines.
#
# - dwReq is the zero-based index into the saved file of the
#   16-bit word that contains the digital lines of interest.
# - dLineList is a zero-based list of one or more lines/bits
#   to scan from word dwReq.
#
Example #13
Source File: data.py From translate with BSD 3-Clause "New" or "Revised" License
def load(self, path, num_examples_limit: Optional[int] = None):
    with PathManager.open(path, "rb") as f:
        npz = np.load(f)

    # For big input data, we don't want the cpu to OOM.
    # Therefore, we are loading the huge buffer array into disc
    # and reading it from disc instead of memory.
    if npz["buffer"].nbytes > ARRAY_SIZE_LIMIT_FOR_MEMORY:
        self.buffer = np.memmap(
            tempfile.NamedTemporaryFile().name,
            dtype="float32",
            mode="w+",
            shape=npz["buffer"].shape,
        )
        self.buffer[:] = npz["buffer"][:]
    else:
        self.buffer = npz["buffer"]
    self.offsets = npz["offsets"]

    if num_examples_limit is not None and len(self.offsets) > num_examples_limit:
        self.offsets = self.offsets[: num_examples_limit + 1]
        self.buffer = self.buffer[: self.offsets[-1]]
    self.sizes = self.offsets[1:] - self.offsets[:-1]
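The pattern above (spilling an oversized array to a temporary file and reading it back through a memmap) generalizes beyond this codebase. A minimal sketch under assumed conditions; the 32 MB threshold and the helper name are hypothetical:

import tempfile

import numpy as np

SIZE_LIMIT = 32 * 1024 ** 2  # assumed threshold in bytes

def to_memmap_if_large(arr):
    # Keep small arrays in memory; back large ones with a temp file.
    if arr.nbytes <= SIZE_LIMIT:
        return arr
    tmp = tempfile.NamedTemporaryFile(suffix='.mmap', delete=False)
    out = np.memmap(tmp.name, dtype=arr.dtype, mode='w+', shape=arr.shape)
    out[:] = arr[:]  # copy once; subsequent reads page in from disk
    return out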
Example #14
Source File: hashing.py From estimators with MIT License
def __init__(self, hash_name='md5', coerce_mmap=False):
    """
    Parameters
    ----------
    hash_name: string
        The hash algorithm to be used
    coerce_mmap: boolean
        Make no difference between np.memmap and np.ndarray objects.
    """
    self.coerce_mmap = coerce_mmap
    Hasher.__init__(self, hash_name=hash_name)
    # delayed import of numpy, to avoid tight coupling
    import numpy as np
    self.np = np
    if hasattr(np, 'getbuffer'):
        self._getbuffer = np.getbuffer
    else:
        self._getbuffer = memoryview
Example #15
Source File: hashing.py From estimators with MIT License
def hash(obj, hash_name='md5', coerce_mmap=False):
    """ Quick calculation of a hash to identify uniquely Python objects
        containing numpy arrays.

        Parameters
        ----------
        hash_name: 'md5' or 'sha1'
            Hashing algorithm used. sha1 is supposedly safer, but md5 is faster.
        coerce_mmap: boolean
            Make no difference between np.memmap and np.ndarray
    """
    if 'numpy' in sys.modules:
        hasher = NumpyHasher(hash_name=hash_name, coerce_mmap=coerce_mmap)
    else:
        hasher = Hasher(hash_name=hash_name)
    return hasher.hash(obj)
Example #16
Source File: memmap.py From vnpy_crypto with MIT License
def flush(self):
    """
    Write any changes in the array to the file on disk.

    For further information, see `memmap`.

    Parameters
    ----------
    None

    See Also
    --------
    memmap
    """
    if self.base is not None and hasattr(self.base, 'flush'):
        self.base.flush()
Example #17
Source File: backend.py From mlens with MIT License
def _gen_prediction_array(self, task, job, threading):
    """Generate prediction array either in-memory or persist to disk."""
    shape = task.shape(job)
    if threading:
        self.job.predict_out = np.zeros(shape, dtype=_dtype(task))
    else:
        f = os.path.join(self.job.dir, '%s_out_array.mmap' % task.name)
        try:
            self.job.predict_out = np.memmap(
                filename=f, dtype=_dtype(task), mode='w+', shape=shape)
        except Exception as exc:
            raise OSError(
                "Cannot create prediction matrix of shape ("
                "%i, %i), size %i MBs, for %s.\n Details:\n%r" %
                (shape[0], shape[1],
                 8 * shape[0] * shape[1] / (1024 ** 2),
                 task.name, exc))
Example #18
Source File: hashing.py From mlens with MIT License
def __init__(self, hash_name='md5', coerce_mmap=False):
    """
    Parameters
    ----------
    hash_name: string
        The hash algorithm to be used
    coerce_mmap: boolean
        Make no difference between np.memmap and np.ndarray objects.
    """
    self.coerce_mmap = coerce_mmap
    Hasher.__init__(self, hash_name=hash_name)
    # delayed import of numpy, to avoid tight coupling
    import numpy as np
    self.np = np
    if hasattr(np, 'getbuffer'):
        self._getbuffer = np.getbuffer
    else:
        self._getbuffer = memoryview
Example #19
Source File: elemwise.py From D-VAE with MIT License
def perform(self, node, inp, out):
    input, = inp
    storage, = out

    # drop
    res = input
    if type(res) != numpy.ndarray and type(res) != numpy.memmap:
        raise TypeError(res)

    # transpose
    res = res.transpose(self.shuffle + self.drop)

    # augment
    shape = list(res.shape[:len(self.shuffle)])
    for augm in self.augment:
        shape.insert(augm, 1)
    res = res.reshape(shape)

    # copy (if not inplace)
    if not self.inplace:
        res = numpy.copy(res)

    storage[0] = numpy.asarray(res)  # asarray puts scalars back into array
Example #20
Source File: sequentialfile.py From baseband with GNU General Public License v3.0
def open(files, mode='rb', file_size=None, opener=None):
    """Read or write several files as if they were one contiguous one.

    Parameters
    ----------
    files : list, tuple, or other iterable of str, filehandle
        Contains the names of the underlying files that should be combined,
        ordered in time.  If not a list or tuple, it should allow indexing
        with positive indices, and raise `IndexError` if these are out of
        range.
    mode : str, optional
        The mode with which the files should be opened (default: 'rb').
    file_size : int, optional
        For writing, the maximum size of a file, beyond which a new file
        should be opened.  Default: `None`, which means it is unlimited and
        only a single file will be written.
    opener : callable, optional
        Function to open a single file (default: `io.open`).

    Notes
    -----
    The returned reader/writer will have a ``memmap`` method with which part
    of the files can be mapped to memory (like with `~numpy.memmap`), as
    long as those parts do not span files (and the underlying files are
    regular ones).  For writing, this requires opening in read-write mode
    (i.e., 'w+b').

    Methods other than ``read``, ``write``, ``seek``, ``tell``, and
    ``close`` are tried on the underlying file.  This implies, e.g.,
    ``readline`` is possible, though the line cannot span multiple files.

    The reader assumes the sequence of files is **contiguous in time**,
    i.e., with no gaps in the data.
    """
    if 'r' in mode:
        if file_size is not None:
            raise TypeError("cannot pass in 'file_size' for reading.")
        return SequentialFileReader(files, mode, opener=opener)
    elif 'w' in mode:
        return SequentialFileWriter(files, mode, file_size=file_size,
                                    opener=opener)
    else:
        raise ValueError("invalid mode '{0}'".format(mode))
Example #21
Source File: data.py From translate with BSD 3-Clause "New" or "Revised" License
def __del__(self):
    if self.is_npz:
        if isinstance(self.buffer, np.memmap):
            os.remove(self.buffer.filename)
    else:
        super().__del__()
Example #22
Source File: extraction_tools.py From spikeextractors with MIT License
def read_binary(file, numchan, dtype, time_axis=0, offset=0):
    '''
    Reads binary .bin or .dat file.

    Parameters
    ----------
    file: str
        File name
    numchan: int
        Number of channels
    dtype: dtype
        dtype of the file
    time_axis: 0 (default) or 1
        If 0 then traces are transposed to ensure (nb_sample, nb_channel) in the file.
        If 1, the traces shape (nb_channel, nb_sample) is kept in the file.
    offset: int
        number of offset bytes
    '''
    numchan = int(numchan)
    with Path(file).open() as f:
        nsamples = (os.fstat(f.fileno()).st_size - offset) // (numchan * np.dtype(dtype).itemsize)
    if time_axis == 0:
        samples = np.memmap(file, np.dtype(dtype), mode='r', offset=offset,
                            shape=(nsamples, numchan))
        samples = np.memmap.transpose(samples)
    else:
        samples = np.memmap(file, np.dtype(dtype), mode='r', offset=offset,
                            shape=(numchan, nsamples))
    return samples
Example #23
Source File: memmap.py From vnpy_crypto with MIT License
def __getitem__(self, index):
    res = super(memmap, self).__getitem__(index)
    if type(res) is memmap and res._mmap is None:
        return res.view(type=ndarray)
    return res
Example #24
Source File: test_tools.py From spikeextractors with MIT License
def test_write_dat_file(self):
    nb_sample = self.RX.get_num_frames()
    nb_chan = self.RX.get_num_channels()

    # time_axis=0 chunk_size=None
    self.RX.write_to_binary_dat_format(self.test_dir + 'rec.dat', time_axis=0,
                                       dtype='float32', chunk_size=None)
    data = np.memmap(open(self.test_dir + 'rec.dat'), dtype='float32', mode='r',
                     shape=(nb_sample, nb_chan)).T
    assert np.allclose(data, self.RX.get_traces())
    del(data)  # this closes the file

    # time_axis=1 chunk_size=None
    self.RX.write_to_binary_dat_format(self.test_dir + 'rec.dat', time_axis=1,
                                       dtype='float32', chunk_size=None)
    data = np.memmap(open(self.test_dir + 'rec.dat'), dtype='float32', mode='r',
                     shape=(nb_chan, nb_sample))
    assert np.allclose(data, self.RX.get_traces())
    del(data)  # this closes the file

    # time_axis=0 chunk_size=99
    self.RX.write_to_binary_dat_format(self.test_dir + 'rec.dat', time_axis=0,
                                       dtype='float32', chunk_size=99)
    data = np.memmap(open(self.test_dir + 'rec.dat'), dtype='float32', mode='r',
                     shape=(nb_sample, nb_chan)).T
    assert np.allclose(data, self.RX.get_traces())
    del(data)  # this closes the file

    # time_axis=0 chunk_mb=2
    self.RX.write_to_binary_dat_format(self.test_dir + 'rec.dat', time_axis=0,
                                       dtype='float32', chunk_mb=2)
    data = np.memmap(open(self.test_dir + 'rec.dat'), dtype='float32', mode='r',
                     shape=(nb_sample, nb_chan)).T
    assert np.allclose(data, self.RX.get_traces())
    del(data)  # this closes the file

    # time_axis=1 chunk_mb=2
    self.RX.write_to_binary_dat_format(self.test_dir + 'rec.dat', time_axis=1,
                                       dtype='float32', chunk_mb=2)
    data = np.memmap(open(self.test_dir + 'rec.dat'), dtype='float32', mode='r',
                     shape=(nb_chan, nb_sample))
    assert np.allclose(data, self.RX.get_traces())
    del(data)  # this closes the file
Example #25
Source File: sequentialfile.py From baseband with GNU General Public License v3.0
def memmap(self, dtype=np.uint8, mode=None, offset=None, shape=None,
           order='C'):
    """Map part of the file in memory.

    Note that the map cannot span multiple underlying files.
    Parameters are as for `~numpy.memmap`.
    """
    if self.closed:
        raise ValueError('memmap of closed file.')

    dtype = np.dtype(dtype)

    if mode is None:
        mode = self.mode.replace('b', '')

    if offset is not None and offset != self.tell():
        # seek will fail for SequentialFileWriter, so we try to avoid it.
        self.seek(offset)
    elif self.fh.tell() == self._file_sizes[self.file_nr]:
        self._open(self.file_nr + 1)

    if shape is None:
        count = self.size - self.tell()
        if count % dtype.itemsize:
            raise ValueError("size of available data is not a "
                             "multiple of the data-type size.")
        shape = (count // dtype.itemsize,)
    else:
        if not isinstance(shape, tuple):
            shape = (shape,)
        count = dtype.itemsize
        for k in shape:
            count *= k

    if self.fh.tell() + count > self._file_sizes[self.file_nr]:
        raise ValueError('mmap length exceeds individual file size')

    file_offset = self.fh.tell()
    mm = np.memmap(self.fh, dtype, mode, file_offset, shape, order)
    self.fh.seek(file_offset + count)
    return mm
Example #26
Source File: parrec.py From me-ica with GNU Lesser General Public License v2.1
def raw_data_from_fileobj(self, fileobj):
    """Returns memmap array of raw unscaled image data.

    Array axes correspond to x,y,z,t.
    """
    # memmap the data -- it is guaranteed to be uncompressed and all
    # properties are known
    # read in Fortran order to have spatial axes first
    data = np.memmap(fileobj,
                     dtype=self.get_data_dtype(),
                     mode='c',  # copy-on-write
                     shape=self.get_data_shape_in_file(),
                     order='F')
    return data
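Note the mode='c' (copy-on-write) flag above: assignments modify the in-memory pages only, and the underlying file is never changed. A minimal sketch with an illustrative file name:

import numpy as np

np.zeros(8, dtype='float32').tofile('scan.raw')  # illustrative file
cow = np.memmap('scan.raw', dtype='float32', mode='c', shape=(8,))
cow[0] = 99.0  # changes the in-memory copy only
reread = np.memmap('scan.raw', dtype='float32', mode='r', shape=(8,))
print(cow[0], reread[0])  # -> 99.0 0.0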
Example #27
Source File: _base_functions.py From mlens with MIT License
def slice_array(x, y, idx, r=0):
    """Build training array index and slice data."""
    if idx == 'all':
        idx = None

    if idx:
        # Check if the idx is a tuple and if so, whether it can be made
        # into a simple slice
        if isinstance(idx[0], tuple):
            if len(idx[0]) > 1:
                # Advanced indexing is required. This will trigger a copy
                # of the slice in question to be made
                simple_slice = False
                idx = np.hstack([np.arange(t0 - r, t1 - r) for t0, t1 in idx])
                x = _safe_slice(x, idx)
                y = _safe_slice(y, idx)
            else:
                # The tuple is of the form ((a, b),) and can be made
                # into a simple (a, b) tuple for which basic slicing applies
                # which allows a view to be returned instead of a copy
                simple_slice = True
                idx = idx[0]
        else:
            # Index tuples of the form (a, b) allows simple slicing
            simple_slice = True

        if simple_slice:
            x = x[slice(idx[0] - r, idx[1] - r)]
            y = y[slice(idx[0] - r, idx[1] - r)] if y is not None else y

    # Cast as ndarray to avoid passing memmaps to estimators
    if y is not None and isinstance(y, np.memmap):
        y = y.view(type=np.ndarray)
    if not issparse(x) and isinstance(x, np.memmap):
        x = x.view(type=np.ndarray)

    return x, y
Example #28
Source File: backend.py From mlens with MIT License
def _initialize(self, job, X, y=None, path=None, **kwargs):
    """Create a job instance for estimation.

    See :func:`~mlens.parallel.backend.BaseProcess.initialize` for
    further details.
    """
    job = Job(job, **kwargs)
    job = _set_path(job, path, self.__threading__)

    # --- Prepare inputs
    for name, arr in zip(('X', 'y'), (X, y)):
        if arr is None:
            continue

        # Dump data in cache
        if self.__threading__:
            # No need to memmap
            f = None
            if isinstance(arr, str):
                arr = _load(arr)
        else:
            f = dump_array(arr, name, job.dir)

        # Store data for processing
        if name == 'y' and arr is not None:
            job.targets = arr if self.__threading__ else _load_mmap(f)
        elif name == 'X':
            job.predict_in = arr \
                if self.__threading__ else _load_mmap(f)

    self.job = job
    self.__initialized__ = 1
    gc.collect()
    return self
Example #29
Source File: memmap.py From vnpy_crypto with MIT License
def __array_wrap__(self, arr, context=None):
    arr = super(memmap, self).__array_wrap__(arr, context)

    # Return a memmap if a memmap was given as the output of the
    # ufunc. Leave the arr class unchanged if self is not a memmap
    # to keep original memmap subclasses behavior
    if self is arr or type(self) is not memmap:
        return arr

    # Return scalar instead of 0d memmap, e.g. for np.sum with
    # axis=None
    if arr.shape == ():
        return arr[()]

    # Return ndarray otherwise
    return arr.view(np.ndarray)
Example #30
Source File: loader.py From Automatic-Identification-and-Counting-of-Blood-Cells with GNU General Public License v3.0
def __init__(self, path):
    self.eof = False  # end of file
    self.path = path  # current pos
    if path is None:
        self.eof = True
        return
    else:
        self.size = os.path.getsize(path)  # save the path
        major, minor, revision, seen = np.memmap(path,
                                                 shape=(), mode='r', offset=0,
                                                 dtype='({})i4,'.format(4))
        self.transpose = major > 1000 or minor > 1000
        self.offset = 16
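Examples #2-#4 and #30 walk through a binary file by re-mapping it at successive offsets. The same idea can be expressed with a plain shape/offset read rather than the subarray-dtype trick those loaders use; a minimal sketch, assuming a flat little-endian float32 file with an illustrative name:

import numpy as np

np.arange(10, dtype='float32').tofile('weights.bin')  # illustrative file

offset = 0
size = 4  # number of float32 values to map per step
chunk = np.memmap('weights.bin', dtype='float32', mode='r',
                  offset=offset, shape=(size,))
offset += 4 * size  # advance past the bytes just mapped
print(chunk)  # -> [0. 1. 2. 3.]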