Python mmap.mmap() Examples

The following are code examples for showing how to use mmap.mmap(). They are from open source Python projects. You can vote up the examples you like or vote down the ones you don't like.

Example 1
Project: pyblish-win   Author: pyblish   File: heap.py    GNU Lesser General Public License v3.0 6 votes vote down vote up
def malloc(self, size):
        """Return an (arena, start, stop) block of at least ``size`` bytes,
        rounded up to the heap alignment.

        Raises ValueError for negative sizes and OverflowError for sizes the
        platform cannot represent.  (The original validated with ``assert``,
        which disappears under ``-O``, and used the Python-2-only
        ``sys.maxint``; this matches the fixed upstream version.)
        """
        if size < 0:
            raise ValueError("Size {0:n} out of range".format(size))
        if sys.maxsize <= size:
            raise OverflowError("Size {0:n} too large".format(size))
        if os.getpid() != self._lastpid:
            self.__init__()                     # reinitialize after fork
        with self._lock:
            self._free_pending_blocks()
            size = self._roundup(max(size, 1), self._alignment)
            (arena, start, stop) = self._malloc(size)
            new_stop = start + size
            # Return any surplus at the tail of the slice to the free list.
            if new_stop < stop:
                self._free((arena, new_stop, stop))
            block = (arena, start, new_stop)
            self._allocated_blocks.add(block)
            return block

#
# Class representing a chunk of an mmap -- can be inherited
# 
Example 2
Project: PyBoof   Author: lessthanoptimal   File: __init__.py    Apache License 2.0 6 votes vote down vote up
def init_memmap(size_mb=2):
    """
    Call to enable use of memory mapped files for quick communication between Python and Java.  This
    faster communication method requires specialized code so is only used when large amounts of memory
    is being transferred.

    :param size_mb: Size of the memory mapped file in megabytes
    :type size_mb: int
    """
    global mmap_size, mmap_file
    import tempfile
    mmap_path = os.path.join(tempfile.gettempdir(), "pyboof_mmap")
    mmap_size = size_mb * 1024 * 1024
    # The Java side creates/initializes the backing file before we map it.
    gateway.jvm.pyboof.PyBoofEntryPoint.initializeMmap(mmap_path, size_mb)
    # Open in read/write binary mode.  mmap duplicates the descriptor, so the
    # Python file object can be closed once the mapping exists — the original
    # leaked this handle.
    with open(mmap_path, "r+b") as mmap_fid:
        mmap_file = mmap.mmap(mmap_fid.fileno(), length=0, flags=mmap.MAP_SHARED,
                              prot=mmap.PROT_READ | mmap.PROT_WRITE)
Example 3
Project: LaserTOF   Author: kyleuckert   File: netcdf.py    MIT License 6 votes vote down vote up
def close(self):
        """Closes the NetCDF file.

        Flushes pending writes first; the underlying mmap is only closed when
        no external arrays still reference its buffer.
        """
        if not self.fp.closed:
            try:
                self.flush()
            finally:
                # Drop our variable references so their data no longer pins
                # the mmap buffer.
                self.variables = OrderedDict()
                if self._mm_buf is not None:
                    # Use a weakref to detect whether anything *else* still
                    # holds the buffer once our own reference is dropped.
                    ref = weakref.ref(self._mm_buf)
                    self._mm_buf = None
                    if ref() is None:
                        # self._mm_buf is gc'd, and we can close the mmap
                        self._mm.close()
                    else:
                        # we cannot close self._mm, since self._mm_buf is
                        # alive and there may still be arrays referring to it
                        warnings.warn((
                            "Cannot close a netcdf_file opened with mmap=True, when "
                            "netcdf_variables or arrays referring to its data still exist. "
                            "All data arrays obtained from such files refer directly to "
                            "data on disk, and must be copied before the file can be cleanly "
                            "closed. (See netcdf_file docstring for more information on mmap.)"
                        ), category=RuntimeWarning)
                self._mm = None
                self.fp.close()
Example 4
Project: NiujiaoDebugger   Author: MrSrc   File: heap.py    GNU General Public License v3.0 6 votes vote down vote up
def malloc(self, size):
        """Return an (arena, start, stop) block of at least ``size`` bytes,
        rounded up to the heap alignment."""
        if size < 0:
            raise ValueError("Size {0:n} out of range".format(size))
        if sys.maxsize <= size:
            raise OverflowError("Size {0:n} too large".format(size))
        if os.getpid() != self._lastpid:
            self.__init__()                     # reinitialize after fork
        with self._lock:
            self._free_pending_blocks()
            wanted = self._roundup(max(size, 1), self._alignment)
            arena, begin, end = self._malloc(wanted)
            limit = begin + wanted
            # Give back any surplus tail of the slice we were handed.
            if limit < end:
                self._free((arena, limit, end))
            allocated = (arena, begin, limit)
            self._allocated_blocks.add(allocated)
            return allocated

#
# Class representing a chunk of an mmap -- can be inherited by child process
# 
Example 5
Project: DDEA-DEV   Author: TinyOS-Camp   File: toolset.py    GNU General Public License v2.0 6 votes vote down vote up
def loadObjectBinMmap(filename):
    """Load a pickled object from ``filename`` via a read-only memory map.

    Bug fix: the original called ``open(filename, os.O_RDONLY)`` — an
    ``os.open`` flag (an int) passed as the builtin ``open`` *mode* string,
    which raises at call time; binary mode "rb" is what was intended.  The
    redundant seek-to-end size probe and the duplicate ``f.close()`` inside
    the ``with`` block are gone: ``mmap(fd, 0)`` maps the whole file.

    :param filename: path to a file produced by ``pickle.dump``
    :return: the unpickled object
    """
    with open(filename, "rb") as f:
        m = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)
        try:
            # mmap supports the buffer protocol, so no copy is made here.
            obj = pickle.loads(m)
        finally:
            m.close()
    return obj

#multi-processing version 
Example 6
Project: DDEA-DEV   Author: TinyOS-Camp   File: toolset.py    GNU General Public License v2.0 6 votes vote down vote up
def loadObjectBinMmap(filename):
    """Load a pickled object from ``filename`` via a read-only memory map.

    Bug fix: the original called ``open(filename, os.O_RDONLY)`` — an
    ``os.open`` flag (an int) passed as the builtin ``open`` *mode* string,
    which raises at call time; binary mode "rb" is what was intended.  The
    redundant seek-to-end size probe and the duplicate ``f.close()`` inside
    the ``with`` block are gone: ``mmap(fd, 0)`` maps the whole file.

    :param filename: path to a file produced by ``pickle.dump``
    :return: the unpickled object
    """
    with open(filename, "rb") as f:
        m = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)
        try:
            # mmap supports the buffer protocol, so no copy is made here.
            obj = pickle.loads(m)
        finally:
            m.close()
    return obj

#multi-processing version 
Example 7
Project: QuantStudio   Author: Scorpi000   File: RiskDB.py    GNU General Public License v3.0 6 votes vote down vote up
def start(self, dts, **kwargs):
        """Enter ergodic (iteration) mode: build the datetime and ID axes,
        reset the cache state and, when CacheSize > 0, spawn the cache
        preparation subprocess that communicates via an anonymous
        shared-memory mmap (tag-named on Windows, fd -1 elsewhere).

        Returns 0; returns immediately if already started.
        """
        if self.ErgodicMode._isStarted: return 0
        self.ErgodicMode._DateTimes = np.array((self.getDateTime() if not self.ErgodicMode.ErgodicDTs else self.ErgodicMode.ErgodicDTs), dtype="O")
        if self.ErgodicMode._DateTimes.shape[0]==0: raise __QS_Error__("风险表: '%s' 的默认时间序列为空, 请设置参数 '遍历模式-遍历时点' !" % self._Name)
        self.ErgodicMode._IDs = (self.getID() if not self.ErgodicMode.ErgodicIDs else list(self.ErgodicMode.ErgodicIDs))
        if not self.ErgodicMode._IDs: raise __QS_Error__("风险表: '%s' 的默认 ID 序列为空, 请设置参数 '遍历模式-遍历ID' !" % self._Name)
        self.ErgodicMode._CurInd = -1
        self.ErgodicMode._DTNum = self.ErgodicMode._DateTimes.shape[0]# number of time points
        self.ErgodicMode._CacheDTs = []
        self.ErgodicMode._CacheData = {}
        self.ErgodicMode._Queue2SubProcess = Queue()
        self.ErgodicMode._Queue2MainProcess = Queue()
        if self.ErgodicMode.CacheSize>0:
            if os.name=="nt":
                self.ErgodicMode._TagName = str(uuid.uuid1())# tag for the shared memory
                # Windows: the buffer is created only after the subprocess has
                # started; presumably the child attaches via the tag name —
                # confirm in _prepareRTMMAPCacheData.
                self._MMAPCacheData = None
            else:
                self.ErgodicMode._TagName = None# tag for the shared memory
                self._MMAPCacheData = mmap.mmap(-1, int(self.ErgodicMode.CacheSize*2**20))# current shared-memory buffer
            self.ErgodicMode._CacheDataProcess = Process(target=_prepareRTMMAPCacheData, args=(self, self._MMAPCacheData), daemon=True)
            self.ErgodicMode._CacheDataProcess.start()
            if os.name=="nt": self._MMAPCacheData = mmap.mmap(-1, int(self.ErgodicMode.CacheSize*2**20), tagname=self.ErgodicMode._TagName)# current shared-memory buffer
        self.ErgodicMode._isStarted = True
        return 0
Example 8
Project: perceptio   Author: tryexceptpass   File: visualize.py    MIT License 6 votes vote down vote up
def skrepresent(path):
    """Show a square image that represents the file specified starting from an np.zeros array.

    The file's bytes fill the red and green channels row-major; the blue
    channel stays zero.  The fill is vectorized with numpy instead of the
    original O(side**2) Python double loop (identical pixel values).

    NOTE(review): like the original, this raises on a zero-length file
    (mmap cannot map 0 bytes) — confirm callers never pass empty files.
    """
    with open(path, 'rb') as infile:
        data = bytearray(mmap.mmap(infile.fileno(), 0, access=mmap.ACCESS_READ))
    side = math.ceil(math.sqrt(len(data)))

    pixels = np.zeros(shape=(side, side, 3), dtype=np.uint8)

    # Flattened index i*side+j matches C-order reshape, so a single slice
    # assignment reproduces the loop; positions past EOF stay zero.
    flat = np.frombuffer(bytes(data), dtype=np.uint8)
    pixels.reshape(-1, 3)[:len(flat), 0] = flat
    pixels.reshape(-1, 3)[:len(flat), 1] = flat

    io.imshow(pixels)
    io.show()
Example 9
Project: tf-deep-facial-recognition-lite   Author: velociraptor111   File: ArrayUtils.py    MIT License 6 votes vote down vote up
def __new__(subtype, shape, dtype=float, buffer=None, offset=0, strides=None, order=None, info=None):
		"""Allocate an ndarray whose storage is an mmap over an anonymous
		temporary file.

		``buffer``, ``offset``, ``strides`` and ``info`` are accepted for
		signature compatibility but ignored: the array is always built at
		offset 0 over the freshly created mapping.
		"""
		# Determine the size in bytes required to hold the array
		numBytes = _requiredSize(shape, dtype)
		
		# Create the temporary file, resize it, and map it into memory
		tempFile = tempfile.TemporaryFile()
		tempFile.truncate(numBytes)
		buf = mmap.mmap(tempFile.fileno(), numBytes, access=mmap.ACCESS_WRITE)
		
		# Create the ndarray with the memory map as the underlying buffer
		obj = super(TempfileBackedArray, subtype).__new__(subtype, shape, dtype, buf, 0, None, order)
		
		# Attach the file reference to the ndarray object so the file stays
		# open (and the mapping valid) for the lifetime of the array.
		obj._file = tempFile
		return obj
Example 10
Project: macro_pack   Author: sevagas   File: mp_module.py    Apache License 2.0 6 votes vote down vote up
def startFunction(self):
        """ Return start function, attempt to find it in vba files if _startFunction is not set.

        Scans every non-empty VBA file through a read-only mmap (so large
        files are not read into memory) for the first matching name from
        self.potentialStartFunctions; a match is cached in
        self._startFunction and appended to self.reservedFunctions.
        Returns None when nothing matches.
        """
        result = None
        if self._startFunction is not None:
            result =  self._startFunction
        else:
             
            vbaFiles = self.getVBAFiles()
            for vbaFile in vbaFiles:
                # Skip empty files: mmap(fileno, 0) raises on zero-length input.
                if  os.stat(vbaFile).st_size != 0:  
                    with open(vbaFile, 'rb', 0) as file, mmap.mmap(file.fileno(), 0, access=mmap.ACCESS_READ) as s:
                        for potentialStartFunction in self.potentialStartFunctions:
                            if s.find(potentialStartFunction.encode()) != -1:
                                self._startFunction = potentialStartFunction
                                if self._startFunction not in self.reservedFunctions:
                                    self.reservedFunctions.append(self._startFunction)
                                result = potentialStartFunction
                                # NOTE(review): this break leaves only the inner
                                # loop — later files can overwrite an earlier
                                # match.  Confirm that is intended.
                                break                
        return result
Example 11
Project: macro_pack   Author: sevagas   File: mp_module.py    Apache License 2.0 6 votes vote down vote up
def getMainVBAFile(self):
        """ return main vba file (the one containing macro entry point) """
        result = ""
        vbaFiles = self.getVBAFiles()
        # A single VBA file is trivially the main one.
        if len(vbaFiles)==1:
            result = vbaFiles[0]
        else:
            # Otherwise, the main file is the first non-empty one whose
            # content mentions the detected start function.
            if self.startFunction is not None:
                for vbaFile in vbaFiles:
                    # Skip empty files: mmap(fileno, 0) raises on length 0.
                    if  os.stat(vbaFile).st_size != 0:  
                        with open(vbaFile, 'rb', 0) as file, mmap.mmap(file.fileno(), 0, access=mmap.ACCESS_READ) as s:
                            if s.find(self.startFunction.encode()) != -1:
                                result  = vbaFile
                                break
                            
        return result
Example 12
Project: att   Author: Centre-Alt-Rendiment-Esportiu   File: netcdf.py    GNU General Public License v3.0 6 votes vote down vote up
def close(self):
        """Closes the NetCDF file.

        Flushes pending writes first; the underlying mmap is only closed when
        no external arrays still reference its buffer.
        """
        if not self.fp.closed:
            try:
                self.flush()
            finally:
                # Drop our variable references so their data no longer pins
                # the mmap buffer.
                self.variables = {}
                if self._mm_buf is not None:
                    # Use a weakref to detect whether anything *else* still
                    # holds the buffer once our own reference is dropped.
                    ref = weakref.ref(self._mm_buf)
                    self._mm_buf = None
                    if ref() is None:
                        # self._mm_buf is gc'd, and we can close the mmap
                        self._mm.close()
                    else:
                        # we cannot close self._mm, since self._mm_buf is
                        # alive and there may still be arrays referring to it
                        warnings.warn((
                            "Cannot close a netcdf_file opened with mmap=True, when "
                            "netcdf_variables or arrays referring to its data still exist. "
                            "All data arrays obtained from such files refer directly to "
                            "data on disk, and must be copied before the file can be cleanly "
                            "closed. (See netcdf_file docstring for more information on mmap.)"
                        ), category=RuntimeWarning)
                self._mm = None
                self.fp.close()
Example 13
Project: https-github.com-stamparm-maltrail   Author: hxp2k6   File: sensor.py    MIT License 6 votes vote down vote up
def _init_multiprocessing():
    """
    Inits worker processes used in multiprocessing mode

    Allocates one anonymous shared mmap as the packet buffer plus an
    unsigned-long shared counter, then starts cpu_count()-1 daemon workers
    over them.  (Python 2 code: uses ``xrange``.)
    """

    global _buffer
    global _n

    if _multiprocessing:
        print("[i] creating %d more processes (%d CPU cores detected)" % (_multiprocessing.cpu_count() - 1, _multiprocessing.cpu_count()))
        _buffer = mmap.mmap(-1, BUFFER_LENGTH)  # http://www.alexonlinux.com/direct-io-in-python
        # lock=False skips the synchronizing lock around the counter.
        # NOTE(review): unsynchronized Value updates are not atomic — confirm
        # writers coordinate elsewhere.
        _n = _multiprocessing.Value('L', lock=False)

        for i in xrange(_multiprocessing.cpu_count() - 1):
            process = _multiprocessing.Process(target=worker, name=str(i), args=(_buffer, _n, i, _multiprocessing.cpu_count() - 1, _process_packet))
            process.daemon = True
            process.start()
Example 14
Project: filesystem_spec   Author: intake   File: caching.py    BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def _makefile(self):
        """Create (or reopen) the on-disk cache file and return it
        memory-mapped.

        A zero-length cache needs no backing file, so an empty bytearray is
        returned instead of an mmap in that case.
        """
        import tempfile
        import mmap

        if self.size == 0:
            return bytearray()

        # posix version
        location = self.location
        if location is not None and os.path.exists(location):
            # Reuse the existing cache file as-is.
            handle = io.open(location, "rb+")
        else:
            if location is None:
                handle = tempfile.TemporaryFile()
                self.blocks = set()
            else:
                handle = io.open(location, "wb+")
            # Grow the file to the target size by writing its final byte.
            handle.seek(self.size - 1)
            handle.write(b"1")
            handle.flush()

        return mmap.mmap(handle.fileno(), self.size)
Example 15
Project: tf-pose   Author: SrikanthVelpuri   File: RemoteGraphicsView.py    Apache License 2.0 6 votes vote down vote up
def remoteSceneChanged(self, data):
        """Handle a (width, height, size, filename) notification from the
        remote render process: remap the shared-memory buffer if its size
        changed, then rebuild the QImage shown by this view."""
        w, h, size, newfile = data
        #self._sizeHint = (whint, hhint)
        if self.shm is None or self.shm.size != size:
            if self.shm is not None:
                self.shm.close()
            if sys.platform.startswith('win'):
                self.shmtag = newfile   ## on windows, we create a new tag for every resize
                self.shm = mmap.mmap(-1, size, self.shmtag) ## can't use tmpfile on windows because the file can only be opened once.
            elif sys.platform == 'darwin':
                # macOS: reopen the backing file before remapping — the remote
                # side supplies a (possibly new) file name on resize.
                self.shmFile.close()
                self.shmFile = open(self._view.shmFileName(), 'r')
                self.shm = mmap.mmap(self.shmFile.fileno(), size, mmap.MAP_SHARED, mmap.PROT_READ)
            else:
                # Other POSIX: the same backing file can simply be remapped.
                self.shm = mmap.mmap(self.shmFile.fileno(), size, mmap.MAP_SHARED, mmap.PROT_READ)
        self.shm.seek(0)
        # ARGB32 means 4 bytes per pixel.
        data = self.shm.read(w*h*4)
        self._img = QtGui.QImage(data, w, h, QtGui.QImage.Format_ARGB32)
        self._img.data = data  # data must be kept alive or PySide 1.2.1 (and probably earlier) will crash.
        self.update()
Example 16
Project: tf-pose   Author: SrikanthVelpuri   File: RemoteGraphicsView.py    Apache License 2.0 6 votes vote down vote up
def __init__(self, *args, **kwds):
        """Set up a page-sized shared-memory frame buffer, then perform normal
        GraphicsView initialization and start a ~60 FPS render timer.

        Windows uses an anonymous mmap addressed by a random tag name; POSIX
        uses a named temporary file mapped MAP_SHARED/PROT_WRITE.
        """
        ## Create shared memory for rendered image
        #pg.dbg(namespace={'r': self})
        if sys.platform.startswith('win'):
            # Random lowercase suffix so concurrent instances get unique tags.
            self.shmtag = "pyqtgraph_shmem_" + ''.join([chr((random.getrandbits(20)%25) + 97) for i in range(20)])
            self.shm = mmap.mmap(-1, mmap.PAGESIZE, self.shmtag) # use anonymous mmap on windows
        else:
            self.shmFile = tempfile.NamedTemporaryFile(prefix='pyqtgraph_shmem_')
            # One byte more than PAGESIZE so the PAGESIZE-byte mapping is
            # fully backed by the file.
            self.shmFile.write(b'\x00' * (mmap.PAGESIZE+1))
            fd = self.shmFile.fileno()
            self.shm = mmap.mmap(fd, mmap.PAGESIZE, mmap.MAP_SHARED, mmap.PROT_WRITE)
        atexit.register(self.close)
        
        GraphicsView.__init__(self, *args, **kwds)
        self.scene().changed.connect(self.update)
        self.img = None
        # Re-render every 16 ms (~60 FPS).
        self.renderTimer = QtCore.QTimer()
        self.renderTimer.timeout.connect(self.renderView)
        self.renderTimer.start(16)
Example 17
Project: pyblish-win   Author: pyblish   File: test_os.py    GNU Lesser General Public License v3.0 5 votes vote down vote up
def _kill_with_event(self, event, name):
        """Verify that sending console-control ``event`` stops a subprocess.

        A 1-byte tag-named mmap serves as the handshake channel: the child
        script writes '1' once its console handler is installed, after which
        the event is sent.  Fails if the child never initializes or survives
        the event.  (Windows-only Python 2 test — mmap slots hold str bytes.)
        """
        tagname = "test_os_%s" % uuid.uuid1()
        m = mmap.mmap(-1, 1, tagname)
        m[0] = '0'
        # Run a script which has console control handling enabled.
        proc = subprocess.Popen([sys.executable,
                   os.path.join(os.path.dirname(__file__),
                                "win_console_handler.py"), tagname],
                   creationflags=subprocess.CREATE_NEW_PROCESS_GROUP)
        # Let the interpreter startup before we send signals. See #3137.
        count, max = 0, 20
        while count < max and proc.poll() is None:
            if m[0] == '1':
                break
            time.sleep(0.5)
            count += 1
        else:
            # while/else: reached only when the handshake byte never flipped
            # (timeout of 20 * 0.5 s, or the child exited early).
            self.fail("Subprocess didn't finish initialization")
        os.kill(proc.pid, event)
        # proc.send_signal(event) could also be done here.
        # Allow time for the signal to be passed and the process to exit.
        time.sleep(0.5)
        if not proc.poll():
            # Forcefully kill the process if we weren't able to signal it.
            os.kill(proc.pid, signal.SIGINT)
            self.fail("subprocess did not stop on {}".format(name))
Example 18
Project: pyblish-win   Author: pyblish   File: heap.py    GNU Lesser General Public License v3.0 5 votes vote down vote up
def __setstate__(self, state):
            """Re-attach to an existing Windows named shared-memory block when
            this arena is unpickled in another process."""
            self.size, self.name = self._state = state
            self.buffer = mmap.mmap(-1, self.size, tagname=self.name)
            # The tagged mapping must already exist (created by the parent).
            assert win32.GetLastError() == win32.ERROR_ALREADY_EXISTS
Example 19
Project: pyblish-win   Author: pyblish   File: heap.py    GNU Lesser General Public License v3.0 5 votes vote down vote up
def __init__(self, size):
            """Back this arena with ``size`` bytes of anonymous shared memory
            (fd -1 requests an anonymous mapping; no tag name on POSIX)."""
            self.size = size
            self.name = None
            self.buffer = mmap.mmap(-1, size)

#
# Class allowing allocation of chunks of memory from arenas
# 
Example 20
Project: DOTA_models   Author: ringringyi   File: vecs.py    Apache License 2.0 5 votes vote down vote up
def __init__(self, vocab_filename, rows_filename, cols_filename=None):
    """Initializes the vectors from a text vocabulary and binary data.

    Fixes the Python-3 breakage in the original: ``size / (4 * n)`` is true
    division and yields a float, which numpy's ``reshape(n, dim)`` rejects;
    floor division keeps ``dim`` an int.  The binary vector files are also
    opened in "rb" mode rather than text mode.
    """
    with open(vocab_filename, 'r') as lines:
      # First whitespace-separated token on each line is the word.
      self.vocab = [line.split()[0] for line in lines]
      self.word_to_idx = {word: idx for idx, word in enumerate(self.vocab)}

    n = len(self.vocab)

    with open(rows_filename, 'rb') as rows_fh:
      rows_fh.seek(0, os.SEEK_END)
      size = rows_fh.tell()

      # Make sure that the file size seems reasonable (n rows of float32s).
      if size % (4 * n) != 0:
        raise IOError(
            'unexpected file size for binary vector file %s' % rows_filename)

      # Memory map the rows.  Floor division keeps ``dim`` an integer.
      dim = size // (4 * n)
      rows_mm = mmap.mmap(rows_fh.fileno(), 0, prot=mmap.PROT_READ)
      # np.matrix copies, so the read-only mapped buffer is never mutated.
      rows = np.matrix(
          np.frombuffer(rows_mm, dtype=np.float32).reshape(n, dim))

      # If column vectors were specified, then open them and add them to the row
      # vectors.
      if cols_filename:
        with open(cols_filename, 'rb') as cols_fh:
          cols_mm = mmap.mmap(cols_fh.fileno(), 0, prot=mmap.PROT_READ)
          cols_fh.seek(0, os.SEEK_END)
          if cols_fh.tell() != size:
            raise IOError('row and column vector files have different sizes')

          cols = np.matrix(
              np.frombuffer(cols_mm, dtype=np.float32).reshape(n, dim))

          rows += cols
          cols_mm.close()

      # Normalize so that dot products are just cosine similarity.
      self.vecs = rows / np.linalg.norm(rows, axis=1).reshape(n, 1)
      rows_mm.close()
Example 21
Project: Trusted-Platform-Module-nova   Author: BU-NU-CLOUD-SP16   File: driver.py    Apache License 2.0 5 votes vote down vote up
def _supports_direct_io(dirpath):
        """Probe whether ``dirpath`` supports O_DIRECT I/O.

        Writes one 512-byte-aligned block, sourced from an anonymous mmap
        (O_DIRECT requires aligned buffers), to a scratch file.  EINVAL from
        the kernel means direct I/O is unsupported; any other error is logged
        and re-raised.  The scratch file is always removed.
        """

        if not hasattr(os, 'O_DIRECT'):
            LOG.debug("This python runtime does not support direct I/O")
            return False

        testfile = os.path.join(dirpath, ".directio.test")

        hasDirectIO = True
        try:
            f = os.open(testfile, os.O_CREAT | os.O_WRONLY | os.O_DIRECT)
            # Check is the write allowed with 512 byte alignment
            align_size = 512
            m = mmap.mmap(-1, align_size)
            # Bug fix: mmap.write() requires bytes; the original passed a str
            # (r"x" * align_size), which raises TypeError on Python 3.
            m.write(b"x" * align_size)
            os.write(f, m)
            os.close(f)
            LOG.debug("Path '%(path)s' supports direct I/O",
                      {'path': dirpath})
        except OSError as e:
            if e.errno == errno.EINVAL:
                LOG.debug("Path '%(path)s' does not support direct I/O: "
                          "'%(ex)s'", {'path': dirpath, 'ex': e})
                hasDirectIO = False
            else:
                with excutils.save_and_reraise_exception():
                    LOG.error(_LE("Error on '%(path)s' while checking "
                                  "direct I/O: '%(ex)s'"),
                              {'path': dirpath, 'ex': e})
        except Exception as e:
            with excutils.save_and_reraise_exception():
                LOG.error(_LE("Error on '%(path)s' while checking direct I/O: "
                              "'%(ex)s'"), {'path': dirpath, 'ex': e})
        finally:
            # Best-effort cleanup of the scratch file.
            try:
                os.unlink(testfile)
            except Exception:
                pass

        return hasDirectIO
Example 22
Project: neos-classic-electrum-server   Author: neoscoin   File: deserialize.py    GNU Affero General Public License v3.0 5 votes vote down vote up
def map_file(self, file, start):
        """Memory-map ``file`` read-only as this deserializer's input buffer
        and position the read cursor at ``start``."""
        self.read_cursor = start
        self.input = mmap.mmap(file.fileno(), 0, access=mmap.ACCESS_READ)
Example 23
Project: pyCEST   Author: pganssle   File: nylib.py    MIT License 5 votes vote down vote up
def searchFile(file, string):
    """Return the first line of ``file`` containing ``string`` (or None).

    Python-3 fixes over the original:
    - ``mmap.readline`` yields bytes, so the old ``iter(..., "")`` sentinel
      never matched and the loop spun forever at EOF; the sentinel is now
      ``b""`` and ``string`` is encoded for the comparison.
    - The file is opened in binary mode and both the file and the mapping
      are closed deterministically via ``with``.
    - Locals no longer shadow the ``map`` builtin.

    The matching line is returned as bytes, exactly as read from the file.
    """
    needle = string.encode() if isinstance(string, str) else string
    with open(file, "rb") as f:
        with mmap.mmap(f.fileno(), 0, prot=mmap.PROT_READ) as mapped:
            for line in iter(mapped.readline, b""):
                if needle in line:
                    return line
    return None


## Split an array into the desired number of parts 
Example 24
Project: shepherd   Author: geissdoerfer   File: shepherd_io.py    MIT License 5 votes vote down vote up
def __enter__(self):
        """Map the device's physical memory region and return self.

        Opens /dev/mem (O_SYNC for unbuffered access) and maps ``self.size``
        bytes at physical offset ``self.address`` with write access.
        """
        self.devmem_fd = os.open("/dev/mem", os.O_RDWR | os.O_SYNC)

        self.mapped_mem = mmap.mmap(
            self.devmem_fd,
            self.size,
            mmap.MAP_SHARED,
            mmap.PROT_WRITE,
            offset=self.address,
        )

        return self
Example 25
Project: NCBoost   Author: RausellLab   File: get_CDTS.py    Apache License 2.0 5 votes vote down vote up
def get_num_lines(file_path):
    """Count the lines in ``file_path`` by scanning a memory map.

    The original leaked both the file handle and the mmap; ``with`` closes
    them deterministically.  Read-only access also drops the needless write
    permission the old "r+" mode required.
    """
    with open(file_path, "rb") as fp:
        with mmap.mmap(fp.fileno(), 0, access=mmap.ACCESS_READ) as buf:
            lines = 0
            while buf.readline():
                lines += 1
            return lines
Example 26
Project: NCBoost   Author: RausellLab   File: get_GnomAD_MAFs.py    Apache License 2.0 5 votes vote down vote up
def get_num_lines(file_path):
    """Count the lines in ``file_path`` by scanning a memory map.

    The original leaked both the file handle and the mmap; ``with`` closes
    them deterministically.  Read-only access also drops the needless write
    permission the old "r+" mode required.
    """
    with open(file_path, "rb") as fp:
        with mmap.mmap(fp.fileno(), 0, access=mmap.ACCESS_READ) as buf:
            lines = 0
            while buf.readline():
                lines += 1
            return lines
Example 27
Project: NCBoost   Author: RausellLab   File: get_1000GP_possel_scores.py    Apache License 2.0 5 votes vote down vote up
def get_num_lines(file_path):
    """Count the lines in ``file_path`` by scanning a memory map.

    The original leaked both the file handle and the mmap; ``with`` closes
    them deterministically.  Read-only access also drops the needless write
    permission the old "r+" mode required.
    """
    with open(file_path, "rb") as fp:
        with mmap.mmap(fp.fileno(), 0, access=mmap.ACCESS_READ) as buf:
            lines = 0
            while buf.readline():
                lines += 1
            return lines
Example 28
Project: NCBoost   Author: RausellLab   File: get_cadd_features.py    Apache License 2.0 5 votes vote down vote up
def get_num_lines(file_path):
    """Count the lines in ``file_path`` by scanning a memory map.

    The original leaked both the file handle and the mmap; ``with`` closes
    them deterministically.  Read-only access also drops the needless write
    permission the old "r+" mode required.
    """
    with open(file_path, "rb") as fp:
        with mmap.mmap(fp.fileno(), 0, access=mmap.ACCESS_READ) as buf:
            lines = 0
            while buf.readline():
                lines += 1
            return lines
Example 29
Project: NCBoost   Author: RausellLab   File: get_1000GP_daf_het.py    Apache License 2.0 5 votes vote down vote up
def get_num_lines(file_path):
    """Count the lines in ``file_path`` by scanning a memory map.

    The original leaked both the file handle and the mmap; ``with`` closes
    them deterministically.  Read-only access also drops the needless write
    permission the old "r+" mode required.
    """
    with open(file_path, "rb") as fp:
        with mmap.mmap(fp.fileno(), 0, access=mmap.ACCESS_READ) as buf:
            lines = 0
            while buf.readline():
                lines += 1
            return lines
Example 30
Project: RTX   Author: RTXteam   File: trainer.py    MIT License 5 votes vote down vote up
def __init__(self, knowledgebase):
        """Configure the Perceptron trainer from FLAGS and ``knowledgebase``,
        optionally allocating a 1 GB anonymous shared mmap for multi-CPU
        training, then preload reference beams from pickled files.

        NOTE(review): all state is stored on the Perceptron class, not on
        ``self`` — instances appear to share trainer state; confirm that is
        intended.  (Python 2 code: ``print >>`` syntax.)
        """
        if _sanity_check:
            ExprGenerator.setup()
            State.ExtraInfoGen = ExprGenerator
        Perceptron.model = Model()
        Perceptron.indepKB = IndepKnowledgeBase()
        Perceptron.KB = knowledgebase
        knowledgebase.postedit_indepkb(Perceptron.indepKB)
        Perceptron.c = 0
        Perceptron.iter = FLAGS.iter
        Perceptron.beamsize = FLAGS.beam
        Perceptron.parser = Parser(Perceptron.indepKB, Perceptron.KB, Perceptron.model, State)

        Perceptron.ncpus = FLAGS.ncpus

        Perceptron.ontheflyfd = FLAGS.ontheflyfd
        Perceptron.single_gold = FLAGS.singlegold
        Perceptron.output_prefix = FLAGS.outputprefix
        Perceptron.fdbeamsize = FLAGS.fdbeam

        # Shared memory is only needed when worker processes are used.
        if Perceptron.ncpus > 0:
            Perceptron.shared_memo_size = int(1024 * 1024 * 1024)  # 1G shared memory
            Perceptron.shared_memo = mmap.mmap(-1, Perceptron.shared_memo_size,
                                               mmap.MAP_SHARED, mmap.PROT_READ | mmap.PROT_WRITE)

        Perceptron.ref_beams = {}
        if FLAGS.ref:
            print >> LOGS, "loading refs",
            hgs = pickle.load(open(FLAGS.ref))
            self.load_hgs(hgs)

        if FLAGS.extraref:
            print >> LOGS, "loading extra refs",
            hgs = pickle.load(open(FLAGS.extraref))
            self.load_hgs(hgs)
Example 31
Project: JukeBox   Author: gauravsarkar97   File: _util.py    MIT License 5 votes vote down vote up
def insert_bytes(fobj, size, offset, BUFFER_SIZE=2 ** 16):
    """Insert size bytes of empty space starting at offset.

    fobj must be an open file object, open rb+ or
    equivalent. Mutagen tries to use mmap to resize the file, but
    falls back to a significantly slower method if mmap fails.

    Args:
        fobj (fileobj)
        size (int): The amount of space to insert
        offset (int): The offset at which to insert the space
    Raises:
        IOError
    """

    if min(size, offset) < 0:
        raise ValueError

    # Everything from ``offset`` to EOF has to shift up by ``size``.
    fobj.seek(0, 2)
    movesize = fobj.tell() - offset
    if movesize < 0:
        raise ValueError

    # Grow the file first, then move the tail up to open the gap.
    resize_file(fobj, size, BUFFER_SIZE)

    if mmap is None:
        fallback_move(fobj, offset + size, offset, movesize, BUFFER_SIZE)
    else:
        try:
            mmap_move(fobj, offset + size, offset, movesize)
        except mmap.error:
            fallback_move(fobj, offset + size, offset, movesize, BUFFER_SIZE)
Example 32
Project: JukeBox   Author: gauravsarkar97   File: _util.py    MIT License 5 votes vote down vote up
def delete_bytes(fobj, size, offset, BUFFER_SIZE=2 ** 16):
    """Delete size bytes of empty space starting at offset.

    fobj must be an open file object, open rb+ or
    equivalent. Mutagen tries to use mmap to resize the file, but
    falls back to a significantly slower method if mmap fails.

    Args:
        fobj (fileobj)
        size (int): The amount of space to delete
        offset (int): The start of the space to delete
    Raises:
        IOError
    """

    if min(size, offset) < 0:
        raise ValueError

    # Everything past the deleted span has to shift down by ``size``.
    fobj.seek(0, 2)
    movesize = fobj.tell() - offset - size
    if movesize < 0:
        raise ValueError

    # Move the tail down over the deleted span, then shrink the file.
    moved = False
    if mmap is not None:
        try:
            mmap_move(fobj, offset, offset + size, movesize)
            moved = True
        except mmap.error:
            pass
    if not moved:
        fallback_move(fobj, offset, offset + size, movesize, BUFFER_SIZE)

    resize_file(fobj, -size, BUFFER_SIZE)
Example 33
Project: dcc   Author: amimo   File: DataModel.py    Apache License 2.0 5 votes vote down vote up
def __init__(self, filename):
        """Open ``filename`` read-only and hand the base class a
        copy-on-write memory map of its full contents."""
        self._filename = filename
        self._f = open(filename, "rb")

        # Size 0 maps the whole file; ACCESS_COPY keeps any writes to the
        # map private and never flushes them back to disk.
        self._mapped = mmap.mmap(self._f.fileno(), 0, access=mmap.ACCESS_COPY)

        super(FileDataModel, self).__init__(self._mapped)
Example 34
Project: WhooshSearch   Author: rokartnaz   File: compound.py    BSD 2-Clause "Simplified" License 5 votes vote down vote up
def __init__(self, dbfile, use_mmap=True, basepos=0):
        """Open a compound-file segment stored in ``dbfile``.

        Reads the directory (name -> offset/length table) and options pickles
        from the file, then — when permitted and possible — maps the whole
        file into memory so sub-files can be served as zero-copy buffers.
        Falls back silently to file-based access when the map cannot be
        created for lack of memory (ENOMEM).
        """
        self._file = dbfile
        self.is_closed = False

        # Seek to the end to get total file size (to check if mmap is OK)
        dbfile.seek(0, os.SEEK_END)
        filesize = self._file.tell()
        dbfile.seek(basepos)

        self._diroffset = self._file.read_long()
        self._dirlength = self._file.read_int()
        self._file.seek(self._diroffset)
        self._dir = self._file.read_pickle()
        self._options = self._file.read_pickle()
        self._locks = {}
        self._source = None

        use_mmap = (
            use_mmap
            and hasattr(self._file, "fileno")  # check file is a real file
            and filesize < sys.maxsize  # check fit on 32-bit Python
        )
        if mmap and use_mmap:
            # Try to open the entire segment as a memory-mapped object
            try:
                fileno = self._file.fileno()
                self._source = mmap.mmap(fileno, 0, access=mmap.ACCESS_READ)
            except (mmap.error, OSError):
                e = sys.exc_info()[1]
                # If we got an error because there wasn't enough memory to
                # open the map, ignore it and fall through, we'll just use the
                # (slower) "sub-file" implementation
                if e.errno == errno.ENOMEM:
                    pass
                else:
                    raise
            else:
                # If that worked, we can close the file handle we were given
                self._file.close()
                self._file = None
Example 35
Project: WhooshSearch   Author: rokartnaz   File: compound.py    BSD 2-Clause "Simplified" License 5 votes vote down vote up
def open_file(self, name, *args, **kwargs):
        """Return a file-like object for sub-file ``name`` inside this
        compound segment, using the cheapest available representation: a
        zero-copy buffer over the mmap, the underlying file's own subset()
        if it provides one, or a SubFile wrapper as the last resort.

        Raises StorageError if the storage has been closed.
        """
        if self.is_closed:
            raise StorageError("Storage was closed")

        offset, length = self.range(name)
        if self._source:
            # Create a memoryview/buffer from the mmap
            buf = memoryview_(self._source, offset, length)
            f = BufferFile(buf, name=name)
        elif hasattr(self._file, "subset"):
            f = self._file.subset(offset, length, name=name)
        else:
            f = StructFile(SubFile(self._file, offset, length), name=name)
        return f
Example 36
Project: analyzer-website   Author: santiagolizardo   File: __init__.py    GNU Affero General Public License v3.0 5 votes vote down vote up
def __init__(self, filename, flags=STANDARD, cache=True):
        """
        Create and return a GeoIP instance.

        :arg filename: File path to a GeoIP database
        :arg flags: Flags that affect how the database is processed.
            Currently supported flags are STANDARD (default),
            MEMORY_CACHE (preload the whole file into memory) and
            MMAP_CACHE (access the file via mmap)
        :arg cache: Used in tests to skip instance caching
        """
        self._lock = Lock()
        self._flags = flags
        self._netmask = None

        # Degrade gracefully if the platform has no mmap module.
        if self._flags & const.MMAP_CACHE and mmap is None:  # pragma: no cover
            import warnings
            warnings.warn("MMAP_CACHE cannot be used without a mmap module")
            self._flags &= ~const.MMAP_CACHE

        if self._flags & const.MMAP_CACHE:
            # The mapping survives closing the file handle that created it.
            with codecs.open(filename, 'rb', ENCODING) as db_file:
                self._fp = mmap.mmap(db_file.fileno(), 0,
                                     access=mmap.ACCESS_READ)
            self._type = 'MMAP_CACHE'
        elif self._flags & const.MEMORY_CACHE:
            # Slurp the database once and serve reads from memory.
            with codecs.open(filename, 'rb', ENCODING) as db_file:
                self._memory = db_file.read()
            self._fp = util.str2fp(self._memory)
            self._type = 'MEMORY_CACHE'
        else:
            # Plain file access; the handle stays open for lookups.
            self._fp = codecs.open(filename, 'rb', ENCODING)
            self._type = 'STANDARD'

        with self._lock:
            self._setup_segments()
Example 37
Project: nexus3-cli   Author: thiagofigueiro   File: nexus_util.py    MIT License 5 votes vote down vote up
def calculate_hash(hash_name, file_path_or_handle):
    """
    Calculate a hash for the given file.

    :param hash_name: name of the hash algorithm in hashlib
    :type hash_name: str
    :param file_path_or_handle: source file name (:py:obj:`str`) or open
        binary file handle (:py:obj:`file-like`) for the hash algorithm.
    :type file_path_or_handle: str or file-like
    :return: the calculated hash, hex-encoded
    :rtype: str
    """
    def _hash(_fd):
        h = hashlib.new(hash_name)
        stat = os.fstat(_fd.fileno())
        if stat.st_size > 0:  # can't map a zero-length file
            # Close the mapping deterministically instead of leaking it
            # until garbage collection (the original never closed it).
            with mmap.mmap(_fd.fileno(), stat.st_size,
                           access=mmap.ACCESS_READ) as m:
                h.update(m)
        return h.hexdigest()

    if hasattr(file_path_or_handle, 'read'):
        return _hash(file_path_or_handle)
    else:
        with open(file_path_or_handle, 'rb') as fd:
            return _hash(fd)
Example 38
Project: NiujiaoDebugger   Author: MrSrc   File: test_os.py    GNU General Public License v3.0 5 votes vote down vote up
def _kill_with_event(self, event, name):
        """Send console-control *event* to a child process and verify it stops.

        Windows-only test helper: a 1-byte anonymous mmap shared via
        *tagname* lets the child flag (byte 0 -> 1) that its console
        handler is installed before we deliver the event.
        """
        tagname = "test_os_%s" % uuid.uuid1()
        # Third positional argument to mmap.mmap on Windows is tagname.
        m = mmap.mmap(-1, 1, tagname)
        m[0] = 0
        # Run a script which has console control handling enabled.
        proc = subprocess.Popen([sys.executable,
                   os.path.join(os.path.dirname(__file__),
                                "win_console_handler.py"), tagname],
                   creationflags=subprocess.CREATE_NEW_PROCESS_GROUP)
        # Let the interpreter startup before we send signals. See #3137.
        # NOTE(review): `max` shadows the builtin of the same name here.
        count, max = 0, 100
        while count < max and proc.poll() is None:
            if m[0] == 1:  # child set the ready flag in shared memory
                break
            time.sleep(0.1)
            count += 1
        else:
            # Forcefully kill the process if we weren't able to signal it.
            os.kill(proc.pid, signal.SIGINT)
            self.fail("Subprocess didn't finish initialization")
        os.kill(proc.pid, event)
        # proc.send_signal(event) could also be done here.
        # Allow time for the signal to be passed and the process to exit.
        time.sleep(0.5)
        if not proc.poll():
            # NOTE(review): poll() is falsy both while running (None) and on a
            # clean exit (0); this treats both as "still running" — confirm intended.
            # Forcefully kill the process if we weren't able to signal it.
            os.kill(proc.pid, signal.SIGINT)
            self.fail("subprocess did not stop on {}".format(name))
Example 39
Project: NiujiaoDebugger   Author: MrSrc   File: heap.py    GNU General Public License v3.0 5 votes vote down vote up
def __init__(self, size):
            """Create an anonymous Windows shared mmap under a fresh tagname."""
            self.size = size
            # Try random tagnames until we create a mapping that did not
            # already exist (GetLastError() == 0 means it is brand new).
            for _attempt in range(100):
                candidate = 'pym-%d-%s' % (os.getpid(), next(self._rand))
                shared = mmap.mmap(-1, size, tagname=candidate)
                if _winapi.GetLastError() == 0:
                    break
                # Collided with a preexisting mapping; discard and retry.
                shared.close()
            else:
                raise FileExistsError('Cannot find name for new mmap')
            self.name = candidate
            self.buffer = shared
            self._state = (self.size, self.name)
Example 40
Project: NiujiaoDebugger   Author: MrSrc   File: heap.py    GNU General Public License v3.0 5 votes vote down vote up
def __setstate__(self, state):
            """Reattach to the named shared mmap after unpickling."""
            size, name = state
            self._state = state
            self.size = size
            self.name = name
            self.buffer = mmap.mmap(-1, size, tagname=name)
            # XXX Temporarily preventing buildbot failures while determining
            # XXX the correct long-term fix. See issue 23060
            #assert _winapi.GetLastError() == _winapi.ERROR_ALREADY_EXISTS
Example 41
Project: NiujiaoDebugger   Author: MrSrc   File: heap.py    GNU General Public License v3.0 5 votes vote down vote up
def __init__(self, size, fd=-1):
            """Map *size* bytes of *fd*, creating a backing temp file when fd == -1."""
            self.size = size
            self.fd = fd
            if fd == -1:
                # No descriptor supplied: back the arena with a temp file
                # that is unlinked immediately so it vanishes on close.
                name_prefix = 'pym-%d-' % os.getpid()
                self.fd, path = tempfile.mkstemp(
                    prefix=name_prefix, dir=self._choose_dir(size))
                os.unlink(path)
                # Close the descriptor when the arena is garbage collected.
                util.Finalize(self, os.close, (self.fd,))
                os.ftruncate(self.fd, size)
            self.buffer = mmap.mmap(self.fd, self.size)
Example 42
Project: NiujiaoDebugger   Author: MrSrc   File: heap.py    GNU General Public License v3.0 5 votes vote down vote up
def __init__(self, size=mmap.PAGESIZE):
        """Initialize an empty heap; arenas of at least *size* bytes are created on demand."""
        self._lastpid = os.getpid()      # remembered so malloc() can reset after fork()
        self._lock = threading.Lock()
        self._size = size                # size of the next arena to allocate
        # Free-block bookkeeping, indexed several ways for fast lookup.
        self._lengths = []
        self._len_to_seq = {}
        self._start_to_block = {}
        self._stop_to_block = {}
        self._allocated_blocks = set()
        self._arenas = []
        # list of pending blocks to free - see free() comment below
        self._pending_free_blocks = []
Example 43
Project: DDEA-DEV   Author: TinyOS-Camp   File: toolset.py    GNU General Public License v2.0 5 votes vote down vote up
def saveObjectBinMmap(obj, filename):
    """Pickle *obj* into the existing file *filename* through a shared mmap.

    The file must already exist and be at least as large as the pickled
    payload; a memory mapping cannot grow the underlying file.
    """
    # BUG FIX: os.open() returns a raw integer fd, which is neither a
    # context manager nor an object with fileno(); the original crashed
    # on the `with` statement. Use the builtin open() in r+b mode.
    with open(filename, 'r+b') as f:
        # PROT_READ added: write-only mappings are not portable, and the
        # original intent (update the file in place) needs read+write.
        buf = mmap.mmap(f.fileno(), 0, mmap.MAP_SHARED,
                        mmap.PROT_READ | mmap.PROT_WRITE)
        try:
            cPickle.dump(obj, buf, cPickle.HIGHEST_PROTOCOL)
        finally:
            buf.flush()
            buf.close()
Example 44
Project: DDEA-DEV   Author: TinyOS-Camp   File: toolset.py    GNU General Public License v2.0 5 votes vote down vote up
def saveObjectBinMmap(obj, filename):
    """Pickle *obj* into the existing file *filename* through a shared mmap.

    The file must already exist and be at least as large as the pickled
    payload; a memory mapping cannot grow the underlying file.
    """
    # BUG FIX: os.open() returns a raw integer fd, which is neither a
    # context manager nor an object with fileno(); the original crashed
    # on the `with` statement. Use the builtin open() in r+b mode.
    with open(filename, 'r+b') as f:
        # PROT_READ added: write-only mappings are not portable, and the
        # original intent (update the file in place) needs read+write.
        buf = mmap.mmap(f.fileno(), 0, mmap.MAP_SHARED,
                        mmap.PROT_READ | mmap.PROT_WRITE)
        try:
            cPickle.dump(obj, buf, cPickle.HIGHEST_PROTOCOL)
        finally:
            buf.flush()
            buf.close()
Example 45
Project: DDEA-DEV   Author: TinyOS-Camp   File: toolset.py    GNU General Public License v2.0 5 votes vote down vote up
def saveObjectBinMmap(obj, filename):
    """Pickle *obj* into the existing file *filename* through a shared mmap.

    The file must already exist and be at least as large as the pickled
    payload; a memory mapping cannot grow the underlying file.
    """
    # BUG FIX: os.open() returns a raw integer fd, which is neither a
    # context manager nor an object with fileno(); the original crashed
    # on the `with` statement. Use the builtin open() in r+b mode.
    with open(filename, 'r+b') as f:
        # PROT_READ added: write-only mappings are not portable, and the
        # original intent (update the file in place) needs read+write.
        buf = mmap.mmap(f.fileno(), 0, mmap.MAP_SHARED,
                        mmap.PROT_READ | mmap.PROT_WRITE)
        try:
            cPickle.dump(obj, buf, cPickle.HIGHEST_PROTOCOL)
        finally:
            buf.flush()
            buf.close()
Example 46
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_c_parser_only.py    MIT License 5 votes vote down vote up
def test_file_handles_mmap(c_parser_only, csv1):
    # gh-14418
    #
    # Don't close user provided file handles.
    parser = c_parser_only

    with open(csv1, "r") as handle:
        mapped = mmap.mmap(handle.fileno(), 0, access=mmap.ACCESS_READ)
        parser.read_csv(mapped)

        # The parser must leave the caller-owned mmap open; we close it.
        assert not mapped.closed
        mapped.close()
Example 47
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: common.py    MIT License 5 votes vote down vote up
def __init__(self, f):
        """Wrap *f* (a real file with a fileno) in a read-only memory map."""
        self.mmap = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)
Example 48
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: common.py    MIT License 5 votes vote down vote up
def __getattr__(self, name):
        """Delegate any attribute not found on the wrapper to the mmap."""
        return getattr(self.mmap, name)
Example 49
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: common.py    MIT License 5 votes vote down vote up
def __next__(self):
        """Return the next line as ``str``; raise StopIteration at EOF."""
        # readline() yields bytes, but Python's CSV reader expects str.
        line = self.mmap.readline().decode("utf-8")

        # Reading past the end of an mmap returns an empty string rather
        # than raising, so translate that into iterator exhaustion here.
        if not line:
            raise StopIteration
        return line
Example 50
Project: QuantStudio   Author: Scorpi000   File: QSObjects.py    GNU General Public License v3.0 5 votes vote down vote up
def __init__(self, cache_size=100):
        """Set up put/get queues plus a shared-memory cache of *cache_size* MB."""
        self._CacheSize = int(cache_size * 2**20)  # cache size in bytes
        self._PutQueue = Queue()
        self._PutLock = Lock()
        self._GetQueue = Queue()
        if os.name == "nt":
            # Windows shares anonymous mappings by tagname.
            self._TagName = str(uuid.uuid1())
            self._MMAPCacheData = mmap.mmap(-1, self._CacheSize,
                                            tagname=self._TagName)
        else:
            # POSIX anonymous mappings need no tag.
            self._TagName = None
            self._MMAPCacheData = mmap.mmap(-1, self._CacheSize)
Example 51
Project: QuantStudio   Author: Scorpi000   File: QSObjects.py    GNU General Public License v3.0 5 votes vote down vote up
def __setstate__(self, state):
        """Restore attributes; on Windows reattach the tagged shared mmap."""
        self.__dict__.update(state)
        if os.name == "nt":
            # mmap handles don't survive pickling; reopen by tagname.
            self._MMAPCacheData = mmap.mmap(-1, self._CacheSize,
                                            tagname=self._TagName)
Example 52
Project: QuantStudio   Author: Scorpi000   File: RiskDB.py    GNU General Public License v3.0 5 votes vote down vote up
def _prepareRTMMAPCacheData(rt, mmap_cache):
    """Subprocess loop that maintains a covariance cache and ships it to the
    main process through a shared mmap, CacheSize bytes at a time.

    Protocol: a ``None`` task terminates the loop; a ``(None, None)`` task
    means "transfer the cache" (chunk length 0 marks the end); any other
    task advances the cache window around the new time-point index.
    """
    CacheData, CacheDTs, MMAPCacheData, DTNum = {}, [], mmap_cache, len(rt.ErgodicMode._DateTimes)
    CacheSize = int(rt.ErgodicMode.CacheSize*2**20)
    # On Windows the mapping can't be inherited; reattach by tagname.
    if os.name=='nt': MMAPCacheData = mmap.mmap(-1, CacheSize, tagname=rt.ErgodicMode._TagName)
    while True:
        Task = rt.ErgodicMode._Queue2SubProcess.get()  # fetch the next task
        if Task is None: break  # terminate the subprocess
        if (Task[0] is None) and (Task[1] is None):  # ship the cache to the main process
            CacheDataByte = pickle.dumps(CacheData)
            DataLen = len(CacheDataByte)
            for i in range(int(DataLen/CacheSize)+1):
                iStartInd = i*CacheSize
                # BUG FIX: was max(...), which made the first chunk span the
                # whole payload and overflow the CacheSize-byte mapping
                # whenever DataLen > CacheSize.
                iEndInd = min((i+1)*CacheSize, DataLen)
                if iEndInd>iStartInd:
                    MMAPCacheData.seek(0)
                    MMAPCacheData.write(CacheDataByte[iStartInd:iEndInd])
                    rt.ErgodicMode._Queue2MainProcess.put(iEndInd-iStartInd)
                    rt.ErgodicMode._Queue2SubProcess.get()  # wait for the chunk to be consumed
            rt.ErgodicMode._Queue2MainProcess.put(0)  # zero length ends the transfer
            del CacheDataByte
            gc.collect()
        else:  # prefetch the cache window around the new position
            CurInd = Task[0] + rt.ErgodicMode.ForwardPeriod + 1
            if CurInd < DTNum:  # not at the end yet: more data must be cached
                OldCacheDTs = CacheDTs
                CacheDTs = rt.ErgodicMode._DateTimes[max((0, CurInd-rt.ErgodicMode.BackwardPeriod)):min((DTNum, CurInd+rt.ErgodicMode.ForwardPeriod+1))]
                NewCacheDTs = sorted(set(CacheDTs).difference(OldCacheDTs))
                DropDTs = set(OldCacheDTs).difference(CacheDTs)
                for iDT in DropDTs: CacheData.pop(iDT)
                if NewCacheDTs:
                    Cov = rt.__QS_readCov__(dts=NewCacheDTs)
                    for iDT in NewCacheDTs: CacheData[iDT] = {"Cov": Cov[iDT]}
                    Cov = None
    return 0

# Base class for risk tables
Example 53
Project: QuantStudio   Author: Scorpi000   File: RiskDB.py    GNU General Public License v3.0 5 votes vote down vote up
def _prepareFRTMMAPCacheData(rt, mmap_cache):
    """Subprocess loop for multi-factor risk tables: maintains a cache of
    factor covariance / specific risk / factor data and ships it to the
    main process through a shared mmap, CacheSize bytes at a time.

    Protocol: a ``None`` task terminates the loop; a ``(None, None)`` task
    means "transfer the cache" (chunk length 0 marks the end); any other
    task advances the cache window around the new time-point index.
    """
    CacheData, CacheDTs, MMAPCacheData, DTNum = {}, [], mmap_cache, len(rt.ErgodicMode._DateTimes)
    CacheSize = int(rt.ErgodicMode.CacheSize*2**20)
    # On Windows the mapping can't be inherited; reattach by tagname.
    if os.name=='nt': MMAPCacheData = mmap.mmap(-1, CacheSize, tagname=rt.ErgodicMode._TagName)
    while True:
        Task = rt.ErgodicMode._Queue2SubProcess.get()  # fetch the next task
        if Task is None: break  # terminate the subprocess
        if (Task[0] is None) and (Task[1] is None):  # ship the cache to the main process
            CacheDataByte = pickle.dumps(CacheData)
            DataLen = len(CacheDataByte)
            for i in range(int(DataLen/CacheSize)+1):
                iStartInd = i*CacheSize
                # BUG FIX: was max(...), which made the first chunk span the
                # whole payload and overflow the CacheSize-byte mapping
                # whenever DataLen > CacheSize.
                iEndInd = min((i+1)*CacheSize, DataLen)
                if iEndInd>iStartInd:
                    MMAPCacheData.seek(0)
                    MMAPCacheData.write(CacheDataByte[iStartInd:iEndInd])
                    rt.ErgodicMode._Queue2MainProcess.put(iEndInd-iStartInd)
                    rt.ErgodicMode._Queue2SubProcess.get()  # wait for the chunk to be consumed
            rt.ErgodicMode._Queue2MainProcess.put(0)  # zero length ends the transfer
            del CacheDataByte
            gc.collect()
        else:  # prefetch the cache window around the new position
            CurInd = Task[0] + rt.ErgodicMode.ForwardPeriod + 1
            if CurInd < DTNum:  # not at the end yet: more data must be cached
                OldCacheDTs = CacheDTs
                CacheDTs = rt.ErgodicMode._DateTimes[max((0, CurInd-rt.ErgodicMode.BackwardPeriod)):min((DTNum, CurInd+rt.ErgodicMode.ForwardPeriod+1))]
                NewCacheDTs = sorted(set(CacheDTs).difference(OldCacheDTs))
                DropDTs = set(OldCacheDTs).difference(CacheDTs)
                for iDT in DropDTs: CacheData.pop(iDT)
                if NewCacheDTs:
                    FactorCov = rt.__QS_readFactorCov__(dts=NewCacheDTs)
                    SpecificRisk = rt.__QS_readSpecificRisk__(dts=NewCacheDTs)
                    FactorData = rt.__QS_readFactorData__(dts=NewCacheDTs)
                    for iDT in NewCacheDTs:
                        CacheData[iDT] = {"FactorCov": FactorCov.loc[iDT],
                                          "SpecificRisk": SpecificRisk.loc[iDT],
                                          "FactorData": FactorData.loc[:, iDT]}
                    FactorCov = SpecificRisk = FactorData = None
    return 0
# Base class for multi-factor risk tables
Example 54
Project: QuantStudio   Author: Scorpi000   File: FactorDB.py    GNU General Public License v3.0 5 votes vote down vote up
def __getstate__(self):
        """Pickle support: drop the cache subprocess handle, which cannot be pickled."""
        state = dict(self.__dict__)
        if "_CacheDataProcess" in state:
            state["_CacheDataProcess"] = None
        return state
# mmap-based cached data; when ergodic mode is enabled, the number of cached factors, IDs and time points is limited, and the buffer holds a partial slice of the factor data
Example 55
Project: QuantStudio   Author: Scorpi000   File: FactorDB.py    GNU General Public License v3.0 5 votes vote down vote up
def start(self, dts, **kwargs):
        """Enter ergodic (sequential traversal) mode.

        Initializes the traversal state and, when caching is enabled,
        spawns the subprocess that prefetches data into a shared mmap
        buffer. Calling start() twice is a no-op. Returns 0.
        """
        if self.ErgodicMode._isStarted: return 0
        self.ErgodicMode._DateTimes = np.array((self.getDateTime() if not self.ErgodicMode.ErgodicDTs else self.ErgodicMode.ErgodicDTs), dtype="O")
        if self.ErgodicMode._DateTimes.shape[0]==0: raise __QS_Error__("因子表: '%s' 的默认时间序列为空, 请设置参数 '遍历模式-遍历时点' !" % self.Name)
        self.ErgodicMode._IDs = (self.getID() if not self.ErgodicMode.ErgodicIDs else list(self.ErgodicMode.ErgodicIDs))
        if not self.ErgodicMode._IDs: raise __QS_Error__("因子表: '%s' 的默认 ID 序列为空, 请设置参数 '遍历模式-遍历ID' !" % self.Name)
        self.ErgodicMode._CurInd = -1  # index of the current time point in dts; drives what gets cached
        self.ErgodicMode._DTNum = self.ErgodicMode._DateTimes.shape[0]  # number of time points
        self.ErgodicMode._CacheDTs = []  # sequence of cached time points
        self.ErgodicMode._CacheData = {}  # current cache
        self.ErgodicMode._CacheFactorNum = 0  # number of cached factors, <= self.MaxFactorCacheNum
        self.ErgodicMode._CacheIDNum = 0  # number of cached IDs, <= self.MaxIDCacheNum
        self.ErgodicMode._FactorReadNum = pd.Series(0, index=self.FactorNames)  # factor read counts, pd.Series(count, index=self.FactorNames)
        self.ErgodicMode._IDReadNum = pd.Series()  # ID read counts, pd.Series(count, index=IDs)
        self.ErgodicMode._Queue2SubProcess = Queue()  # channel for messages from the main process to the data-prep subprocess
        self.ErgodicMode._Queue2MainProcess = Queue()  # channel for messages from the data-prep subprocess to the main process
        if self.ErgodicMode.CacheSize>0:
            if os.name=="nt":
                # Windows: child reattaches the shared memory by tagname.
                self.ErgodicMode._TagName = str(uuid.uuid1())  # tag of the shared memory
                self._MMAPCacheData = None
            else:
                self.ErgodicMode._TagName = None  # tag of the shared memory (unused on POSIX)
                self._MMAPCacheData = mmap.mmap(-1, int(self.ErgodicMode.CacheSize*2**20))  # current shared-memory buffer
            # "因子" selects factor-wise caching; anything else caches by ID.
            if self.ErgodicMode.CacheMode=="因子": self.ErgodicMode._CacheDataProcess = Process(target=_prepareMMAPFactorCacheData, args=(self, self._MMAPCacheData), daemon=True)
            else: self.ErgodicMode._CacheDataProcess = Process(target=_prepareMMAPIDCacheData, args=(self, self._MMAPCacheData), daemon=True)
            self.ErgodicMode._CacheDataProcess.start()
            if os.name=="nt": self._MMAPCacheData = mmap.mmap(-1, int(self.ErgodicMode.CacheSize*2**20), tagname=self.ErgodicMode._TagName)  # current shared-memory buffer
        self.ErgodicMode._isStarted = True
        return 0
    # Advance to the next time point; idt: the time point, datetime.datetime
Example 56
Project: recruit   Author: Frank-qlu   File: test_c_parser_only.py    Apache License 2.0 5 votes vote down vote up
def test_file_handles_mmap(c_parser_only, csv1):
    # gh-14418
    #
    # Don't close user provided file handles.
    parser = c_parser_only

    with open(csv1, "r") as f:
        m = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)
        parser.read_csv(m)

        # mmap.closed only exists on Python 3; skip the check on Python 2.
        if PY3:
            assert not m.closed
        m.close()
Example 57
Project: recruit   Author: Frank-qlu   File: common.py    Apache License 2.0 5 votes vote down vote up
def __init__(self, f):
        """Wrap *f* (a real file with a fileno) in a read-only memory map."""
        self.mmap = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)
Example 58
Project: recruit   Author: Frank-qlu   File: common.py    Apache License 2.0 5 votes vote down vote up
def __getattr__(self, name):
        """Delegate any attribute not found on the wrapper to the mmap."""
        return getattr(self.mmap, name)
Example 59
Project: recruit   Author: Frank-qlu   File: common.py    Apache License 2.0 5 votes vote down vote up
def __next__(self):
        """Return the next line of the mmap as native ``str``; raise StopIteration at EOF."""
        newline = self.mmap.readline()

        # readline returns bytes, not str, in Python 3,
        # but Python's CSV reader expects str, so convert
        # the output to str before continuing
        if compat.PY3:
            newline = compat.bytes_to_str(newline)

        # mmap doesn't raise if reading past the allocated
        # data but instead returns an empty string, so raise
        # if that is returned
        if newline == '':
            raise StopIteration
        return newline
Example 60
Project: 2018-1-OSS-E7   Author: 18-1-SKKU-OSS   File: ttf.py    MIT License 5 votes vote down vote up
def __init__(self, filename):
		"""Read the given truetype file.

		:Parameters:
			`filename`
				The name of any Windows, OS2 or Macintosh Truetype file.

		The object must be closed (see `close`) after use.

		An exception will be raised if the file does not exist or cannot
		be read.
		"""
		if not filename: filename = ''
		# Renamed from `len`, which shadowed the builtin of the same name.
		file_size = os.stat(filename).st_size
		self._fileno = os.open(filename, os.O_RDONLY)
		if hasattr(mmap, 'MAP_SHARED'):
			self._data = mmap.mmap(self._fileno, file_size, mmap.MAP_SHARED,
				mmap.PROT_READ)
		else:
			# Windows mmap takes an access mode instead of flags/prot.
			self._data = mmap.mmap(self._fileno, file_size, None,
				mmap.ACCESS_READ)

		# Index every table in the font by its 4-byte tag.
		offsets = _read_offset_table(self._data, 0)
		self._tables = {}
		for table in _read_table_directory_entry.array(self._data,
			offsets.size, offsets.num_tables):
			self._tables[table.tag] = table

		# Lazily-populated caches; filled on first access elsewhere.
		self._names = None
		self._horizontal_metrics = None
		self._character_advances = None
		self._character_kernings = None
		self._glyph_kernings = None
		self._character_map = None
		self._glyph_map = None
		self._font_selection_flags = None
		self._glyph_vectors = None

		self.header = \
			_read_head_table(self._data, self._tables['head'].offset)
		self.horizontal_header = \
			_read_horizontal_header(self._data, self._tables['hhea'].offset)
Example 61
Project: pytinyhdfs   Author: vietor   File: WebHDFS.py    MIT License 5 votes vote down vote up
def putFile(self, local_file, target_file, replication=1, overwrite=True):
        """Upload *local_file* to *target_file*, memory-mapping non-empty files.

        Empty files are streamed from the plain handle because a
        zero-length file cannot be memory-mapped.
        """
        with open(local_file, "rb") as src:
            size = os.fstat(src.fileno()).st_size
            mapped = size > 0
            if mapped:
                payload = mmap.mmap(src.fileno(), 0, access=mmap.ACCESS_READ)
            else:
                payload = src
            try:
                return self.put(payload, target_file, replication, overwrite)
            finally:
                if mapped:
                    payload.close()
Example 62
Project: holodeck   Author: BYU-PCCL   File: shmem.py    MIT License 5 votes vote down vote up
def __init__(self, name, shape, dtype=np.float32, uuid=""):
        """Create a named shared-memory block exposed as a numpy array.

        On Windows the block is an anonymous mmap identified by tagname;
        on POSIX it is a file under /dev/shm. The resulting array is
        available as ``self.np_array`` and aliases the shared memory.
        """
        self.shape = shape
        self.dtype = dtype
        # Total element count across all dimensions; bytes = count * itemsize.
        size = reduce(lambda x, y: x * y, shape)
        size_bytes = np.dtype(dtype).itemsize * size

        self._mem_path = None
        self._mem_pointer = None
        if os.name == "nt":
            # Windows: tagname-identified anonymous mapping.
            self._mem_path = "/HOLODECK_MEM" + uuid + "_" + name
            self._mem_pointer = mmap.mmap(0, size_bytes, self._mem_path)
        elif os.name == "posix":
            # POSIX: a file in shared memory, truncated to the needed size.
            self._mem_path = "/dev/shm/HOLODECK_MEM" + uuid + "_" + name
            f = os.open(self._mem_path, os.O_CREAT | os.O_TRUNC | os.O_RDWR)
            self._mem_file = f
            os.ftruncate(f, size_bytes)
            os.fsync(f)

            # TODO - I think we are leaking a file object here. Unfortunately, we
            #        can't just .close() it since numpy acquires a reference to it
            #        below and I can't find a way to release it in __linux_unlink__()
            self._mem_pointer = mmap.mmap(f, size_bytes)
        else:
            raise HolodeckException("Currently unsupported os: " + os.name)

        # Point a numpy array's data buffer at the shared memory (zero-copy).
        self.np_array = np.ndarray(shape, dtype=dtype)
        self.np_array.data = (Shmem._numpy_to_ctype[dtype] * size).from_buffer(self._mem_pointer)
Example 63
Project: perceptio   Author: tryexceptpass   File: visualize.py    MIT License 5 votes vote down vote up
def represent(path):
    """Show a square image that represents the file specified"""

    # Map the file read-only and copy its bytes into a mutable buffer.
    with open(path, 'rb') as infile:
        contents = bytearray(mmap.mmap(infile.fileno(), 0, access=mmap.ACCESS_READ))

    # Smallest square that can hold one pixel per byte.
    side = math.ceil(math.sqrt(len(contents)))
    img = Image.new('RGB', (side, side), "black")
    pixels = img.load()
    for i in range(side):
        for j in range(side):
            pos = i * side + j
            if pos < len(contents):
                # Red and green channels carry the byte value; blue stays 0.
                pixels[i, j] = (contents[pos], contents[pos], 0)
    img.show()
Example 64
Project: apk_api_key_extractor   Author: alessandrodd   File: strings_tool.py    Apache License 2.0 5 votes vote down vote up
def strings(file_name, sections=None, min_length=4):
    """
    Finds all strings in a file; if it's an ELF file, you can specify where (in which section) to
    look for the strings.

    :param file_name: name of the file to be examined
    :param sections: a list of strings which identify the ELF sections; should be used only with ELF files
    :param min_length: minimum length of a printable-ASCII run to report
    :return: generator of the strings found, decoded as utf-8
    """
    pattern = '([\x20-\x7E]{' + str(min_length) + '}[\x20-\x7E]*)'  # ASCII table from character space to tilde
    pattern = pattern.encode()
    regexp = re.compile(pattern)
    if not sections:
        with open(file_name, 'rb') as f, mmap(f.fileno(), 0, access=ACCESS_READ) as m:
            for match in regexp.finditer(m):
                yield str(match.group(0), 'utf-8')
    else:
        with open(file_name, 'rb') as f:
            elffile = ELFFile(f)
            for section in sections:
                try:
                    sec = elffile.get_section_by_name(section)
                except AttributeError:
                    # section not found
                    continue
                # skip section if missing in elf file
                if not sec:
                    continue
                offset = sec['sh_offset']
                size = sec['sh_size']
                if offset is None or size is None:
                    continue
                # mmap offsets must be multiples of ALLOCATIONGRANULARITY:
                # round the offset down and widen the mapping by the same
                # amount so the tail of the section is kept.
                # (BUG FIX: previously only `size` bytes were mapped from
                # the rounded-down offset, silently dropping the last
                # `offset % ALLOCATIONGRANULARITY` bytes of the section.)
                aligned = max(offset - offset % ALLOCATIONGRANULARITY, 0)
                length = size + (offset - aligned)
                with mmap(f.fileno(), length, access=ACCESS_READ, offset=aligned) as m:
                    for match in regexp.finditer(m):
                        yield str(match.group(0), 'utf-8')
Example 65
Project: Caver   Author: guokr   File: preprocess.py    GNU General Public License v3.0 5 votes vote down vote up
def get_num_lines(file_path):
    """Count the lines in *file_path* by scanning a read-only memory map.

    Fixes in this version: the file handle and the mmap are now closed
    (both were leaked), the file is opened read-only instead of "r+"
    (no write access required), and an empty file — which cannot be
    mmapped — returns 0 instead of raising ValueError.
    """
    with open(file_path, "rb") as fp:
        if os.fstat(fp.fileno()).st_size == 0:
            return 0
        with mmap.mmap(fp.fileno(), 0, access=mmap.ACCESS_READ) as buf:
            lines = 0
            while buf.readline():
                lines += 1
            return lines
Example 66
Project: pyblish-win   Author: pyblish   File: heap.py    GNU Lesser General Public License v3.0 4 votes vote down vote up
def __init__(self, size):
            """Create a Windows anonymous shared mmap under a unique tagname."""
            self.size = size
            # Process id plus a per-process counter keeps tagnames unique.
            tag = 'pym-%d-%d' % (os.getpid(), Arena._counter.next())
            self.name = tag
            self.buffer = mmap.mmap(-1, size, tagname=tag)
            # A nonzero last-error means a mapping with this tag already existed.
            assert win32.GetLastError() == 0, 'tagname already in use'
            self._state = (self.size, self.name)
Example 67
Project: pyblish-win   Author: pyblish   File: heap.py    GNU Lesser General Public License v3.0 4 votes vote down vote up
def __init__(self, size=mmap.PAGESIZE):
        """Initialize an empty heap; arenas of at least *size* bytes are created on demand."""
        self._lastpid = os.getpid()      # remembered so malloc() can reset after fork()
        self._lock = threading.Lock()
        self._size = size                # size of the next arena to allocate
        # Free-block bookkeeping, indexed several ways for fast lookup.
        self._lengths = []
        self._len_to_seq = {}
        self._start_to_block = {}
        self._stop_to_block = {}
        self._allocated_blocks = set()
        self._arenas = []
        # list of pending blocks to free - see free() comment below
        self._pending_free_blocks = []
Example 68
Project: crackdb   Author: kryc   File: binarydb.py    GNU General Public License v3.0 4 votes vote down vote up
def binarySearch(handle, value, width=None, mm=None, entries=None):
    '''
    Generic binary search function for dealing with large binary files
    It is able to quickly search a sorted binary file for a given value

    NOTE(review): Python 2 code — it relies on `xrange` and on `/`
    performing integer division in the midpoint and entry-count
    arithmetic; under Python 3 those expressions yield floats and would
    fail as slice indices. Confirm the runtime before porting.

    :param handle: open binary file of fixed-width, sorted records
    :param value: byte prefix (record key) to search for
    :param width: record width in bytes; defaults to len(value)
    :param mm: optional existing mmap of the file; one is created (and
        closed again) when omitted
    :param entries: record count; derived from the file size if None
    :return: list of full matching records (possibly empty)
    '''
    def _search(mm, left, right, value, width=None):
        # Recursive binary search over fixed-width records; each record's
        # first len(value) bytes are compared against the key.
        if width is None:
            width = len(value)
        if right >= left:
            mid = left + (right-left)/2
            n = mm[mid*width:mid*width+width]
            h = n[:len(value)]
            # memcmp is a project helper; presumably returns <0/0/>0 ordering
            # like C memcmp — TODO confirm.
            comparison = memcmp(h, value)
            if comparison == 0:
                assert(len(h) == len(value))
                results = [n,]
                # Found the first result, new seek each side to find other matches
                for i in xrange(mid-1, -1, -1):
                    n = mm[i*width:i*width+width]
                    h = n[:len(value)]
                    assert(len(h) == len(value))
                    if h == value:
                        results.append(n)
                    else:
                        break
                for i in xrange(mid+1, entries):
                    n = mm[i*width:i*width+width]
                    h = n[:len(value)]
                    assert(len(h) == len(value))
                    if h == value:
                        results.append(n)
                    else:
                        break
                return results
            elif comparison < 0:
                return _search(mm, mid+1, right, value, width)
            else:
                return _search(mm, left, mid-1, value, width)
        else:
            return []
    opened = False
    if mm is None:
        try:
            mm = mmap.mmap(handle.fileno(), 0)
        except ValueError:
            # e.g. an empty file cannot be mapped; report no matches.
            return []
        opened = True
    if entries is None:
        # Derive the record count from the file size and record width.
        handle.seek(0, os.SEEK_END)
        entries = handle.tell() / (width if width != None else len(value))
        handle.seek(0)
    result = _search(mm, 0, entries-1, value, width)
    if opened:
        # Only close mappings we created ourselves.
        mm.close()
    return result
Example 69
Project: me-ica   Author: ME-ICA   File: netcdf.py    GNU Lesser General Public License v2.1 4 votes vote down vote up
def __init__(self, filename, mode='r', mmap=None, version=1):
        """Initialize netcdf_file from fileobj (str or file-like).

        Parameters
        ----------
        filename : string or file-like
           string -> filename
        mode : {'r', 'w'}, optional
           read-write mode, default is 'r'
        mmap : None or bool, optional
           Whether to mmap `filename` when reading.  Default is True
           when `filename` is a file name, False when `filename` is a
           file-like object
        version : {1, 2}, optional
           version of netcdf to read / write, where 1 means *Classic
           format* and 2 means *64-bit offset format*.  Default is 1.  See
           http://www.unidata.ucar.edu/software/netcdf/docs/netcdf/Which-Format.html#Which-Format
        """
        # NOTE: the `mmap` parameter shadows any module of the same name
        # inside this method; only the boolean flag is used here.
        if hasattr(filename, 'seek'): # file-like
            self.fp = filename
            self.filename = 'None'
            if mmap is None:
                mmap = False
            elif mmap and not hasattr(filename, 'fileno'):
                raise ValueError('Cannot use file object for mmap')
        else: # maybe it's a string
            self.filename = filename
            self.fp = open(self.filename, '%sb' % mode)
            if mmap is None:
                # Real files default to memory-mapped reading.
                mmap  = True
        self.use_mmap = mmap
        self.version_byte = version

        if not mode in 'rw':
            raise ValueError("Mode must be either 'r' or 'w'.")
        self.mode = mode

        # Parsed file structure, populated by _read() for read mode.
        self.dimensions = {}
        self.variables = {}

        self._dims = []
        self._recs = 0       # number of records read so far
        self._recsize = 0    # bytes per record

        self._attributes = {}

        if mode == 'r':
            self._read()
Example 70
Project: JukeBox   Author: gauravsarkar97   File: _util.py    MIT License 4 votes vote down vote up
def mmap_move(fileobj, dest, src, count):
    """Mmaps the file object if possible and moves 'count' data
    from 'src' to 'dest'. All data has to be inside the file size
    (enlarging the file through this function isn't possible)

    Will adjust the file offset.

    Args:
        fileobj (fileobj)
        dest (int): The destination offset
        src (int): The source offset
        count (int) The amount of data to move
    Raises:
        mmap.error: In case move failed
        IOError: In case an operation on the fileobj fails
        ValueError: In case invalid parameters were given
    """

    assert mmap is not None, "no mmap support"

    if min(dest, src, count) < 0:
        raise ValueError("Invalid parameters")

    try:
        fd = fileobj.fileno()
    except (AttributeError, IOError):
        raise mmap.error(
            "File object does not expose/support a file descriptor")

    # The whole moved range must already lie inside the file.
    fileobj.seek(0, 2)
    file_size = fileobj.tell()
    span_end = max(dest, src) + count
    if span_end > file_size:
        raise ValueError("Not in file size boundary")

    # Map from the largest granularity-aligned offset at or below both
    # positions, so a partial mapping is still legal on every platform.
    granularity = mmap.ALLOCATIONGRANULARITY
    map_start = (min(dest, src) // granularity) * granularity
    assert dest >= map_start
    assert src >= map_start
    assert map_start % granularity == 0

    # Windows doesn't handle empty mappings, add a fast path here instead
    if count == 0:
        return

    # fast path
    if src == dest:
        return

    fileobj.flush()
    window = mmap.mmap(fd, span_end - map_start, offset=map_start)
    try:
        window.move(dest - map_start, src - map_start, count)
    finally:
        window.close()
Example 71
Project: LaserTOF   Author: kyleuckert   File: netcdf.py    MIT License 4 votes vote down vote up
def __init__(self, filename, mode='r', mmap=None, version=1,
                 maskandscale=False):
        """Initialize a netcdf_file.

        Parameters
        ----------
        filename : str or file-like
            Path of the file to open, or an already-open file object
            (must expose ``seek``; ``fileno`` too when mmap is requested).
        mode : {'r', 'w', 'a'}
            Read, write or append mode.
        mmap : bool or None
            Whether to memory-map the file.  ``None`` means: mmap real
            files opened for reading, never mmap caller-supplied objects.
        version : int
            NetCDF version byte.
        maskandscale : bool
            Whether to automatically apply mask/scale attributes.
        """
        if mode not in 'rwa':
            raise ValueError("Mode must be either 'r', 'w' or 'a'.")

        if hasattr(filename, 'seek'):
            # Caller handed us a file-like object.
            self.fp = filename
            self.filename = 'None'
            if mmap is None:
                mmap = False
            elif mmap and not hasattr(filename, 'fileno'):
                raise ValueError('Cannot use file object for mmap')
        else:
            # Otherwise treat it as a path string.
            self.filename = filename
            omode = 'r+' if mode == 'a' else mode
            self.fp = open(self.filename, omode + 'b')
            if mmap is None:
                mmap = True

        if mode != 'r':
            # Never memory-map a file we may write to.
            mmap = False

        self.mode = mode
        self.use_mmap = mmap
        self.version_byte = version
        self.maskandscale = maskandscale

        self.dimensions = OrderedDict()
        self.variables = OrderedDict()
        self._attributes = OrderedDict()

        self._dims = []
        self._recs = 0
        self._recsize = 0

        self._mm = None
        self._mm_buf = None
        if self.use_mmap:
            # Read-only map of the whole file, exposed as a flat int8 view.
            self._mm = mm.mmap(self.fp.fileno(), 0, access=mm.ACCESS_READ)
            self._mm_buf = np.frombuffer(self._mm, dtype=np.int8)

        if mode in 'ra':
            self._read()
Example 72
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: common.py    MIT License 4 votes vote down vote up
def get_filepath_or_buffer(
    filepath_or_buffer, encoding=None, compression=None, mode=None
):
    """
    Resolve *filepath_or_buffer* into something readable.

    URLs are downloaded into an in-memory buffer, ``s3://`` and ``gcs://``
    paths are delegated to their IO backends, plain paths get user
    expansion, and file-like objects pass straight through.

    Parameters
    ----------
    filepath_or_buffer : a url, filepath (str, py.path.local or pathlib.Path),
                         or buffer
    compression : {{'gzip', 'bz2', 'zip', 'xz', None}}, optional
    encoding : the encoding to use to decode bytes, default is 'utf-8'
    mode : str, optional

    Returns
    -------
    tuple of ({a filepath_ or buffer or S3File instance},
              encoding, str,
              compression, str,
              should_close, bool)
    """
    filepath_or_buffer = _stringify_path(filepath_or_buffer)

    if _is_url(filepath_or_buffer):
        req = urlopen(filepath_or_buffer)
        if req.headers.get("Content-Encoding", None) == "gzip":
            # The payload is gzipped on the wire; the header wins over
            # whatever compression the caller asked for.
            compression = "gzip"
        payload = req.read()
        req.close()
        return BytesIO(payload), encoding, compression, True

    if is_s3_url(filepath_or_buffer):
        from pandas.io import s3

        return s3.get_filepath_or_buffer(
            filepath_or_buffer, encoding=encoding, compression=compression, mode=mode
        )

    if is_gcs_url(filepath_or_buffer):
        from pandas.io import gcs

        return gcs.get_filepath_or_buffer(
            filepath_or_buffer, encoding=encoding, compression=compression, mode=mode
        )

    if isinstance(filepath_or_buffer, (str, bytes, mmap.mmap)):
        # Plain path (or an already-mapped buffer): expand '~' and return.
        return _expand_user(filepath_or_buffer), None, compression, False

    if is_file_like(filepath_or_buffer):
        return filepath_or_buffer, None, compression, False

    raise ValueError(
        "Invalid file path or buffer object type: {_type}".format(
            _type=type(filepath_or_buffer)
        )
    )
Example 73
Project: QuantStudio   Author: Scorpi000   File: FactorDB.py    GNU General Public License v3.0 4 votes vote down vote up
def _prepareMMAPFactorCacheData(ft, mmap_cache):
    """Sub-process worker that maintains a per-factor data cache shared via mmap.

    Loops over tasks read from ``ft.ErgodicMode._Queue2SubProcess``:

    * ``None`` -- stop the worker.
    * ``(None, None)`` -- pickle the current cache and stream it to the main
      process through the mmap buffer in ``CacheSize`` chunks.
    * ``(None, (new_factors, pop_factors))`` -- add/remove cached factors.
    * ``(index, ...)`` -- slide the cached datetime window forward around
      ``index``.

    Args:
        ft: factor table whose ``ErgodicMode`` settings drive the caching.
        mmap_cache: shared mmap buffer created by the parent process.

    Returns:
        int: 0 on normal termination.
    """
    CacheData, CacheDTs, MMAPCacheData, DTNum = {}, [], mmap_cache, len(ft.ErgodicMode._DateTimes)
    CacheSize = int(ft.ErgodicMode.CacheSize*2**20)
    # On Windows the child re-opens the shared region by tag name instead of
    # using the inherited handle.
    if os.name=='nt': MMAPCacheData = mmap.mmap(-1, CacheSize, tagname=ft.ErgodicMode._TagName)
    while True:
        Task = ft.ErgodicMode._Queue2SubProcess.get()# fetch the next task
        if Task is None: break# terminate the worker
        if (Task[0] is None) and (Task[1] is None):# ship the cache to the main process
            CacheDataByte = pickle.dumps(CacheData)
            DataLen = len(CacheDataByte)
            # Stream the pickled payload through the fixed-size buffer,
            # handshaking with the main process after every chunk.
            for i in range(int(DataLen/CacheSize)+1):
                iStartInd = i*CacheSize
                iEndInd = min((i+1)*CacheSize, DataLen)
                if iEndInd>iStartInd:
                    MMAPCacheData.seek(0)
                    MMAPCacheData.write(CacheDataByte[iStartInd:iEndInd])
                    ft.ErgodicMode._Queue2MainProcess.put(iEndInd-iStartInd)
                    ft.ErgodicMode._Queue2SubProcess.get()
            ft.ErgodicMode._Queue2MainProcess.put(0)# a 0-byte chunk signals end-of-stream
            del CacheDataByte
            gc.collect()
        elif Task[0] is None:# adjust which factors are cached
            NewFactors, PopFactors = Task[1]
            for iFactorName in PopFactors: CacheData.pop(iFactorName)
            if NewFactors:
                #print("adjust cache: "+str(NewFactors))# debug
                if CacheDTs:
                    CacheData.update(dict(ft.__QS_calcData__(raw_data=ft.__QS_prepareRawData__(factor_names=NewFactors, ids=ft.ErgodicMode._IDs, dts=CacheDTs), factor_names=NewFactors, ids=ft.ErgodicMode._IDs, dts=CacheDTs)))
                else:
                    CacheData.update({iFactorName: pd.DataFrame(index=CacheDTs, columns=ft.ErgodicMode._IDs) for iFactorName in NewFactors})
        else:# prepare the cache window
            CurInd = Task[0] + ft.ErgodicMode.ForwardPeriod + 1
            if CurInd < DTNum:# not at the end yet; prefetch more cached data
                OldCacheDTs = set(CacheDTs)
                CacheDTs = ft.ErgodicMode._DateTimes[max((0, CurInd-ft.ErgodicMode.BackwardPeriod)):min((DTNum, CurInd+ft.ErgodicMode.ForwardPeriod+1))].tolist()
                NewCacheDTs = sorted(set(CacheDTs).difference(OldCacheDTs))
                if CacheData:
                    isDisjoint = OldCacheDTs.isdisjoint(CacheDTs)
                    CacheFactorNames = list(CacheData.keys())
                    #print("prepare cache: "+str(CacheFactorNames))# debug
                    if NewCacheDTs:
                        NewCacheData = ft.__QS_calcData__(raw_data=ft.__QS_prepareRawData__(factor_names=CacheFactorNames, ids=ft.ErgodicMode._IDs, dts=NewCacheDTs), factor_names=CacheFactorNames, ids=ft.ErgodicMode._IDs, dts=NewCacheDTs)
                    else:
                        NewCacheData = pd.Panel(items=CacheFactorNames, major_axis=NewCacheDTs, minor_axis=ft.ErgodicMode._IDs)
                    for iFactorName in CacheData:
                        if isDisjoint:
                            # Old and new windows do not overlap: replace wholesale.
                            CacheData[iFactorName] = NewCacheData[iFactorName]
                        else:
                            # Keep the overlapping rows, then splice in the new ones.
                            CacheData[iFactorName] = CacheData[iFactorName].loc[CacheDTs, :]
                            CacheData[iFactorName].loc[NewCacheDTs, :] = NewCacheData[iFactorName]
                    NewCacheData = None
    return 0
# Factor table with an mmap-based per-ID cache: in ergodic mode the number of
# cached IDs and the datetime span are limited, so the buffer holds only a
# partial slice of each ID's data
Example 74
Project: QuantStudio   Author: Scorpi000   File: FactorDB.py    GNU General Public License v3.0 4 votes vote down vote up
def _prepareMMAPIDCacheData(ft, mmap_cache):
    """Sub-process worker that maintains a per-ID data cache shared via mmap.

    Same task protocol as the factor-cache worker, but keyed by ID:

    * ``None`` -- stop the worker.
    * ``(None, None)`` -- pickle the current cache and stream it to the main
      process through the mmap buffer in ``CacheSize`` chunks.
    * ``(None, (new_id, pop_id))`` -- swap one cached ID for another.
    * ``(index, ...)`` -- slide the cached datetime window forward around
      ``index``.

    Args:
        ft: factor table whose ``ErgodicMode`` settings drive the caching.
        mmap_cache: shared mmap buffer created by the parent process.

    Returns:
        int: 0 on normal termination.
    """
    CacheData, CacheDTs, MMAPCacheData, DTNum = {}, [], mmap_cache, len(ft.ErgodicMode._DateTimes)
    CacheSize = int(ft.ErgodicMode.CacheSize*2**20)
    # On Windows the child re-opens the shared region by tag name instead of
    # using the inherited handle.
    if os.name=='nt': MMAPCacheData = mmap.mmap(-1, CacheSize, tagname=ft.ErgodicMode._TagName)
    while True:
        Task = ft.ErgodicMode._Queue2SubProcess.get()# fetch the next task
        if Task is None: break# terminate the worker
        if (Task[0] is None) and (Task[1] is None):# ship the cache to the main process
            CacheDataByte = pickle.dumps(CacheData)
            DataLen = len(CacheDataByte)
            # Stream the pickled payload through the fixed-size buffer,
            # handshaking with the main process after every chunk.
            for i in range(int(DataLen/CacheSize)+1):
                iStartInd = i*CacheSize
                iEndInd = min((i+1)*CacheSize, DataLen)
                if iEndInd>iStartInd:
                    MMAPCacheData.seek(0)
                    MMAPCacheData.write(CacheDataByte[iStartInd:iEndInd])
                    ft.ErgodicMode._Queue2MainProcess.put(iEndInd-iStartInd)
                    ft.ErgodicMode._Queue2SubProcess.get()
            ft.ErgodicMode._Queue2MainProcess.put(0)# a 0-byte chunk signals end-of-stream
            del CacheDataByte
            gc.collect()
        elif Task[0] is None:# adjust the cached IDs
            NewID, PopID = Task[1]
            if PopID: CacheData.pop(PopID)# replace the old ID's data with the new ID's
            if NewID:
                if CacheDTs:
                    CacheData[NewID] = ft.__QS_calcData__(raw_data=ft.__QS_prepareRawData__(factor_names=ft.FactorNames, ids=[NewID], dts=CacheDTs), factor_names=ft.FactorNames, ids=[NewID], dts=CacheDTs).iloc[:, :, 0]
                else:
                    CacheData[NewID] = pd.DataFrame(index=CacheDTs, columns=ft.FactorNames)
        else:# prepare the cache window
            CurInd = Task[0] + ft.ErgodicMode.ForwardPeriod + 1
            if CurInd<DTNum:# not at the end yet; prefetch more cached data
                OldCacheDTs = set(CacheDTs)
                CacheDTs = ft.ErgodicMode._DateTimes[max((0, CurInd-ft.ErgodicMode.BackwardPeriod)):min((DTNum, CurInd+ft.ErgodicMode.ForwardPeriod+1))].tolist()
                NewCacheDTs = sorted(set(CacheDTs).difference(OldCacheDTs))
                if CacheData:
                    isDisjoint = OldCacheDTs.isdisjoint(CacheDTs)
                    CacheIDs = list(CacheData.keys())
                    if NewCacheDTs:
                        NewCacheData = ft.__QS_calcData__(raw_data=ft.__QS_prepareRawData__(factor_names=ft.FactorNames, ids=CacheIDs, dts=NewCacheDTs), factor_names=ft.FactorNames, ids=CacheIDs, dts=NewCacheDTs)
                    else:
                        NewCacheData = pd.Panel(items=ft.FactorNames, major_axis=NewCacheDTs, minor_axis=CacheIDs)
                    for iID in CacheData:
                        if isDisjoint:
                            # Old and new windows do not overlap: replace wholesale.
                            CacheData[iID] = NewCacheData.loc[:, :, iID]
                        else:
                            # Keep the overlapping rows, then splice in the new ones.
                            CacheData[iID] = CacheData[iID].loc[CacheDTs, :]
                            CacheData[iID].loc[NewCacheDTs, :] = NewCacheData.loc[:, :, iID]
                    NewCacheData = None
    return 0
# Parameter object describing the factor table's computation mode
Example 75
Project: recruit   Author: Frank-qlu   File: common.py    Apache License 2.0 4 votes vote down vote up
def get_filepath_or_buffer(filepath_or_buffer, encoding=None,
                           compression=None, mode=None):
    """
    Resolve *filepath_or_buffer* into something readable.

    A url is fetched into an in-memory buffer; ``s3://`` and ``gcs://``
    paths are delegated to their IO backends; plain path strings get user
    expansion; anything file-like is passed through untouched.

    Parameters
    ----------
    filepath_or_buffer : a url, filepath (str, py.path.local or pathlib.Path),
                         or buffer
    encoding : the encoding to use to decode py3 bytes, default is 'utf-8'
    mode : str, optional

    Returns
    -------
    tuple of ({a filepath_ or buffer or S3File instance},
              encoding, str,
              compression, str,
              should_close, bool)
    """
    filepath_or_buffer = _stringify_path(filepath_or_buffer)

    if _is_url(filepath_or_buffer):
        req = _urlopen(filepath_or_buffer)
        if req.headers.get('Content-Encoding', None) == 'gzip':
            # The payload is gzipped on the wire; the header wins over
            # whatever compression the caller asked for.
            compression = 'gzip'
        payload = req.read()
        req.close()
        return BytesIO(payload), encoding, compression, True

    if is_s3_url(filepath_or_buffer):
        from pandas.io import s3
        return s3.get_filepath_or_buffer(filepath_or_buffer,
                                         encoding=encoding,
                                         compression=compression,
                                         mode=mode)

    if is_gcs_url(filepath_or_buffer):
        from pandas.io import gcs
        return gcs.get_filepath_or_buffer(filepath_or_buffer,
                                          encoding=encoding,
                                          compression=compression,
                                          mode=mode)

    path_like = (compat.string_types, compat.binary_type, mmap.mmap)
    if isinstance(filepath_or_buffer, path_like):
        # Plain path (or an already-mapped buffer): expand '~' and return.
        return _expand_user(filepath_or_buffer), None, compression, False

    if is_file_like(filepath_or_buffer):
        return filepath_or_buffer, None, compression, False

    raise ValueError("Invalid file path or buffer object type: {_type}"
                     .format(_type=type(filepath_or_buffer)))
Example 76
Project: binaryanalysis   Author: armijnhemel   File: batxor.py    Apache License 2.0 4 votes vote down vote up
def searchUnpackXOR(filename, tempdir=None, blacklist=[], offsets={}, scanenv={}, debug=False):
	"""Scan *filename* for XOR-obfuscated known signatures and try to unpack it.

	For every entry in the module-level ``signatures`` table the raw byte
	pattern is searched in an mmap of the file; when enough instances are
	found (at least XOR_MINIMUM from the scan environment) the file is
	handed to ``unpackXOR``.  On success the entire file is blacklisted so
	no other scan re-processes it.

	Args:
		filename: path of the file to scan
		tempdir: base directory under which the unpack directory is created
		blacklist: list of (start, end) ranges already claimed by other scans
		offsets: unused here (kept for the scan-plugin interface)
		scanenv: scan environment; honours BAT_UNPACKED and XOR_MINIMUM
		debug: unused here (kept for the scan-plugin interface)
	Returns:
		tuple: (diroffsets, blacklist, tags, hints)

	NOTE(review): the mutable default arguments (blacklist/offsets/scanenv)
	are shared across calls, and ``blacklist`` is mutated in place --
	presumably intentional for this plugin API, but worth confirming.
	"""
	hints = []
	diroffsets = []

	## If something else already unpacked (parts) of the file we're not
	## going to continue.
	if 'BAT_UNPACKED' in scanenv:
		if scanenv['BAT_UNPACKED'] == 'True':
			return (diroffsets, blacklist, [], hints)

	if 'XOR_MINIMUM' in scanenv:
		xor_minimum = int(scanenv['XOR_MINIMUM'])
	else:
		xor_minimum = 0
	## only continue if no other scan has succeeded
	if blacklist != []:
		return (diroffsets, blacklist, [], hints)
	counter = 1

	## only continue if we actually have signatures
	if signatures == {}:
		return (diroffsets, blacklist, [], hints)

	## open the file, so we can search for signatures
	## TODO: use the identifier search we have elsewhere.
	datafile = os.open(filename, os.O_RDONLY)
	datamm = mmap.mmap(datafile, 0, access=mmap.ACCESS_READ)

	tmpdir = fwunpack.dirsetup(tempdir, filename, "xor", counter)
	res = None
	for s in signatures:
		## NOTE(review): bare 'reduce' suggests Python 2 -- confirm target runtime.
		bs = reduce(lambda x, y: x + y, signatures[s])
		## find all instances of the signature. We might want to tweak
		## this a bit.
		bsres = datamm.find(bs)
		if bsres == -1:
			continue
		siginstances = [bsres]
		while bsres != -1:
			bsres = datamm.find(bs, bsres +1)
			if bsres != -1:
				siginstances.append(bsres)
		if len(siginstances) > 0:
			if len(siginstances) < xor_minimum:
				continue
			res = unpackXOR(filename, s, tmpdir)
			if res != None:
				diroffsets.append((res, 0, os.stat(filename).st_size))
				## blacklist the whole file
				blacklist.append((0, os.stat(filename).st_size))
				break
	datamm.close()
	os.close(datafile)
	if res == None:
		os.rmdir(tmpdir)
		return (diroffsets, blacklist, [], hints)
	return (diroffsets, blacklist, ['temporary'], hints) 
Example 77
Project: att   Author: Centre-Alt-Rendiment-Esportiu   File: netcdf.py    GNU General Public License v3.0 4 votes vote down vote up
def __init__(self, filename, mode='r', mmap=None, version=1):
        """Initialize netcdf_file from fileobj (str or file-like).

        Parameters
        ----------
        filename : str or file-like
            Path to open, or an already-open file object (must expose
            ``seek``; must also expose ``fileno`` if mmap is requested).
        mode : {'r', 'w', 'a'}
            Read, write or append mode.
        mmap : bool or None
            Whether to memory-map the file.  ``None`` picks a default:
            mmap real files opened here, never mmap caller-supplied objects.
        version : int
            NetCDF version byte.
        """
        if mode not in 'rwa':
            raise ValueError("Mode must be either 'r', 'w' or 'a'.")

        if hasattr(filename, 'seek'):  # file-like
            self.fp = filename
            self.filename = 'None'
            if mmap is None:
                mmap = False
            elif mmap and not hasattr(filename, 'fileno'):
                raise ValueError('Cannot use file object for mmap')
        else:  # maybe it's a string
            self.filename = filename
            omode = 'r+' if mode == 'a' else mode
            self.fp = open(self.filename, '%sb' % omode)
            if mmap is None:
                mmap = True

        if mode != 'r':
            # Never memory-map a file that may be written to.
            mmap = False

        self.use_mmap = mmap
        self.mode = mode
        self.version_byte = version

        self.dimensions = {}
        self.variables = {}

        self._dims = []
        self._recs = 0
        self._recsize = 0

        self._mm = None
        self._mm_buf = None
        if self.use_mmap:
            # Read-only map of the whole file, exposed as a flat int8 view.
            self._mm = mm.mmap(self.fp.fileno(), 0, access=mm.ACCESS_READ)
            self._mm_buf = np.frombuffer(self._mm, dtype=np.int8)

        self._attributes = {}

        if mode in 'ra':
            # Parse the existing headers now for read/append modes.
            self._read() 
Example 78
Project: py-uio   Author: mvduin   File: device.py    MIT License 4 votes vote down vote up
def __init__( rgn, parent, address, size, name=None, uio=None, index=None ):
        """Describe a physical memory region and set up its mmap view.

        Args:
            parent: enclosing region whose mapping is reused, or None.
            address: physical start address of the region.
            size: size of the region in bytes (may be 0).
            name: optional human-readable identifier.
            uio: owning uio device; required when parent is None.
            index: memory-map index of this region within the uio device.

        Raises:
            ValueError: if neither parent nor uio is given, the size is
                negative, or the region does not lie inside its parent.
        """
        # Fix: compare against None with `is`, not `==` (PEP 8); `==` can be
        # hijacked by a custom __eq__ on either object.
        if parent is None and uio is None:
            raise ValueError( "parent region or uio device required" )
        if size < 0:
            raise ValueError( "invalid size" )

        # parent memory region (if any)
        rgn.parent = parent

        # physical address range
        rgn.address = address
        rgn.size = size
        rgn.end = address + size

        # identification
        rgn.name = name
        rgn.uio = uio
        rgn.index = index

        # memory mapping
        rgn.mappable = 0
        rgn._mmap = None

        # nothing to map
        if size == 0:
            return

        if parent:
            # need to use parent's mapping
            if rgn not in parent:
                raise ValueError( "memory region not inside parent" )

            offset = rgn.address - parent.address
            if offset >= parent.mappable:
                return  # beyond the parent's mapped prefix; leave unmapped

            rgn.mappable = min( parent.mappable - offset, size )
            rgn._mmap = parent._mmap[ offset : offset + rgn.mappable ]

        elif rgn.address & ~PAGE_MASK:
            return    # not page-aligned, can't be mapped

        else:
            # round down to integral number of pages
            rgn.mappable = size & PAGE_MASK

            # UIO uses a disgusting hack where the memory map index is
            # passed via the offset argument.  In the actual kernel call
            # the offset (and length) are in pages rather than bytes, hence
            # we actually need to pass index * PAGE_SIZE as offset.
            rgn._mmap = memoryview( mmap( rgn.uio._fd, rgn.mappable,
                                            offset = rgn.index * PAGE_SIZE ) )
Example 79
Project: nova   Author: ZhanHan   File: driver.py    Apache License 2.0 4 votes vote down vote up
def _supports_direct_io(dirpath):
        """Check whether files in *dirpath* can be written with O_DIRECT.

        Creates a scratch file ``.directio.test`` in the directory and
        attempts a 512-byte aligned write through an O_DIRECT descriptor.
        EINVAL from the write means direct I/O is unsupported there; any
        other error is logged and re-raised.  The scratch file is removed
        on the way out.

        Args:
            dirpath: directory to probe.

        Returns:
            bool: True if direct I/O works in this directory.
        """

        if not hasattr(os, 'O_DIRECT'):
            LOG.debug("This python runtime does not support direct I/O")
            return False

        testfile = os.path.join(dirpath, ".directio.test")

        hasDirectIO = True
        fd = None
        try:
            fd = os.open(testfile, os.O_CREAT | os.O_WRONLY | os.O_DIRECT)
            # Check whether a write with 512-byte alignment is allowed.
            align_size = 512
            # An anonymous mmap provides a page-aligned buffer, which
            # O_DIRECT writes require.
            m = mmap.mmap(-1, align_size)
            m.write(b"x" * align_size)
            os.write(fd, m)
            LOG.debug("Path '%(path)s' supports direct I/O",
                      {'path': dirpath})
        except OSError as e:
            if e.errno == errno.EINVAL:
                # EINVAL is how the kernel reports "O_DIRECT unsupported".
                LOG.debug("Path '%(path)s' does not support direct I/O: "
                          "'%(ex)s'", {'path': dirpath, 'ex': e})
                hasDirectIO = False
            else:
                with excutils.save_and_reraise_exception():
                    LOG.error(_LE("Error on '%(path)s' while checking "
                                  "direct I/O: '%(ex)s'"),
                              {'path': dirpath, 'ex': e})
        except Exception as e:
            with excutils.save_and_reraise_exception():
                LOG.error(_LE("Error on '%(path)s' while checking direct I/O: "
                              "'%(ex)s'"), {'path': dirpath, 'ex': e})
        finally:
            # ensure unlink(filepath) will actually remove the file by deleting
            # the remaining link to it in close(fd)
            if fd is not None:
                os.close(fd)

            try:
                os.unlink(testfile)
            except Exception:
                pass

        return hasDirectIO 
Example 80
Project: tf-pose   Author: SrikanthVelpuri   File: RemoteGraphicsView.py    Apache License 2.0 4 votes vote down vote up
def renderView(self):
        if self.img is None:
            ## make sure shm is large enough and get its address
            if self.width() == 0 or self.height() == 0:
                return
            size = self.width() * self.height() * 4
            if size > self.shm.size():
                if sys.platform.startswith('win'):
                    ## windows says "WindowsError: [Error 87] the parameter is incorrect" if we try to resize the mmap
                    self.shm.close()
                    ## it also says (sometimes) 'access is denied' if we try to reuse the tag.
                    self.shmtag = "pyqtgraph_shmem_" + ''.join([chr((random.getrandbits(20)%25) + 97) for i in range(20)])
                    self.shm = mmap.mmap(-1, size, self.shmtag)
                elif sys.platform == 'darwin':
                    self.shm.close()
                    self.shmFile.close()
                    self.shmFile = tempfile.NamedTemporaryFile(prefix='pyqtgraph_shmem_')
                    self.shmFile.write(b'\x00' * (size + 1))
                    self.shmFile.flush()
                    self.shm = mmap.mmap(self.shmFile.fileno(), size, mmap.MAP_SHARED, mmap.PROT_WRITE)
                else:
                    self.shm.resize(size)
            
            ## render the scene directly to shared memory
            if USE_PYSIDE:
                ch = ctypes.c_char.from_buffer(self.shm, 0)
                #ch = ctypes.c_char_p(address)
                self.img = QtGui.QImage(ch, self.width(), self.height(), QtGui.QImage.Format_ARGB32)
            else:
                address = ctypes.addressof(ctypes.c_char.from_buffer(self.shm, 0))

                # different versions of pyqt have different requirements here..
                try:
                    self.img = QtGui.QImage(sip.voidptr(address), self.width(), self.height(), QtGui.QImage.Format_ARGB32)
                except TypeError:
                    try:
                        self.img = QtGui.QImage(memoryview(buffer(self.shm)), self.width(), self.height(), QtGui.QImage.Format_ARGB32)
                    except TypeError:
                        # Works on PyQt 4.9.6
                        self.img = QtGui.QImage(address, self.width(), self.height(), QtGui.QImage.Format_ARGB32)
            self.img.fill(0xffffffff)
            p = QtGui.QPainter(self.img)
            self.render(p, self.viewRect(), self.rect())
            p.end()
            self.sceneRendered.emit((self.width(), self.height(), self.shm.size(), self.shmFileName()))