Python numpy.frombuffer() Examples

The following are 30 code examples for showing how to use numpy.frombuffer(). These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example.

You may check out the related API usage on the sidebar.

You may also want to check out all available functions/classes of the module numpy , or try the search function .

Example 1
Project: disentangling_conditional_gans   Author: zalandoresearch   File: dataset_tool.py    License: MIT License 6 votes vote down vote up
def create_mnist(tfrecord_dir, mnist_dir):
    print('Loading MNIST from "%s"' % mnist_dir)
    import gzip
    with gzip.open(os.path.join(mnist_dir, 'train-images-idx3-ubyte.gz'), 'rb') as file:
        images = np.frombuffer(file.read(), np.uint8, offset=16)
    with gzip.open(os.path.join(mnist_dir, 'train-labels-idx1-ubyte.gz'), 'rb') as file:
        labels = np.frombuffer(file.read(), np.uint8, offset=8)
    images = images.reshape(-1, 1, 28, 28)
    images = np.pad(images, [(0,0), (0,0), (2,2), (2,2)], 'constant', constant_values=0)
    assert images.shape == (60000, 1, 32, 32) and images.dtype == np.uint8
    assert labels.shape == (60000,) and labels.dtype == np.uint8
    assert np.min(images) == 0 and np.max(images) == 255
    assert np.min(labels) == 0 and np.max(labels) == 9
    onehot = np.zeros((labels.size, np.max(labels) + 1), dtype=np.float32)
    onehot[np.arange(labels.size), labels] = 1.0
    
    with TFRecordExporter(tfrecord_dir, images.shape[0]) as tfr:
        order = tfr.choose_shuffled_order()
        for idx in range(order.size):
            tfr.add_image(images[order[idx]])
        tfr.add_labels(onehot[order])

#---------------------------------------------------------------------------- 
Example 2
Project: disentangling_conditional_gans   Author: zalandoresearch   File: dataset_tool.py    License: MIT License 6 votes vote down vote up
def create_mnistrgb(tfrecord_dir, mnist_dir, num_images=1000000, random_seed=123):
    print('Loading MNIST from "%s"' % mnist_dir)
    import gzip
    with gzip.open(os.path.join(mnist_dir, 'train-images-idx3-ubyte.gz'), 'rb') as file:
        images = np.frombuffer(file.read(), np.uint8, offset=16)
    images = images.reshape(-1, 28, 28)
    images = np.pad(images, [(0,0), (2,2), (2,2)], 'constant', constant_values=0)
    assert images.shape == (60000, 32, 32) and images.dtype == np.uint8
    assert np.min(images) == 0 and np.max(images) == 255
    
    with TFRecordExporter(tfrecord_dir, num_images) as tfr:
        rnd = np.random.RandomState(random_seed)
        for idx in range(num_images):
            tfr.add_image(images[rnd.randint(images.shape[0], size=3)])

#---------------------------------------------------------------------------- 
Example 3
Project: dynamic-training-with-apache-mxnet-on-aws   Author: awslabs   File: base.py    License: Apache License 2.0 6 votes vote down vote up
def ctypes2numpy_shared(cptr, shape):
    """Convert a ctypes pointer to a numpy array.

    The resulting NumPy array shares the memory with the pointer.

    Parameters
    ----------
    cptr : ctypes.POINTER(mx_float)
        pointer to the memory region

    shape : tuple
        Shape of target `NDArray`.

    Returns
    -------
    out : numpy_array
        A numpy array : numpy array.
    """
    if not isinstance(cptr, ctypes.POINTER(mx_float)):
        raise RuntimeError('expected float pointer')
    size = 1
    for s in shape:
        size *= s
    dbuffer = (mx_float * size).from_address(ctypes.addressof(cptr.contents))
    return np.frombuffer(dbuffer, dtype=np.float32).reshape(shape) 
Example 4
Project: dynamic-training-with-apache-mxnet-on-aws   Author: awslabs   File: datasets.py    License: Apache License 2.0 6 votes vote down vote up
def _get_data(self):
        if self._train:
            data, label = self._train_data, self._train_label
        else:
            data, label = self._test_data, self._test_label

        namespace = 'gluon/dataset/'+self._namespace
        data_file = download(_get_repo_file_url(namespace, data[0]),
                             path=self._root,
                             sha1_hash=data[1])
        label_file = download(_get_repo_file_url(namespace, label[0]),
                              path=self._root,
                              sha1_hash=label[1])

        with gzip.open(label_file, 'rb') as fin:
            struct.unpack(">II", fin.read(8))
            label = np.frombuffer(fin.read(), dtype=np.uint8).astype(np.int32)

        with gzip.open(data_file, 'rb') as fin:
            struct.unpack(">IIII", fin.read(16))
            data = np.frombuffer(fin.read(), dtype=np.uint8)
            data = data.reshape(len(label), 28, 28, 1)

        self._data = nd.array(data, dtype=data.dtype)
        self._label = label 
Example 5
Project: DOTA_models   Author: ringringyi   File: download_and_convert_mnist.py    License: Apache License 2.0 6 votes vote down vote up
def _extract_images(filename, num_images):
  """Extract the images into a numpy array.

  Args:
    filename: The path to an MNIST images file.
    num_images: The number of images in the file.

  Returns:
    A numpy array of shape [number_of_images, height, width, channels].
  """
  print('Extracting images from: ', filename)
  with gzip.open(filename) as bytestream:
    bytestream.read(16)
    buf = bytestream.read(
        _IMAGE_SIZE * _IMAGE_SIZE * num_images * _NUM_CHANNELS)
    data = np.frombuffer(buf, dtype=np.uint8)
    data = data.reshape(num_images, _IMAGE_SIZE, _IMAGE_SIZE, _NUM_CHANNELS)
  return data 
Example 6
Project: DOTA_models   Author: ringringyi   File: download_and_convert_mnist.py    License: Apache License 2.0 6 votes vote down vote up
def _extract_labels(filename, num_labels):
  """Extract the labels into a vector of int64 label IDs.

  Args:
    filename: The path to an MNIST labels file.
    num_labels: The number of labels in the file.

  Returns:
    A numpy array of shape [number_of_labels]
  """
  print('Extracting labels from: ', filename)
  with gzip.open(filename) as bytestream:
    bytestream.read(8)
    buf = bytestream.read(1 * num_labels)
    labels = np.frombuffer(buf, dtype=np.uint8).astype(np.int64)
  return labels 
Example 7
Project: IntroToDeepLearning   Author: robb-brown   File: input_data.py    License: MIT License 6 votes vote down vote up
def extract_images(filename):
  """Extract the images into a 4D uint8 numpy array [index, y, x, depth]."""
  print('Extracting', filename)
  with gzip.open(filename) as bytestream:
    magic = _read32(bytestream)
    if magic != 2051:
      raise ValueError(
          'Invalid magic number %d in MNIST image file: %s' %
          (magic, filename))
    num_images = _read32(bytestream)
    rows = _read32(bytestream)
    cols = _read32(bytestream)
    buf = bytestream.read(rows * cols * num_images)
    data = numpy.frombuffer(buf, dtype=numpy.uint8)
    data = data.reshape(num_images, rows, cols, 1)
    return data 
Example 8
Project: IntroToDeepLearning   Author: robb-brown   File: input_data.py    License: MIT License 6 votes vote down vote up
def extract_images(filename):
  """Extract the images into a 4D uint8 numpy array [index, y, x, depth]."""
  print('Extracting', filename)
  with gzip.open(filename) as bytestream:
    magic = _read32(bytestream)
    if magic != 2051:
      raise ValueError(
          'Invalid magic number %d in MNIST image file: %s' %
          (magic, filename))
    num_images = _read32(bytestream)
    rows = _read32(bytestream)
    cols = _read32(bytestream)
    buf = bytestream.read(rows * cols * num_images)
    data = numpy.frombuffer(buf, dtype=numpy.uint8)
    data = data.reshape(num_images, rows, cols, 1)
    return data 
Example 9
Project: QCElemental   Author: MolSSI   File: serialization.py    License: BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def msgpackext_decode(obj: Any) -> Any:
    """
    Decodes a msgpack objects from a dictionary representation.

    Parameters
    ----------
    obj : Any
        An encoded object, likely a dictionary.

    Returns
    -------
    Any
        The decoded form of the object.
    """

    if b"_nd_" in obj:
        arr = np.frombuffer(obj[b"data"], dtype=obj[b"dtype"])
        if b"shape" in obj:
            arr.shape = obj[b"shape"]

        return arr

    return obj 
Example 10
Project: fine-lm   Author: akzaidi   File: mnist.py    License: MIT License 6 votes vote down vote up
def _extract_mnist_images(filename, num_images):
  """Extract images from an MNIST file into a numpy array.

  Args:
    filename: The path to an MNIST images file.
    num_images: The number of images in the file.

  Returns:
    A numpy array of shape [number_of_images, height, width, channels].
  """
  with gzip.open(filename) as bytestream:
    bytestream.read(16)
    buf = bytestream.read(_MNIST_IMAGE_SIZE * _MNIST_IMAGE_SIZE * num_images)
    data = np.frombuffer(buf, dtype=np.uint8)
    data = data.reshape(num_images, _MNIST_IMAGE_SIZE, _MNIST_IMAGE_SIZE, 1)
  return data 
Example 11
Project: gnocchi   Author: gnocchixyz   File: carbonara.py    License: Apache License 2.0 6 votes vote down vote up
def unserialize(cls, data, block_size, back_window):
        uncompressed = lz4.block.decompress(data)
        nb_points = (
            len(uncompressed) // cls._SERIALIZATION_TIMESTAMP_VALUE_LEN
        )

        try:
            timestamps = numpy.frombuffer(uncompressed, dtype='<Q',
                                          count=nb_points)
            values = numpy.frombuffer(
                uncompressed, dtype='<d',
                offset=nb_points * cls._SERIALIZATION_TIMESTAMP_LEN)
        except ValueError:
            raise InvalidData

        return cls.from_data(
            numpy.cumsum(timestamps),
            values,
            block_size=block_size,
            back_window=back_window) 
Example 12
Project: python-esppy   Author: sassoftware   File: mnist_input_data.py    License: Apache License 2.0 6 votes vote down vote up
def extract_images(filename):
  """Extract the images into a 4D uint8 numpy array [index, y, x, depth]."""
  print('Extracting %s' % filename)
  with gzip.open(filename) as bytestream:
    magic = _read32(bytestream)
    if magic != 2051:
      raise ValueError(
          'Invalid magic number %d in MNIST image file: %s' %
          (magic, filename))
    num_images = _read32(bytestream)
    rows = _read32(bytestream)
    cols = _read32(bytestream)
    buf = bytestream.read(rows * cols * num_images)
    data = numpy.frombuffer(buf, dtype=numpy.uint8)
    data = data.reshape(num_images, rows, cols, 1)
    return data 
Example 13
Project: dataflow   Author: tensorpack   File: mnist.py    License: Apache License 2.0 6 votes vote down vote up
def extract_images(filename):
    """Extract the images into a 4D uint8 numpy array [index, y, x, depth]."""
    with gzip.open(filename) as bytestream:
        magic = _read32(bytestream)
        if magic != 2051:
            raise ValueError(
                'Invalid magic number %d in MNIST image file: %s' %
                (magic, filename))
        num_images = _read32(bytestream)
        rows = _read32(bytestream)
        cols = _read32(bytestream)
        buf = bytestream.read(rows * cols * num_images)
        data = numpy.frombuffer(buf, dtype=numpy.uint8)
        data = data.reshape(num_images, rows, cols, 1)
        data = data.astype('float32') / 255.0
        return data 
Example 14
Project: chainerrl   Author: chainer   File: async_.py    License: MIT License 6 votes vote down vote up
def set_shared_params(a, b):
    """Set shared params (and persistent values) to a link.

    Args:
      a (chainer.Link): link whose params are to be replaced
      b (dict): dict that consists of (param_name, multiprocessing.Array)
    """
    assert isinstance(a, chainer.Link)
    remaining_keys = set(b.keys())
    for param_name, param in a.namedparams():
        if param_name in b:
            shared_param = b[param_name]
            param.array = np.frombuffer(
                shared_param, dtype=param.dtype).reshape(param.shape)
            remaining_keys.remove(param_name)
    for persistent_name, _ in chainerrl.misc.namedpersistent(a):
        if persistent_name in b:
            _set_persistent_values_recursively(
                a, persistent_name, b[persistent_name])
            remaining_keys.remove(persistent_name)
    assert not remaining_keys 
Example 15
Project: hsds   Author: HDFGroup   File: arrayUtil.py    License: Apache License 2.0 6 votes vote down vote up
def bytesToArray(data, dt, shape):
    #print(f"bytesToArray({len(data)}, {dt}, {shape}")
    nelements = getNumElements(shape)
    if not isVlen(dt):
        # regular numpy from string
        arr = np.frombuffer(data, dtype=dt)
    else:
        arr = np.zeros((nelements,), dtype=dt)
        offset = 0
        for index in range(nelements):
            offset = readElement(data, offset, arr, index, dt)
    arr = arr.reshape(shape)
    # check that we can update the array if needed
    # Note: this seems to have been required starting with numpuy v 1.17
    # Setting the flag directly is not recommended. cf: https://github.com/numpy/numpy/issues/9440

    if not arr.flags['WRITEABLE']:
        arr_copy = arr.copy()
        arr = arr_copy

    return arr 
Example 16
Project: Attentive-Filtering-Network   Author: jefflai108   File: kaldi_io.py    License: MIT License 6 votes vote down vote up
def read_vec_int(file_or_fd):
  """ [int-vec] = read_vec_int(file_or_fd)
   Read kaldi integer vector, ascii or binary input,
  """
  fd = open_or_fd(file_or_fd)
  binary = fd.read(2).decode()
  if binary == '\0B': # binary flag
    assert(fd.read(1).decode() == '\4'); # int-size
    vec_size = np.frombuffer(fd.read(4), dtype='int32', count=1)[0] # vector dim
    # Elements from int32 vector are sored in tuples: (sizeof(int32), value),
    vec = np.frombuffer(fd.read(vec_size*5), dtype=[('size','int8'),('value','int32')], count=vec_size)
    assert(vec[0]['size'] == 4) # int32 size,
    ans = vec[:]['value'] # values are in 2nd column,
  else: # ascii,
    arr = (binary + fd.readline().decode()).strip().split()
    try:
      arr.remove('['); arr.remove(']') # optionally
    except ValueError:
      pass
    ans = np.array(arr, dtype=int)
  if fd is not file_or_fd : fd.close() # cleanup
  return ans

# Writing, 
Example 17
Project: Attentive-Filtering-Network   Author: jefflai108   File: kaldi_io.py    License: MIT License 6 votes vote down vote up
def _read_mat_binary(fd):
  # Data type
  header = fd.read(3).decode()
  # 'CM', 'CM2', 'CM3' are possible values,
  if header.startswith('CM'): return _read_compressed_mat(fd, header)
  elif header == 'FM ': sample_size = 4 # floats
  elif header == 'DM ': sample_size = 8 # doubles
  else: raise UnknownMatrixHeader("The header contained '%s'" % header)
  assert(sample_size > 0)
  # Dimensions
  s1, rows, s2, cols = np.frombuffer(fd.read(10), dtype='int8,int32,int8,int32', count=1)[0]
  # Read whole matrix
  buf = fd.read(rows * cols * sample_size)
  if sample_size == 4 : vec = np.frombuffer(buf, dtype='float32')
  elif sample_size == 8 : vec = np.frombuffer(buf, dtype='float64')
  else : raise BadSampleSize
  mat = np.reshape(vec,(rows,cols))
  return mat 
Example 18
Project: baseband   Author: mhvk   File: test_sequentialfile.py    License: GNU General Public License v3.0 6 votes vote down vote up
def _setup(self, tmpdir):
        self.data = b'abcdefghijklmnopqrstuvwxyz'
        self.uint8_data = np.frombuffer(self.data, dtype=np.uint8)
        self.size = len(self.data)
        self.files = [str(tmpdir.join('file{:1d}.raw'.format(i)))
                      for i in range(3)]
        self.max_file_size = 10
        self.sizes = []
        self.offsets = [0]
        offset = 0
        for filename in self.files:
            with open(filename, 'wb') as fw:
                part = self.data[offset:offset+self.max_file_size]
                fw.write(part)
                self.sizes.append(len(part))
                self.offsets.append(self.offsets[-1] + len(part))
            offset += self.max_file_size 
Example 19
Project: Make_Money_with_Tensorflow   Author: llSourcell   File: mnist_input_data.py    License: GNU General Public License v3.0 6 votes vote down vote up
def extract_images(filename):
  """Extract the images into a 4D uint8 numpy array [index, y, x, depth]."""
  print('Extracting %s' % filename)
  with gzip.open(filename) as bytestream:
    magic = _read32(bytestream)
    if magic != 2051:
      raise ValueError(
          'Invalid magic number %d in MNIST image file: %s' %
          (magic, filename))
    num_images = _read32(bytestream)
    rows = _read32(bytestream)
    cols = _read32(bytestream)
    buf = bytestream.read(rows * cols * num_images)
    data = numpy.frombuffer(buf, dtype=numpy.uint8)
    data = data.reshape(num_images, rows, cols, 1)
    return data 
Example 20
Project: indras_net   Author: gcallah   File: markov.py    License: GNU General Public License v3.0 5 votes vote down vote up
def from_json(self, json_input):
        # get the encoded json dump
        enc = json_input["matrix"]
        
        # build the numpy data type
        dataType = np.dtype(enc[0])
        
        # decode the base64 encoded numpy array data and create a new numpy array with this data & type
        dataArray = np.frombuffer(base64.decodestring(enc[1].encode('utf-8')), dataType)
        
        # if the array had more than one data set it has to be reshaped
        if len(enc) > 2:
             dataArray = dataArray.reshape(enc[2]) # return the reshaped numpy array containing several data sets
             
        self.matrix = np.matrix(dataArray) 
Example 21
Project: DDPAE-video-prediction   Author: jthsieh   File: moving_mnist.py    License: MIT License 5 votes vote down vote up
def load_mnist(root):
  # Load MNIST dataset for generating training data.
  path = os.path.join(root, 'train-images-idx3-ubyte.gz')
  with gzip.open(path, 'rb') as f:
    mnist = np.frombuffer(f.read(), np.uint8, offset=16)
    mnist = mnist.reshape(-1, 28, 28)
  return mnist 
Example 22
Project: BiblioPixelAnimations   Author: ManiacalLabs   File: kimotion.py    License: MIT License 5 votes vote down vote up
def run(self):
        while not self.stopped():
            d = self.ws.recv()
            d = np.frombuffer(d, dtype=self.dt)
            self._reading.wait()
            self._data[1 if self._buf else 0] = d
            self._buf = not self._buf
        self.ws.close() 
Example 23
Project: deep-learning-note   Author: wdxtub   File: dataset.py    License: MIT License 5 votes vote down vote up
def _load_label(file_name):
    file_path = dataset_dir + "/" + file_name

    print("Converting " + file_name + " to NumPy Array ...")
    with gzip.open(file_path, 'rb') as f:
        labels = np.frombuffer(f.read(), np.uint8, offset=8)
    print("Done")

    return labels 
Example 24
Project: deep-learning-note   Author: wdxtub   File: dataset.py    License: MIT License 5 votes vote down vote up
def _load_img(file_name):
    file_path = dataset_dir + "/" + file_name

    print("Converting " + file_name + " to NumPy Array ...")
    with gzip.open(file_path, 'rb') as f:
        data = np.frombuffer(f.read(), np.uint8, offset=16)
    data = data.reshape(-1, img_size)
    print("Done")

    return data 
Example 25
Project: dynamic-training-with-apache-mxnet-on-aws   Author: awslabs   File: recordio.py    License: Apache License 2.0 5 votes vote down vote up
def unpack(s):
    """Unpack a MXImageRecord to string.

    Parameters
    ----------
    s : str
        String buffer from ``MXRecordIO.read``.

    Returns
    -------
    header : IRHeader
        Header of the image record.
    s : str
        Unpacked string.

    Examples
    --------
    >>> record = mx.recordio.MXRecordIO('test.rec', 'r')
    >>> item = record.read()
    >>> header, s = mx.recordio.unpack(item)
    >>> header
    HEADER(flag=0, label=14.0, id=20129312, id2=0)
    """
    header = IRHeader(*struct.unpack(_IR_FORMAT, s[:_IR_SIZE]))
    s = s[_IR_SIZE:]
    if header.flag > 0:
        header = header._replace(label=np.frombuffer(s, np.float32, header.flag))
        s = s[header.flag*4:]
    return header, s 
Example 26
Project: dynamic-training-with-apache-mxnet-on-aws   Author: awslabs   File: datasets.py    License: Apache License 2.0 5 votes vote down vote up
def _read_batch(self, filename):
        with open(filename, 'rb') as fin:
            data = np.frombuffer(fin.read(), dtype=np.uint8).reshape(-1, 3072+1)

        return data[:, 1:].reshape(-1, 3, 32, 32).transpose(0, 2, 3, 1), \
               data[:, 0].astype(np.int32) 
Example 27
Project: dynamic-training-with-apache-mxnet-on-aws   Author: awslabs   File: test_utils.py    License: Apache License 2.0 5 votes vote down vote up
def get_mnist():
    """Download and load the MNIST dataset

    Returns
    -------
    dict
        A dict containing the data
    """
    def read_data(label_url, image_url):
        with gzip.open(mx.test_utils.download(label_url)) as flbl:
            struct.unpack(">II", flbl.read(8))
            label = np.frombuffer(flbl.read(), dtype=np.int8)
        with gzip.open(mx.test_utils.download(image_url), 'rb') as fimg:
            _, _, rows, cols = struct.unpack(">IIII", fimg.read(16))
            image = np.frombuffer(fimg.read(), dtype=np.uint8).reshape(len(label), rows, cols)
            image = image.reshape(image.shape[0], 1, 28, 28).astype(np.float32)/255
        return (label, image)

    # changed to mxnet.io for more stable hosting
    # path = 'http://yann.lecun.com/exdb/mnist/'
    path = 'http://data.mxnet.io/data/mnist/'
    (train_lbl, train_img) = read_data(
        path+'train-labels-idx1-ubyte.gz', path+'train-images-idx3-ubyte.gz')
    (test_lbl, test_img) = read_data(
        path+'t10k-labels-idx1-ubyte.gz', path+'t10k-images-idx3-ubyte.gz')
    return {'train_data':train_img, 'train_label':train_lbl,
            'test_data':test_img, 'test_label':test_lbl} 
Example 28
Project: dynamic-training-with-apache-mxnet-on-aws   Author: awslabs   File: io.py    License: Apache License 2.0 5 votes vote down vote up
def getindex(self):
        index_size = ctypes.c_uint64(0)
        index_data = ctypes.POINTER(ctypes.c_uint64)()
        check_call(_LIB.MXDataIterGetIndex(self.handle,
                                           ctypes.byref(index_data),
                                           ctypes.byref(index_size)))
        if index_size.value:
            address = ctypes.addressof(index_data.contents)
            dbuffer = (ctypes.c_uint64* index_size.value).from_address(address)
            np_index = np.frombuffer(dbuffer, dtype=np.uint64)
            return np_index.copy()
        else:
            return None 
Example 29
Project: clock-recovery   Author: mossmann   File: wpcr.py    License: MIT License 5 votes vote down vote up
def read_from_stdin():
    return numpy.frombuffer(sys.stdin.buffer.read(), dtype=numpy.float32)

# If called directly from command line, take input file (or stdin) as a stream
# of floats and print binary symbols found therein. 
Example 30
Project: clock-recovery   Author: mossmann   File: dsss-bpsk-reverse.py    License: MIT License 5 votes vote down vote up
def read_from_stdin():
    return numpy.frombuffer(sys.stdin.buffer.read(), dtype=numpy.complex64, count=max_samples)

# If called directly from command line, take input file (or stdin) as a stream
# of floats and print binary symbols found therein.