org.apache.parquet.column.values.bitpacking.Packer Java Examples

The following examples show how to use org.apache.parquet.column.values.bitpacking.Packer. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: DeltaBinaryPackingValuesReader.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/**
 * Reads the next block from {@code in} into {@code valuesBuffer}: the zig-zag encoded minimum
 * delta, the per-mini-block bit widths, and then as many mini blocks as are still needed.
 * The unpacked deltas are finally turned into values by adding the block's minimum delta and
 * the value stored just before each position.
 *
 * @throws ParquetDecodingException if the minimum delta cannot be read
 * @throws IOException declared by the signature; presumably raised by the helper calls
 */
private void loadNewBlockToBuffer() throws IOException {
  try {
    minDeltaInCurrentBlock = BytesUtils.readZigZagVarLong(in);
  } catch (IOException e) {
    throw new ParquetDecodingException("can not read min delta in current block", e);
  }

  readBitWidthsForMiniBlocks();

  // A mini block is read as a whole; another one is read only while values are still missing.
  int miniBlocksRead = 0;
  while (miniBlocksRead < config.miniBlockNumInABlock && valuesBuffered < totalValueCount) {
    unpackMiniBlock(Packer.LITTLE_ENDIAN.newBytePackerForLong(bitWidths[miniBlocksRead]));
    miniBlocksRead++;
  }

  // Convert the deltas unpacked for this block into values, in buffer order, so that each
  // position can build on the already converted value in front of it.
  final int firstUnpackedPosition = valuesBuffered - miniBlocksRead * config.miniBlockSizeInValues;
  for (int pos = firstUnpackedPosition; pos < valuesBuffered; pos++) {
    valuesBuffer[pos] += minDeltaInCurrentBlock + valuesBuffer[pos - 1];
  }
}
 
Example #2
Source File: TestRunLengthBitPackingHybridEncoder.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/**
 * Decodes bit-packed integers from {@code is} in groups of eight until at least
 * {@code numValues} values have been collected.
 *
 * <p>Whole groups are always appended, so the returned list can hold more than
 * {@code numValues} entries (its size is rounded up to a multiple of 8).
 *
 * @param bitWidth width in bits of each packed value; also the number of bytes read per group
 * @param numValues minimum number of values to decode
 * @param is stream positioned at the first packed byte
 * @return the decoded values in stream order
 */
private static List<Integer> unpack(int bitWidth, int numValues, ByteArrayInputStream is)
  throws Exception {

  final BytePacker packer = Packer.LITTLE_ENDIAN.newBytePacker(bitWidth);
  final byte[] packedGroup = new byte[bitWidth];
  final int[] decodedGroup = new int[8];
  final List<Integer> result = new ArrayList<>(numValues);

  while (result.size() < numValues) {
    // pull the bitWidth bytes that make up the next group of 8 values
    for (int b = 0; b < packedGroup.length; b++) {
      packedGroup[b] = (byte) is.read();
    }

    packer.unpack8Values(packedGroup, 0, decodedGroup, 0);

    for (int decoded : decodedGroup) {
      result.add(decoded);
    }
  }

  return result;
}
 
Example #3
Source File: BaseVectorizedParquetValuesReader.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Prepares this reader for decoding ints that are {@code bw} bits wide: records the bit
 * width, the padded byte count derived from it, and a little-endian byte packer for it.
 *
 * @param bw bit width of the encoded values; must be between 0 and 32 inclusive
 */
private void init(int bw) {
  final boolean widthInRange = 0 <= bw && bw <= 32;
  Preconditions.checkArgument(widthInRange, "bitWidth must be >= 0 and <= 32");

  bitWidth = bw;
  bytesWidth = BytesUtils.paddedByteCountFromBits(bw);
  packer = Packer.LITTLE_ENDIAN.newBytePacker(bw);
}
 
Example #4
Source File: RunLengthDecoder.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Prepares this decoder for ints of the given bit width: stores the width, the padded byte
 * count derived from it, and a little-endian byte packer created for that width.
 *
 * @param bitWidth bit width of the encoded values; must be between 0 and 32 inclusive
 */
private void initWidthAndPacker(int bitWidth) {
	final boolean widthInRange = 0 <= bitWidth && bitWidth <= 32;
	Preconditions.checkArgument(widthInRange, "bitWidth must be >= 0 and <= 32");

	this.bitWidth = bitWidth;
	this.bytesWidth = BytesUtils.paddedByteCountFromBits(bitWidth);
	this.packer = Packer.LITTLE_ENDIAN.newBytePacker(bitWidth);
}
 
Example #5
Source File: RunLengthBitPackingHybridEncoder.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Creates an encoder for values that are {@code bitWidth} bits wide.
 *
 * <p>Allocates the output stream, a pack buffer of {@code bitWidth} bytes, a buffer for eight
 * pending values and a little-endian byte packer, then resets the encoder state.
 *
 * @param bitWidth bit width of the values to encode; must be between 0 and 32 inclusive
 * @param initialCapacity initial capacity handed to the underlying output stream
 * @param pageSize page size handed to the underlying output stream
 * @param allocator allocator handed to the underlying output stream
 */
public RunLengthBitPackingHybridEncoder(int bitWidth, int initialCapacity, int pageSize, ByteBufferAllocator allocator) {
  // Log label fixed: it previously read "bithWidth".
  LOG.debug("Encoding: RunLengthBitPackingHybridEncoder with "
    + "bitWidth: {} initialCapacity {}", bitWidth, initialCapacity);

  Preconditions.checkArgument(bitWidth >= 0 && bitWidth <= 32, "bitWidth must be >= 0 and <= 32");

  this.bitWidth = bitWidth;
  this.baos = new CapacityByteArrayOutputStream(initialCapacity, pageSize, allocator);
  // eight values of bitWidth bits each pack into exactly bitWidth bytes
  this.packBuffer = new byte[bitWidth];
  this.bufferedValues = new int[8];
  this.packer = Packer.LITTLE_ENDIAN.newBytePacker(bitWidth);
  reset(false);
}
 
Example #6
Source File: RunLengthBitPackingHybridDecoder.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
public RunLengthBitPackingHybridDecoder(int bitWidth, InputStream in) {
  LOG.debug("decoding bitWidth {}", bitWidth);

  Preconditions.checkArgument(bitWidth >= 0 && bitWidth <= 32, "bitWidth must be >= 0 and <= 32");
  this.bitWidth = bitWidth;
  this.packer = Packer.LITTLE_ENDIAN.newBytePacker(bitWidth);
  this.in = in;
}
 
Example #7
Source File: DeltaBinaryPackingValuesWriterForLong.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Writes the buffered deltas out as one block: the minimum delta, one bit width per mini
 * block slot, and then the packed bytes of each mini block that holds data. Afterwards the
 * running minimum and the pending-delta count are reset for the next block.
 */
private void flushBlockBuffer() {
  // Only the minimum delta is stored as-is, so every pending delta is rebased on it; the
  // packed numbers are then the distance to that minimum.
  for (int k = 0; k < deltaValuesToFlush; k++) {
    deltaBlockBuffer[k] -= minDeltaInCurrentBlock;
  }

  writeMinDelta();
  final int miniBlocksToFlush = getMiniBlockCountToFlush(deltaValuesToFlush);

  calculateBitWidthsForDeltaBlockBuffer(miniBlocksToFlush);
  // a bit width is emitted for every mini block slot of the block, not only the flushed ones
  for (int slot = 0; slot < config.miniBlockNumInABlock; slot++) {
    writeBitWidthForMiniBlock(slot);
  }

  final int valuesPerMiniBlock = config.miniBlockSizeInValues;
  for (int miniBlock = 0; miniBlock < miniBlocksToFlush; miniBlock++) {
    final int width = bitWidths[miniBlock];
    // TODO: should this cache the packer?
    final BytePackerForLong packer = Packer.LITTLE_ENDIAN.newBytePackerForLong(width);
    final int firstValue = miniBlock * valuesPerMiniBlock;
    final int endValue = firstValue + valuesPerMiniBlock;

    // Pack 8 values per call; each call produces exactly `width` bytes.
    // A mini block is always flushed whole. At the end of the data the last mini block may
    // therefore contain extra values, which is harmless: the reader relies on the total
    // count to know where to stop.
    int bytesPacked = 0;
    for (int from = firstValue; from < endValue; from += 8) {
      packer.pack8Values(deltaBlockBuffer, from, miniBlockByteBuffer, bytesPacked);
      bytesPacked += width;
    }
    baos.write(miniBlockByteBuffer, 0, bytesPacked);
  }

  minDeltaInCurrentBlock = Long.MAX_VALUE;
  deltaValuesToFlush = 0;
}
 
Example #8
Source File: DeltaBinaryPackingValuesWriterForInteger.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Writes the buffered int deltas out as one block: the minimum delta, one bit width per mini
 * block slot, and then the packed bytes of each mini block that holds data. Afterwards the
 * running minimum and the pending-delta count are reset for the next block.
 */
private void flushBlockBuffer() {
  // Only the minimum delta is stored as-is, so every pending delta is rebased on it; the
  // packed numbers are then the distance to that minimum.
  for (int k = 0; k < deltaValuesToFlush; k++) {
    deltaBlockBuffer[k] -= minDeltaInCurrentBlock;
  }

  writeMinDelta();
  final int miniBlocksToFlush = getMiniBlockCountToFlush(deltaValuesToFlush);

  calculateBitWidthsForDeltaBlockBuffer(miniBlocksToFlush);
  // a bit width is emitted for every mini block slot of the block, not only the flushed ones
  for (int slot = 0; slot < config.miniBlockNumInABlock; slot++) {
    writeBitWidthForMiniBlock(slot);
  }

  final int valuesPerMiniBlock = config.miniBlockSizeInValues;
  for (int miniBlock = 0; miniBlock < miniBlocksToFlush; miniBlock++) {
    final int width = bitWidths[miniBlock];
    final BytePacker packer = Packer.LITTLE_ENDIAN.newBytePacker(width);
    final int firstValue = miniBlock * valuesPerMiniBlock;
    final int endValue = firstValue + valuesPerMiniBlock;

    // Pack 8 values per call, advancing the output offset by `width` bytes each time.
    // A mini block is always flushed whole. At the end of the data the last mini block may
    // therefore contain extra values, which is harmless: the reader relies on the total
    // count to know where to stop.
    int bytesPacked = 0;
    for (int from = firstValue; from < endValue; from += 8) {
      packer.pack8Values(deltaBlockBuffer, from, miniBlockByteBuffer, bytesPacked);
      bytesPacked += width;
    }
    baos.write(miniBlockByteBuffer, 0, bytesPacked);
  }

  minDeltaInCurrentBlock = Integer.MAX_VALUE;
  deltaValuesToFlush = 0;
}