org.apache.parquet.column.values.bitpacking.Packer Java Examples
The following examples show how to use
org.apache.parquet.column.values.bitpacking.Packer.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: DeltaBinaryPackingValuesReader.java From parquet-mr with Apache License 2.0 | 6 votes |
private void loadNewBlockToBuffer() throws IOException { try { minDeltaInCurrentBlock = BytesUtils.readZigZagVarLong(in); } catch (IOException e) { throw new ParquetDecodingException("can not read min delta in current block", e); } readBitWidthsForMiniBlocks(); // mini block is atomic for reading, we read a mini block when there are more values left int i; for (i = 0; i < config.miniBlockNumInABlock && valuesBuffered < totalValueCount; i++) { BytePackerForLong packer = Packer.LITTLE_ENDIAN.newBytePackerForLong(bitWidths[i]); unpackMiniBlock(packer); } //calculate values from deltas unpacked for current block int valueUnpacked=i*config.miniBlockSizeInValues; for (int j = valuesBuffered-valueUnpacked; j < valuesBuffered; j++) { int index = j; valuesBuffer[index] += minDeltaInCurrentBlock + valuesBuffer[index - 1]; } }
Example #2
Source File: TestRunLengthBitPackingHybridEncoder.java From parquet-mr with Apache License 2.0 | 6 votes |
/**
 * Unpacks bit-packed ints from {@code is} in groups of eight.
 *
 * <p>Each group consumes {@code bitWidth} bytes from the stream and yields eight values, so the
 * returned list may hold more than {@code numValues} entries when {@code numValues} is not a
 * multiple of eight.
 *
 * @param bitWidth number of bits per packed value (also the number of bytes per group of eight)
 * @param numValues minimum number of values to decode
 * @param is stream positioned at the first packed byte
 * @return the decoded values, in stream order
 */
private static List<Integer> unpack(int bitWidth, int numValues, ByteArrayInputStream is) throws Exception {
  final int groupSize = 8;
  BytePacker bytePacker = Packer.LITTLE_ENDIAN.newBytePacker(bitWidth);
  byte[] packedGroup = new byte[bitWidth];
  int[] decodedGroup = new int[groupSize];
  List<Integer> result = new ArrayList<>(numValues);

  for (int produced = 0; produced < numValues; produced += groupSize) {
    // Read byte by byte; the raw read() result is narrowed to a byte as-is.
    for (int b = 0; b < bitWidth; b++) {
      packedGroup[b] = (byte) is.read();
    }
    bytePacker.unpack8Values(packedGroup, 0, decodedGroup, 0);
    for (int decoded : decodedGroup) {
      result.add(decoded);
    }
  }
  return result;
}
Example #3
Source File: BaseVectorizedParquetValuesReader.java From iceberg with Apache License 2.0 | 5 votes |
/**
 * Prepares this reader to decode ints that are {@code bw} bits wide.
 *
 * <p>Stores the bit width, the padded byte count derived from it, and a little-endian byte
 * packer for that width.
 *
 * @param bw bit width of the encoded ints; must lie in {@code [0, 32]}
 */
private void init(int bw) {
  final boolean widthInRange = bw >= 0 && bw <= 32;
  Preconditions.checkArgument(widthInRange, "bitWidth must be >= 0 and <= 32");

  bitWidth = bw;
  bytesWidth = BytesUtils.paddedByteCountFromBits(bw);
  packer = Packer.LITTLE_ENDIAN.newBytePacker(bw);
}
Example #4
Source File: RunLengthDecoder.java From flink with Apache License 2.0 | 5 votes |
/**
 * Sets up the decoder state for ints encoded with the given bit width.
 *
 * <p>Records the width, the padded number of bytes it corresponds to, and a little-endian byte
 * packer created for that width.
 *
 * @param bitWidth bit width of the encoded ints; must lie in {@code [0, 32]}
 */
private void initWidthAndPacker(int bitWidth) {
  Preconditions.checkArgument(
      !(bitWidth < 0 || bitWidth > 32),
      "bitWidth must be >= 0 and <= 32");

  this.bitWidth = bitWidth;
  this.bytesWidth = BytesUtils.paddedByteCountFromBits(bitWidth);
  this.packer = Packer.LITTLE_ENDIAN.newBytePacker(bitWidth);
}
Example #5
Source File: RunLengthBitPackingHybridEncoder.java From parquet-mr with Apache License 2.0 | 5 votes |
/**
 * Creates an encoder for values that are {@code bitWidth} bits wide.
 *
 * @param bitWidth bit width of the values to encode; must lie in {@code [0, 32]}
 * @param initialCapacity initial capacity handed to the backing output stream
 * @param pageSize page size handed to the backing output stream
 * @param allocator allocator handed to the backing output stream
 */
public RunLengthBitPackingHybridEncoder(int bitWidth, int initialCapacity, int pageSize, ByteBufferAllocator allocator) {
  // Fixed the "bithWidth" typo and merged the two concatenated literals into one message.
  LOG.debug("Encoding: RunLengthBitPackingHybridEncoder with bitWidth: {} initialCapacity {}",
      bitWidth, initialCapacity);

  Preconditions.checkArgument(bitWidth >= 0 && bitWidth <= 32, "bitWidth must be >= 0 and <= 32");

  this.bitWidth = bitWidth;
  this.baos = new CapacityByteArrayOutputStream(initialCapacity, pageSize, allocator);
  // Eight values of bitWidth bits each occupy exactly bitWidth bytes once packed.
  this.packBuffer = new byte[bitWidth];
  this.bufferedValues = new int[8];
  this.packer = Packer.LITTLE_ENDIAN.newBytePacker(bitWidth);
  // NOTE(review): reset(false) presumably initializes the remaining run state without
  // touching the freshly created output stream - confirm against reset().
  reset(false);
}
Example #6
Source File: RunLengthBitPackingHybridDecoder.java From parquet-mr with Apache License 2.0 | 5 votes |
public RunLengthBitPackingHybridDecoder(int bitWidth, InputStream in) { LOG.debug("decoding bitWidth {}", bitWidth); Preconditions.checkArgument(bitWidth >= 0 && bitWidth <= 32, "bitWidth must be >= 0 and <= 32"); this.bitWidth = bitWidth; this.packer = Packer.LITTLE_ENDIAN.newBytePacker(bitWidth); this.in = in; }
Example #7
Source File: DeltaBinaryPackingValuesWriterForLong.java From parquet-mr with Apache License 2.0 | 5 votes |
private void flushBlockBuffer() { // since we store the min delta, the deltas will be converted to be the difference to min delta // and all positive for (int i = 0; i < deltaValuesToFlush; i++) { deltaBlockBuffer[i] = deltaBlockBuffer[i] - minDeltaInCurrentBlock; } writeMinDelta(); int miniBlocksToFlush = getMiniBlockCountToFlush(deltaValuesToFlush); calculateBitWidthsForDeltaBlockBuffer(miniBlocksToFlush); for (int i = 0; i < config.miniBlockNumInABlock; i++) { writeBitWidthForMiniBlock(i); } for (int i = 0; i < miniBlocksToFlush; i++) { // writing i th miniblock int currentBitWidth = bitWidths[i]; int blockOffset = 0; // TODO: should this cache the packer? BytePackerForLong packer = Packer.LITTLE_ENDIAN.newBytePackerForLong(currentBitWidth); int miniBlockStart = i * config.miniBlockSizeInValues; // pack values into the miniblock buffer, 8 at a time to get exactly currentBitWidth bytes for (int j = miniBlockStart; j < (i + 1) * config.miniBlockSizeInValues; j += 8) { // mini block is atomic in terms of flushing // This may write more values when reach to the end of data writing to last mini block, // since it may not be aligned to miniblock, // but doesn't matter. The reader uses total count to see if reached the end. packer.pack8Values(deltaBlockBuffer, j, miniBlockByteBuffer, blockOffset); blockOffset += currentBitWidth; } baos.write(miniBlockByteBuffer, 0, blockOffset); } minDeltaInCurrentBlock = Long.MAX_VALUE; deltaValuesToFlush = 0; }
Example #8
Source File: DeltaBinaryPackingValuesWriterForInteger.java From parquet-mr with Apache License 2.0 | 5 votes |
private void flushBlockBuffer() { // since we store the min delta, the deltas will be converted to be the difference to min delta // and all positive for (int i = 0; i < deltaValuesToFlush; i++) { deltaBlockBuffer[i] = deltaBlockBuffer[i] - minDeltaInCurrentBlock; } writeMinDelta(); int miniBlocksToFlush = getMiniBlockCountToFlush(deltaValuesToFlush); calculateBitWidthsForDeltaBlockBuffer(miniBlocksToFlush); for (int i = 0; i < config.miniBlockNumInABlock; i++) { writeBitWidthForMiniBlock(i); } for (int i = 0; i < miniBlocksToFlush; i++) { // writing i th miniblock int currentBitWidth = bitWidths[i]; int blockOffset = 0; BytePacker packer = Packer.LITTLE_ENDIAN.newBytePacker(currentBitWidth); int miniBlockStart = i * config.miniBlockSizeInValues; for (int j = miniBlockStart; j < (i + 1) * config.miniBlockSizeInValues; j += 8) {//8 values per pack // mini block is atomic in terms of flushing // This may write more values when reach to the end of data writing to last mini block, // since it may not be aligned to miniblock, // but doesn't matter. The reader uses total count to see if reached the end. packer.pack8Values(deltaBlockBuffer, j, miniBlockByteBuffer, blockOffset); blockOffset += currentBitWidth; } baos.write(miniBlockByteBuffer, 0, blockOffset); } minDeltaInCurrentBlock = Integer.MAX_VALUE; deltaValuesToFlush = 0; }