org.apache.parquet.bytes.BytesUtils Java Examples

The following examples show how to use org.apache.parquet.bytes.BytesUtils. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: TestParquetMetadataConverter.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/**
 * Builds int statistics holding {@code Integer.MIN_VALUE}, {@code Integer.MAX_VALUE}
 * and 3004 nulls, converts them with the given helper, and checks that min, max
 * and the null count read back from the converted statistics are unchanged.
 */
private void testIntegerStats(StatsHelper helper) {
  final int lowest = Integer.MIN_VALUE;
  final int highest = Integer.MAX_VALUE;

  // fake stats: a null count plus the two extreme values
  IntStatistics intStats = new IntStatistics();
  intStats.incrementNumNulls(3004);
  intStats.updateStats(lowest);
  intStats.updateStats(highest);

  org.apache.parquet.format.Statistics converted = helper.toParquetStatistics(intStats);

  int decodedMin = BytesUtils.bytesToInt(converted.getMin());
  Assert.assertEquals("Min should match", lowest, decodedMin);

  int decodedMax = BytesUtils.bytesToInt(converted.getMax());
  Assert.assertEquals("Max should match", highest, decodedMax);

  Assert.assertEquals("Num nulls should match", 3004, converted.getNull_count());
}
 
Example #2
Source File: RunLengthBitPackingHybridEncoder.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/**
 * Writes the pending repeats as an rle-run: closes any open bit-packed run,
 * writes the run header followed by the repeated value, then clears the
 * repeat count and the buffered-value count.
 */
private void writeRleRun() throws IOException {
  // a bit-packed-run may still be in progress; it has to be closed
  // before this rle-run is written
  endPreviousBitPackedRun();

  // rle-header: repeat count shifted left by one (lsb of 0 signifies a rle run)
  final int rleHeader = repeatCount << 1;
  BytesUtils.writeUnsignedVarInt(rleHeader, baos);

  // the repeated value itself
  BytesUtils.writeIntLittleEndianPaddedOnBitWidth(baos, previousValue, bitWidth);

  // the buffered values were just repeats and have now been written,
  // so drop them and start counting repeats from zero again
  numBufferedValues = 0;
  repeatCount = 0;
}
 
Example #3
Source File: TestRunLengthBitPackingHybridEncoder.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/**
 * Writes 100 values cycling through 0, 1, 2 and checks that the output is a
 * single run with header 27 whose first 100 unpacked values match the input,
 * with nothing left in the stream afterwards.
 */
@Test
public void testBitPackingOnly() throws Exception {
  final int valueCount = 100;
  RunLengthBitPackingHybridEncoder encoder = getRunLengthBitPackingHybridEncoder();
  for (int n = 0; n < valueCount; n++) {
    encoder.writeInt(n % 3);
  }

  byte[] encoded = encoder.toBytes().toByteArray();
  ByteArrayInputStream is = new ByteArrayInputStream(encoded);

  // header = ((104/8) << 1) | 1 = 27
  int header = BytesUtils.readUnsignedVarInt(is);
  assertEquals(27, header);

  List<Integer> unpacked = unpack(3, 104, is);
  for (int n = 0; n < valueCount; n++) {
    assertEquals(n % 3, unpacked.get(n).intValue());
  }

  // nothing may be left in the stream
  assertEquals(-1, is.read());
}
 
Example #4
Source File: TestByteBasedBitPackingEncoder.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/**
 * For every bit width from 0 to 32, writes enough values to go through the
 * progression of slabs and then checks the encoder's buffer size and slab count.
 */
@Test
public void testSlabBoundary() {
  // enough values to write through the progression of slabs
  final int totalValues = 191 * 1024 * 8 + 10;

  for (int bitWidth = 0; bitWidth <= 32; bitWidth++) {
    final ByteBasedBitPackingEncoder encoder = new ByteBasedBitPackingEncoder(bitWidth, Packer.BIG_ENDIAN);

    int value = 0;
    while (value < totalValues) {
      try {
        encoder.writeInt(value);
      } catch (Exception e) {
        throw new RuntimeException(bitWidth + ": error writing " + value, e);
      }
      value++;
    }

    // one slab is expected for bit width 0, nine for every other width
    final int expectedSlabs;
    if (bitWidth == 0) {
      expectedSlabs = 1;
    } else {
      expectedSlabs = 9;
    }

    assertEquals(BytesUtils.paddedByteCountFromBits(totalValues * bitWidth), encoder.getBufferSize());
    assertEquals(expectedSlabs, encoder.getNumSlabs());
  }
}
 
Example #5
Source File: TestRunLengthBitPackingHybridEncoder.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/**
 * Writes the nine values 1..9 and checks that they are read back as a run of
 * 16 values in which the last seven are zeros.
 */
@Test
public void testPaddingZerosOnUnfinishedBitPackedRuns() throws Exception {
  RunLengthBitPackingHybridEncoder encoder = getRunLengthBitPackingHybridEncoder(5, 5, 10);
  for (int value = 1; value <= 9; value++) {
    encoder.writeInt(value);
  }

  byte[] encoded = encoder.toBytes().toByteArray();
  ByteArrayInputStream is = new ByteArrayInputStream(encoded);

  // header = ((16/8) << 1) | 1 = 5
  int header = BytesUtils.readUnsignedVarInt(is);
  assertEquals(5, header);

  // the nine written values followed by seven padding zeros
  List<Integer> expected = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 0, 0, 0, 0, 0, 0);
  List<Integer> unpacked = unpack(5, 16, is);
  assertEquals(expected, unpacked);

  // end of stream
  assertEquals(-1, is.read());
}
 
Example #6
Source File: TestRunLengthBitPackingHybridEncoder.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/**
 * Writes ten zeros at the very start of the stream and checks that they are
 * emitted as a single rle run (header 20, payload 0).
 */
@Test
public void testRepeatedZeros() throws Exception {
  // previousValue is initialized to 0, so repeated 0s at the beginning
  // of the stream must not trip up the repeat count
  RunLengthBitPackingHybridEncoder encoder = getRunLengthBitPackingHybridEncoder();
  int written = 0;
  while (written < 10) {
    encoder.writeInt(0);
    written++;
  }

  byte[] encoded = encoder.toBytes().toByteArray();
  ByteArrayInputStream is = new ByteArrayInputStream(encoded);

  // header = 10 << 1 = 20
  int header = BytesUtils.readUnsignedVarInt(is);
  assertEquals(20, header);

  // payload = 0
  int payload = BytesUtils.readIntLittleEndianOnOneByte(is);
  assertEquals(0, payload);

  // end of stream
  assertEquals(-1, is.read());
}
 
Example #7
Source File: TestRunLengthBitPackingHybridEncoder.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/**
 * Writes 100 fours followed by 100 fives and checks that the output consists
 * of exactly two rle runs, each with header 200 and the repeated value as payload.
 */
@Test
public void testRLEOnly() throws Exception {
  final int[] repeatedValues = {4, 5};

  RunLengthBitPackingHybridEncoder encoder = getRunLengthBitPackingHybridEncoder();
  for (int repeated : repeatedValues) {
    for (int n = 0; n < 100; n++) {
      encoder.writeInt(repeated);
    }
  }

  ByteArrayInputStream is = new ByteArrayInputStream(encoder.toBytes().toByteArray());

  for (int repeated : repeatedValues) {
    // header = 100 << 1 = 200
    assertEquals(200, BytesUtils.readUnsignedVarInt(is));
    // payload = the value that was repeated (4, then 5)
    assertEquals(repeated, BytesUtils.readIntLittleEndianOnOneByte(is));
  }

  // end of stream
  assertEquals(-1, is.read());
}
 
Example #8
Source File: DeltaBinaryPackingValuesReader.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/**
 * Eagerly loads all the data into memory.
 *
 * Reads the config, the total value count and the first value from the
 * stream, then keeps loading blocks until at least {@code totalValueCount}
 * values are buffered, and finally reports the number of bytes consumed.
 */
@Override
public void initFromPage(int valueCount, ByteBufferInputStream stream) throws IOException {
  this.in = stream;
  final long pageStart = in.position();

  // header: config first, then the total number of values
  this.config = DeltaBinaryPackingConfig.readConfig(in);
  this.totalValueCount = BytesUtils.readUnsignedVarInt(in);
  allocateValuesBuffer();
  bitWidths = new int[config.miniBlockNumInABlock];

  // the first value is stored in the header
  valuesBuffer[valuesBuffered++] = BytesUtils.readZigZagVarLong(in);

  // valuesBuffered can end up larger than totalValueCount, since we flush on a mini block basis
  while (valuesBuffered < totalValueCount) {
    loadNewBlockToBuffer();
  }

  final long bytesConsumed = in.position() - pageStart;
  updateNextOffset((int) bytesConsumed);
}
 
Example #9
Source File: AbstractColumnReader.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Sets this reader up for a V1 data page: records the page's value count,
 * initializes the repetition-level reader and the run-length decoder from the
 * page bytes, and hands the remaining stream to {@code prepareNewPage}.
 *
 * @throws UnsupportedOperationException if the definition-level encoding is not RLE
 *         while the column's max definition level is non-zero
 * @throws IOException if the page bytes cannot be read
 */
private void readPageV1(DataPageV1 page) throws IOException {
	this.pageValueCount = page.getValueCount();
	ValuesReader repetitionLevelReader = page.getRlEncoding().getValuesReader(descriptor, REPETITION_LEVEL);

	// Initialize the decoders: only RLE is accepted for definition levels,
	// unless the max definition level is 0.
	boolean dlEncodingAccepted =
		page.getDlEncoding() == Encoding.RLE || descriptor.getMaxDefinitionLevel() == 0;
	if (!dlEncodingAccepted) {
		throw new UnsupportedOperationException("Unsupported encoding: " + page.getDlEncoding());
	}

	this.runLenDecoder = new RunLengthDecoder(BytesUtils.getWidthFromMaxInt(descriptor.getMaxDefinitionLevel()));
	try {
		ByteBufferInputStream in = page.getBytes().toInputStream();
		repetitionLevelReader.initFromPage(pageValueCount, in);
		this.runLenDecoder.initFromStream(pageValueCount, in);
		prepareNewPage(page.getValueEncoding(), in);
	} catch (IOException e) {
		// rethrow with the page and column attached for context
		throw new IOException("could not read page " + page + " in col " + descriptor, e);
	}
}
 
Example #10
Source File: TestParquetMetadataConverter.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/**
 * Builds long statistics holding {@code Long.MIN_VALUE}, {@code Long.MAX_VALUE}
 * and 3004 nulls, converts them with the given helper, and checks that min, max
 * and the null count read back from the converted statistics are unchanged.
 */
private void testLongStats(StatsHelper helper) {
  final long lowest = Long.MIN_VALUE;
  final long highest = Long.MAX_VALUE;

  // fake stats: a null count plus the two extreme values
  LongStatistics longStats = new LongStatistics();
  longStats.incrementNumNulls(3004);
  longStats.updateStats(lowest);
  longStats.updateStats(highest);

  org.apache.parquet.format.Statistics converted = helper.toParquetStatistics(longStats);

  long decodedMin = BytesUtils.bytesToLong(converted.getMin());
  Assert.assertEquals("Min should match", lowest, decodedMin);

  long decodedMax = BytesUtils.bytesToLong(converted.getMax());
  Assert.assertEquals("Max should match", highest, decodedMax);

  Assert.assertEquals("Num nulls should match", 3004, converted.getNull_count());
}
 
Example #11
Source File: TestParquetMetadataConverter.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/**
 * Builds float statistics holding {@code Float.MIN_VALUE}, {@code Float.MAX_VALUE}
 * and 3004 nulls, converts them with the given helper, and checks that min, max
 * and the null count read back from the converted statistics are unchanged.
 *
 * Min and max are decoded from their raw int bits and compared with a zero
 * delta, i.e. the values are required to match exactly.
 */
private void testFloatStats(StatsHelper helper) {
  // make fake stats
  FloatStatistics stats = new FloatStatistics();
  stats.incrementNumNulls(3004);
  float min = Float.MIN_VALUE;
  float max = Float.MAX_VALUE;
  stats.updateStats(min);
  stats.updateStats(max);

  org.apache.parquet.format.Statistics formatStats = helper.toParquetStatistics(stats);

  // Zero delta on purpose: Float.MIN_VALUE is far smaller than any practical
  // tolerance, so a non-zero delta would also accept a min decoded as 0.
  Assert.assertEquals("Min should match",
      min, Float.intBitsToFloat(BytesUtils.bytesToInt(formatStats.getMin())),
      0.0);
  Assert.assertEquals("Max should match",
      max, Float.intBitsToFloat(BytesUtils.bytesToInt(formatStats.getMax())),
      0.0);
  Assert.assertEquals("Num nulls should match",
      3004, formatStats.getNull_count());
}
 
Example #12
Source File: TestParquetMetadataConverter.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/**
 * Builds double statistics holding {@code Double.MIN_VALUE}, {@code Double.MAX_VALUE}
 * and 3004 nulls, converts them with the given helper, and checks that min, max
 * and the null count read back from the converted statistics are unchanged.
 *
 * Min and max are decoded from their raw long bits and compared with a zero
 * delta, i.e. the values are required to match exactly.
 */
private void testDoubleStats(StatsHelper helper) {
  // make fake stats
  DoubleStatistics stats = new DoubleStatistics();
  stats.incrementNumNulls(3004);
  double min = Double.MIN_VALUE;
  double max = Double.MAX_VALUE;
  stats.updateStats(min);
  stats.updateStats(max);

  org.apache.parquet.format.Statistics formatStats = helper.toParquetStatistics(stats);

  // Zero delta on purpose: Double.MIN_VALUE is far smaller than any practical
  // tolerance, so a non-zero delta would also accept a min decoded as 0.
  Assert.assertEquals("Min should match",
      min, Double.longBitsToDouble(BytesUtils.bytesToLong(formatStats.getMin())),
      0.0);
  Assert.assertEquals("Max should match",
      max, Double.longBitsToDouble(BytesUtils.bytesToLong(formatStats.getMax())),
      0.0);
  Assert.assertEquals("Num nulls should match",
      3004, formatStats.getNull_count());
}
 
Example #13
Source File: TestParquetMetadataConverter.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/**
 * Builds boolean statistics holding {@code false}, {@code true} and 3004 nulls,
 * converts them with the given helper, and checks that min, max and the null
 * count read back from the converted statistics are unchanged.
 */
private void testBooleanStats(StatsHelper helper) {
  final boolean lowest = false;
  final boolean highest = true;

  // fake stats: a null count plus both boolean values
  BooleanStatistics boolStats = new BooleanStatistics();
  boolStats.incrementNumNulls(3004);
  boolStats.updateStats(lowest);
  boolStats.updateStats(highest);

  org.apache.parquet.format.Statistics converted = helper.toParquetStatistics(boolStats);

  boolean decodedMin = BytesUtils.bytesToBool(converted.getMin());
  Assert.assertEquals("Min should match", lowest, decodedMin);

  boolean decodedMax = BytesUtils.bytesToBool(converted.getMax());
  Assert.assertEquals("Max should match", highest, decodedMax);

  Assert.assertEquals("Num nulls should match", 3004, converted.getNull_count());
}
 
Example #14
Source File: DeltaBinaryPackingValuesReader.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/**
 * Reads one block: the block's min delta, the bit widths of its mini blocks,
 * and then mini blocks for as long as values are still missing. The unpacked
 * deltas are then turned into values in place by adding the min delta and the
 * preceding value.
 *
 * @throws ParquetDecodingException if the min delta cannot be read
 */
private void loadNewBlockToBuffer() throws IOException {
  try {
    minDeltaInCurrentBlock = BytesUtils.readZigZagVarLong(in);
  } catch (IOException e) {
    throw new ParquetDecodingException("can not read min delta in current block", e);
  }

  readBitWidthsForMiniBlocks();

  // a mini block is atomic for reading; one is read only while values are still missing
  int miniBlocksRead = 0;
  while (miniBlocksRead < config.miniBlockNumInABlock && valuesBuffered < totalValueCount) {
    unpackMiniBlock(Packer.LITTLE_ENDIAN.newBytePackerForLong(bitWidths[miniBlocksRead]));
    miniBlocksRead++;
  }

  // turn the deltas unpacked for this block into values
  final int firstNewIndex = valuesBuffered - miniBlocksRead * config.miniBlockSizeInValues;
  for (int pos = firstNewIndex; pos < valuesBuffered; pos++) {
    valuesBuffer[pos] += minDeltaInCurrentBlock + valuesBuffer[pos - 1];
  }
}
 
Example #15
Source File: TestParquetMetadataConverter.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Converts format-level statistics with different fields left unset and checks
 * what the converted statistics report in each case: nothing set, only min/max
 * set, and only the null count set.
 */
@Test
public void testMissingValuesFromStats() {
  PrimitiveType int32Type = Types.required(PrimitiveTypeName.INT32).named("test_int32");
  ParquetMetadataConverter converter = new ParquetMetadataConverter();
  org.apache.parquet.format.Statistics formatStats = new org.apache.parquet.format.Statistics();

  // case 1: nothing set
  Statistics<?> converted = converter.fromParquetStatistics(Version.FULL_VERSION, formatStats, int32Type);
  assertFalse(converted.isNumNullsSet());
  assertFalse(converted.hasNonNullValue());
  assertTrue(converted.isEmpty());
  assertEquals(-1, converted.getNumNulls());

  // case 2: min and max set, null count missing
  formatStats.clear();
  formatStats.setMin(BytesUtils.intToBytes(-100));
  formatStats.setMax(BytesUtils.intToBytes(100));
  converted = converter.fromParquetStatistics(Version.FULL_VERSION, formatStats, int32Type);
  assertFalse(converted.isNumNullsSet());
  assertTrue(converted.hasNonNullValue());
  assertFalse(converted.isEmpty());
  assertEquals(-1, converted.getNumNulls());
  assertEquals(-100, converted.genericGetMin());
  assertEquals(100, converted.genericGetMax());

  // case 3: null count set, min and max missing
  formatStats.clear();
  formatStats.setNull_count(2000);
  converted = converter.fromParquetStatistics(Version.FULL_VERSION, formatStats, int32Type);
  assertTrue(converted.isNumNullsSet());
  assertFalse(converted.hasNonNullValue());
  assertFalse(converted.isEmpty());
  assertEquals(2000, converted.getNumNulls());
}
 
Example #16
Source File: ParquetFileWriter.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Writes the footer to {@code out}: the converted file metadata, then the
 * footer length as a little-endian int, then the magic bytes.
 */
private static void serializeFooter(ParquetMetadata footer, PositionOutputStream out) throws IOException {
  // remember where the footer starts so its length can be derived afterwards
  final long footerStart = out.getPos();

  org.apache.parquet.format.FileMetaData thriftMetadata =
      new ParquetMetadataConverter().toParquetMetadata(CURRENT_VERSION, footer);
  writeFileMetaData(thriftMetadata, out);

  LOG.debug("{}: footer length = {}" , out.getPos(), (out.getPos() - footerStart));

  // footer length = distance between the current position and the footer start
  BytesUtils.writeIntLittleEndian(out, (int) (out.getPos() - footerStart));
  out.write(MAGIC);
}
 
Example #17
Source File: TestRunLengthBitPackingHybridEncoder.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Writes 1000 values cycling through 0, 1, 2 and checks that they are split
 * into two bit-packed runs of 504 and 496 values.
 */
@Test
public void testBitPackingOverflow() throws Exception {
  RunLengthBitPackingHybridEncoder encoder = getRunLengthBitPackingHybridEncoder();

  int written = 0;
  while (written < 1000) {
    encoder.writeInt(written % 3);
    written++;
  }

  byte[] encoded = encoder.toBytes().toByteArray();
  ByteArrayInputStream is = new ByteArrayInputStream(encoded);

  // 504 is the max number of values in a bit packed run
  // that still has a header of 1 byte
  // header = ((504/8) << 1) | 1 = 127
  assertEquals(127, BytesUtils.readUnsignedVarInt(is));
  List<Integer> firstRun = unpack(3, 504, is);
  for (int pos = 0; pos < 504; pos++) {
    assertEquals(pos % 3, firstRun.get(pos).intValue());
  }

  // the remaining 496 values should be in another bit-packed run
  // header = ((496/8) << 1) | 1 = 125
  assertEquals(125, BytesUtils.readUnsignedVarInt(is));
  List<Integer> secondRun = unpack(3, 496, is);
  for (int pos = 0; pos < 496; pos++) {
    assertEquals((pos + 504) % 3, secondRun.get(pos).intValue());
  }

  // end of stream
  assertEquals(-1, is.read());
}
 
Example #18
Source File: TestColumnIndexFilter.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Adds a non-null page with the given null count; min and max are stored as
 * the bytes of their raw long bits.
 *
 * @return this builder
 */
CIBuilder addPage(long nullCount, double min, double max) {
  nullPages.add(false);
  nullCounts.add(nullCount);

  byte[] minBytes = BytesUtils.longToBytes(Double.doubleToLongBits(min));
  minValues.add(ByteBuffer.wrap(minBytes));

  byte[] maxBytes = BytesUtils.longToBytes(Double.doubleToLongBits(max));
  maxValues.add(ByteBuffer.wrap(maxBytes));

  return this;
}
 
Example #19
Source File: TestColumnIndexBuilder.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Converts each value to a buffer wrapping its int bytes; a {@code null}
 * value becomes an empty buffer.
 */
private static List<ByteBuffer> toBBList(Integer... values) {
  List<ByteBuffer> result = new ArrayList<>(values.length);
  for (Integer element : values) {
    result.add(element == null
        ? ByteBuffer.allocate(0)
        : ByteBuffer.wrap(BytesUtils.intToBytes(element)));
  }
  return result;
}
 
Example #20
Source File: TestColumnIndexBuilder.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Converts each value to a buffer wrapping the bytes of its raw long bits;
 * a {@code null} value becomes an empty buffer.
 */
private static List<ByteBuffer> toBBList(Double... values) {
  List<ByteBuffer> result = new ArrayList<>(values.length);
  for (Double element : values) {
    result.add(element == null
        ? ByteBuffer.allocate(0)
        : ByteBuffer.wrap(BytesUtils.longToBytes(Double.doubleToLongBits(element))));
  }
  return result;
}
 
Example #21
Source File: DictionaryReader.java    From presto with Apache License 2.0 5 votes vote down vote up
/**
 * Reads one byte from the stream as the bit width and creates the decoder
 * over the same stream.
 */
@Override
public void initFromPage(int valueCount, ByteBufferInputStream in)
        throws IOException
{
    // the bit width comes first in the stream, on a single byte
    decoder = new RunLengthBitPackingHybridDecoder(BytesUtils.readIntLittleEndianOnOneByte(in), in);
}
 
Example #22
Source File: TestColumnIndexBuilder.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Converts each value to a buffer wrapping its boolean bytes; a {@code null}
 * value becomes an empty buffer.
 */
private static List<ByteBuffer> toBBList(Boolean... values) {
  List<ByteBuffer> result = new ArrayList<>(values.length);
  for (Boolean element : values) {
    result.add(element == null
        ? ByteBuffer.allocate(0)
        : ByteBuffer.wrap(BytesUtils.booleanToBytes(element)));
  }
  return result;
}
 
Example #23
Source File: TestParquetMetadataConverter.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a column index for an INT64 column from three pages (values with
 * nulls, nulls only, values only), converts it to the format-level column
 * index and back, and checks boundary order, null pages, null counts and
 * min/max values. Also checks that the conversions return {@code null} for a
 * null column index and for the INT96 and INTERVAL types.
 */
@Test
public void testColumnIndexConversion() {
  PrimitiveType type = Types.required(PrimitiveTypeName.INT64).named("test_int64");
  ColumnIndexBuilder builder = ColumnIndexBuilder.getBuilder(type, Integer.MAX_VALUE);

  // page 1: 16 nulls, values in [-100, 100]
  Statistics<?> stats = Statistics.createStats(type);
  stats.incrementNumNulls(16);
  stats.updateStats(-100L);
  stats.updateStats(100L);
  builder.add(stats);

  // page 2: 111 nulls and no values
  stats = Statistics.createStats(type);
  stats.incrementNumNulls(111);
  builder.add(stats);

  // page 3: no nulls, values in [200, 500]
  stats = Statistics.createStats(type);
  stats.updateStats(200L);
  stats.updateStats(500L);
  builder.add(stats);

  org.apache.parquet.format.ColumnIndex parquetColumnIndex =
      ParquetMetadataConverter.toParquetColumnIndex(type, builder.build());
  ColumnIndex columnIndex = ParquetMetadataConverter.fromParquetColumnIndex(type, parquetColumnIndex);

  // assertEquals (rather than assertTrue on equals) so a failure reports both lists
  assertEquals(BoundaryOrder.ASCENDING, columnIndex.getBoundaryOrder());
  assertEquals(Arrays.asList(false, true, false), columnIndex.getNullPages());
  assertEquals(Arrays.asList(16L, 111L, 0L), columnIndex.getNullCounts());
  assertEquals(
      Arrays.asList(
          ByteBuffer.wrap(BytesUtils.longToBytes(-100L)),
          ByteBuffer.allocate(0),
          ByteBuffer.wrap(BytesUtils.longToBytes(200L))),
      columnIndex.getMinValues());
  assertEquals(
      Arrays.asList(
          ByteBuffer.wrap(BytesUtils.longToBytes(100L)),
          ByteBuffer.allocate(0),
          ByteBuffer.wrap(BytesUtils.longToBytes(500L))),
      columnIndex.getMaxValues());

  assertNull("Should handle null column index", ParquetMetadataConverter
      .toParquetColumnIndex(Types.required(PrimitiveTypeName.INT32).named("test_int32"), null));
  assertNull("Should ignore unsupported types", ParquetMetadataConverter
      .toParquetColumnIndex(Types.required(PrimitiveTypeName.INT96).named("test_int96"), columnIndex));
  assertNull("Should ignore unsupported types",
      ParquetMetadataConverter.fromParquetColumnIndex(Types.required(PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY)
          .length(12).as(OriginalType.INTERVAL).named("test_interval"), parquetColumnIndex));
}
 
Example #24
Source File: TestColumnIndexFilter.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Adds a non-null page with the given null count; min and max are stored as
 * their int bytes.
 *
 * @return this builder
 */
CIBuilder addPage(long nullCount, int min, int max) {
  nullPages.add(false);
  nullCounts.add(nullCount);

  byte[] minBytes = BytesUtils.intToBytes(min);
  minValues.add(ByteBuffer.wrap(minBytes));

  byte[] maxBytes = BytesUtils.intToBytes(max);
  maxValues.add(ByteBuffer.wrap(maxBytes));

  return this;
}
 
Example #25
Source File: TestRunLengthBitPackingHybridEncoder.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Writes five alternating values, three repeats of 2 and then 100 more 2s,
 * and checks that the first eight values come out as one bit-packed run
 * followed by an rle run of 100 twos.
 */
@Test
public void testTransitionFromBitPackingToRle() throws Exception {
  RunLengthBitPackingHybridEncoder encoder = getRunLengthBitPackingHybridEncoder();

  // 5 obviously bit-packed values, then three repeated values
  // that ought to be bit-packed as well
  final int[] bitPackedPrefix = {0, 1, 0, 1, 0, 2, 2, 2};
  for (int value : bitPackedPrefix) {
    encoder.writeInt(value);
  }

  // lots more repeated values, that should be rle-encoded
  int repeats = 0;
  while (repeats < 100) {
    encoder.writeInt(2);
    repeats++;
  }

  byte[] encoded = encoder.toBytes().toByteArray();
  ByteArrayInputStream is = new ByteArrayInputStream(encoded);

  // header = ((8/8) << 1) | 1 = 3
  assertEquals(3, BytesUtils.readUnsignedVarInt(is));

  List<Integer> expectedBitPacked = Arrays.asList(0, 1, 0, 1, 0, 2, 2, 2);
  List<Integer> unpacked = unpack(3, 8, is);
  assertEquals(expectedBitPacked, unpacked);

  // header = 100 << 1 = 200
  assertEquals(200, BytesUtils.readUnsignedVarInt(is));
  // payload = 2
  assertEquals(2, BytesUtils.readIntLittleEndianOnOneByte(is));

  // end of stream
  assertEquals(-1, is.read());
}
 
Example #26
Source File: TestColumnIndexBuilder.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Converts each value to a buffer wrapping the bytes of its int bits;
 * a {@code null} value becomes an empty buffer.
 */
private static List<ByteBuffer> toBBList(Float... values) {
  List<ByteBuffer> result = new ArrayList<>(values.length);
  for (Float element : values) {
    result.add(element == null
        ? ByteBuffer.allocate(0)
        : ByteBuffer.wrap(BytesUtils.intToBytes(Float.floatToIntBits(element))));
  }
  return result;
}
 
Example #27
Source File: TestRunLengthBitPackingHybridEncoder.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Writes ten zeros with an encoder created with a first argument of 0 and
 * checks that the output is just the run header 20, with nothing after it.
 */
@Test
public void testBitWidthZero() throws Exception {
  RunLengthBitPackingHybridEncoder encoder = getRunLengthBitPackingHybridEncoder(0, 5, 10);
  int written = 0;
  while (written < 10) {
    encoder.writeInt(0);
    written++;
  }

  byte[] encoded = encoder.toBytes().toByteArray();
  ByteArrayInputStream is = new ByteArrayInputStream(encoded);

  // header = 10 << 1 = 20
  int header = BytesUtils.readUnsignedVarInt(is);
  assertEquals(20, header);

  // end of stream: no payload follows the header
  assertEquals(-1, is.read());
}
 
Example #28
Source File: ColumnReaderBase.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Creates an iterator over the levels stored in {@code bytes}. A max level
 * of 0 yields a {@code NullIntIterator}; otherwise the bytes are decoded with
 * the bit width derived from {@code maxLevel}.
 *
 * @throws ParquetDecodingException if the levels cannot be read
 */
private IntIterator newRLEIterator(int maxLevel, BytesInput bytes) {
  try {
    if (maxLevel != 0) {
      int bitWidth = BytesUtils.getWidthFromMaxInt(maxLevel);
      RunLengthBitPackingHybridDecoder decoder =
          new RunLengthBitPackingHybridDecoder(bitWidth, bytes.toInputStream());
      return new RLEIntIterator(decoder);
    }
    return new NullIntIterator();
  } catch (IOException e) {
    throw new ParquetDecodingException("could not read levels in page for col " + path, e);
  }
}
 
Example #29
Source File: BinaryPlainValuesReader.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Skips one value: reads its little-endian int length and then skips that
 * many bytes.
 *
 * @throws ParquetDecodingException if reading the length or skipping fails
 */
@Override
public void skip() {
  try {
    // length prefix first, then the bytes it announces
    final int byteCount = BytesUtils.readIntLittleEndian(in);
    in.skipFully(byteCount);
  } catch (IOException | RuntimeException e) {
    throw new ParquetDecodingException("could not skip bytes at offset " + in.position(), e);
  }
}
 
Example #30
Source File: BinaryPlainValuesReader.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Reads one value: reads its little-endian int length and returns a Binary
 * over a slice of that many bytes from the stream.
 *
 * @throws ParquetDecodingException if reading the length or slicing fails
 */
@Override
public Binary readBytes() {
  try {
    // length prefix first, then a slice of exactly that many bytes
    final int byteCount = BytesUtils.readIntLittleEndian(in);
    return Binary.fromConstantByteBuffer(in.slice(byteCount));
  } catch (IOException | RuntimeException e) {
    throw new ParquetDecodingException("could not read bytes at offset " + in.position(), e);
  }
}