org.apache.parquet.bytes.ByteBufferInputStream Java Examples

The following examples show how to use org.apache.parquet.bytes.ByteBufferInputStream. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: ColumnReaderBase.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/**
 * Initializes this reader from a V1 data page.
 *
 * <p>The repetition-level reader, the definition-level reader and the data
 * reader are initialized, in that order, from a single input stream obtained
 * from the page bytes.
 *
 * @param page the V1 data page to read
 * @throws ParquetDecodingException if an IOException is raised while the page is being read
 */
private void readPageV1(DataPageV1 page) {
  ValuesReader repetitionReader = page.getRlEncoding().getValuesReader(path, REPETITION_LEVEL);
  ValuesReader definitionReader = page.getDlEncoding().getValuesReader(path, DEFINITION_LEVEL);
  this.repetitionLevelColumn = new ValuesReaderIntIterator(repetitionReader);
  this.definitionLevelColumn = new ValuesReaderIntIterator(definitionReader);
  final int numValues = page.getValueCount();
  try {
    BytesInput pageBytes = page.getBytes();
    LOG.debug("page size {} bytes and {} values", pageBytes.size(), numValues);
    LOG.debug("reading repetition levels at 0");
    ByteBufferInputStream pageStream = pageBytes.toInputStream();
    repetitionReader.initFromPage(numValues, pageStream);
    LOG.debug("reading definition levels at {}", pageStream.position());
    definitionReader.initFromPage(numValues, pageStream);
    LOG.debug("reading data at {}", pageStream.position());
    initDataReader(page.getValueEncoding(), pageStream, numValues);
  } catch (IOException ioe) {
    throw new ParquetDecodingException("could not read page " + page + " in col " + path, ioe);
  }
  newPageInitialized(page);
}
 
Example #2
Source File: RunLengthDecoder.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Initializes this decoder from the given input stream.
 *
 * @param valueCount number of values; becomes the run length when the bit width is 0
 * @param in stream to decode from
 * @throws IOException if reading from the stream fails
 */
void initFromStream(int valueCount, ByteBufferInputStream in) throws IOException {
	// The field must be assigned before any read helper is invoked.
	this.in = in;
	if (!fixedWidth) {
		// initialize for values
		if (in.available() > 0) {
			initWidthAndPacker(in.read());
		}
	} else if (readLength) {
		// initialize for repetition and definition levels:
		// decoding continues on a slice of the announced length
		int sliceLength = readIntLittleEndian();
		this.in = in.sliceStream(sliceLength);
	}

	if (bitWidth != 0) {
		this.currentCount = 0;
		return;
	}
	// 0 bit width, treat this as an RLE run of valueCount number of 0's.
	this.mode = MODE.RLE;
	this.currentCount = valueCount;
	this.currentValue = 0;
}
 
Example #3
Source File: ByteStreamSplitValuesReaderTest.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/**
 * Initializes a float reader with four values, skips three of them and
 * checks that the next value read is 2.25.
 */
@Test
public void testSkip() throws Exception {
  // Every byte is 0xFF except the last byte of each 4-byte group.
  byte[] byteData = new byte[16];
  java.util.Arrays.fill(byteData, (byte) 0xFF);
  byteData[3] = (byte) 0x00;
  byteData[7] = (byte) 0x00;
  byteData[11] = (byte) 0x10;
  byteData[15] = (byte) 0x40;
  ByteBufferInputStream stream = ByteBufferInputStream.wrap(ByteBuffer.wrap(byteData));

  ByteStreamSplitValuesReaderForFloat reader = new ByteStreamSplitValuesReaderForFloat();
  reader.initFromPage(4, stream);
  reader.skip(3);
  float lastValue = reader.readFloat();
  assertEquals(2.25f, lastValue, 0.0f);
}
 
Example #4
Source File: AbstractColumnReader.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Prepares the decoders for a V1 data page.
 *
 * @param page the V1 data page to read
 * @throws UnsupportedOperationException if the definition-level encoding is not RLE
 *     while the column's max definition level is non-zero
 * @throws IOException if the page cannot be read; the original failure is attached as the cause
 */
private void readPageV1(DataPageV1 page) throws IOException {
	this.pageValueCount = page.getValueCount();
	ValuesReader repetitionReader = page.getRlEncoding().getValuesReader(descriptor, REPETITION_LEVEL);

	// Initialize the decoders.
	boolean supportedDlEncoding =
		page.getDlEncoding() == Encoding.RLE || descriptor.getMaxDefinitionLevel() == 0;
	if (!supportedDlEncoding) {
		throw new UnsupportedOperationException("Unsupported encoding: " + page.getDlEncoding());
	}
	int definitionBitWidth = BytesUtils.getWidthFromMaxInt(descriptor.getMaxDefinitionLevel());
	this.runLenDecoder = new RunLengthDecoder(definitionBitWidth);
	try {
		BytesInput pageBytes = page.getBytes();
		ByteBufferInputStream pageStream = pageBytes.toInputStream();
		repetitionReader.initFromPage(pageValueCount, pageStream);
		this.runLenDecoder.initFromStream(pageValueCount, pageStream);
		prepareNewPage(page.getValueEncoding(), pageStream);
	} catch (IOException cause) {
		throw new IOException("could not read page " + page + " in col " + descriptor, cause);
	}
}
 
Example #5
Source File: PageIterator.java    From iceberg with Apache License 2.0 6 votes vote down vote up
/**
 * Initializes this iterator from a V1 data page.
 *
 * <p>The repetition-level reader, the definition-level reader and the data
 * reader are initialized, in that order, from one input stream over the page bytes.
 *
 * @param page the V1 data page to read
 * @throws ParquetDecodingException if an IOException is raised while the page is being read
 */
private void initFromPage(DataPageV1 page) {
  this.triplesCount = page.getValueCount();
  ValuesReader repetitionReader = page.getRlEncoding().getValuesReader(desc, REPETITION_LEVEL);
  ValuesReader definitionReader = page.getDlEncoding().getValuesReader(desc, DEFINITION_LEVEL);
  this.repetitionLevels = new ValuesReaderIntIterator(repetitionReader);
  this.definitionLevels = new ValuesReaderIntIterator(definitionReader);
  try {
    BytesInput pageBytes = page.getBytes();
    LOG.debug("page size {} bytes and {} records", pageBytes.size(), triplesCount);
    LOG.debug("reading repetition levels at 0");
    ByteBufferInputStream pageStream = pageBytes.toInputStream();
    repetitionReader.initFromPage(triplesCount, pageStream);
    LOG.debug("reading definition levels at {}", pageStream.position());
    definitionReader.initFromPage(triplesCount, pageStream);
    LOG.debug("reading data at {}", pageStream.position());
    initDataReader(page.getValueEncoding(), pageStream, page.getValueCount());
  } catch (IOException ioe) {
    throw new ParquetDecodingException("could not read page " + page + " in col " + desc, ioe);
  }
}
 
Example #6
Source File: TestDeltaByteArray.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/**
 * Writes {@code values} with a DeltaByteArrayWriter and reads the output back
 * as two consecutive integer sequences from the same stream, checking the
 * first three entries of each.
 */
@Test
public void testLengths() throws IOException {
  DeltaByteArrayWriter writer = new DeltaByteArrayWriter(64 * 1024, 64 * 1024, new DirectByteBufferAllocator());
  ValuesReader prefixReader = new DeltaBinaryPackingValuesReader();

  Utils.writeData(writer, values);
  ByteBufferInputStream encoded = writer.getBytes().toInputStream();

  // test prefix lengths
  int[] prefixLengths = Utils.readInts(prefixReader, encoded, values.length);
  Assert.assertEquals(0, prefixLengths[0]);
  Assert.assertEquals(7, prefixLengths[1]);
  Assert.assertEquals(7, prefixLengths[2]);

  // test suffix lengths: a fresh reader continues on the same stream
  ValuesReader suffixReader = new DeltaBinaryPackingValuesReader();
  int[] suffixLengths = Utils.readInts(suffixReader, encoded, values.length);
  Assert.assertEquals(10, suffixLengths[0]);
  Assert.assertEquals(0, suffixLengths[1]);
  Assert.assertEquals(7, suffixLengths[2]);
}
 
Example #7
Source File: DeltaBinaryPackingValuesReader.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/**
 * Eagerly loads all the data into memory.
 *
 * @param valueCount not referenced by this method body
 * @param stream the stream to decode from
 * @throws IOException if reading from the stream fails
 */
@Override
public void initFromPage(int valueCount, ByteBufferInputStream stream) throws IOException {
  this.in = stream;
  final long headerStart = stream.position();
  this.config = DeltaBinaryPackingConfig.readConfig(in);
  this.totalValueCount = BytesUtils.readUnsignedVarInt(in);
  allocateValuesBuffer();
  bitWidths = new int[config.miniBlockNumInABlock];

  // the header carries the first value
  valuesBuffer[valuesBuffered++] = BytesUtils.readZigZagVarLong(in);

  // valuesBuffered may end up larger than totalValueCount, since we flush on a mini block basis
  while (valuesBuffered < totalValueCount) {
    loadNewBlockToBuffer();
  }

  final int bytesConsumed = (int) (in.position() - headerStart);
  updateNextOffset(bytesConsumed);
}
 
Example #8
Source File: BasePageIterator.java    From iceberg with Apache License 2.0 6 votes vote down vote up
/**
 * Initializes this iterator from a V1 data page.
 *
 * <p>The repetition-level reader is initialized first, then
 * {@code initDefinitionLevelsReader} and {@code initDataReader} are called
 * with the same input stream.
 *
 * @param initPage the V1 data page to read
 * @throws ParquetDecodingException if an IOException is raised while the page is being read
 */
protected void initFromPage(DataPageV1 initPage) {
  this.triplesCount = initPage.getValueCount();
  ValuesReader repetitionReader = initPage.getRlEncoding().getValuesReader(desc, ValuesType.REPETITION_LEVEL);
  this.repetitionLevels = new ValuesReaderIntIterator(repetitionReader);
  try {
    BytesInput pageBytes = initPage.getBytes();
    LOG.debug("page size {} bytes and {} records", pageBytes.size(), triplesCount);
    LOG.debug("reading repetition levels at 0");
    ByteBufferInputStream pageStream = pageBytes.toInputStream();
    repetitionReader.initFromPage(triplesCount, pageStream);
    LOG.debug("reading definition levels at {}", pageStream.position());
    initDefinitionLevelsReader(initPage, desc, pageStream, triplesCount);
    LOG.debug("reading data at {}", pageStream.position());
    initDataReader(initPage.getValueEncoding(), pageStream, initPage.getValueCount());
  } catch (IOException ioe) {
    throw new ParquetDecodingException("could not read page " + initPage + " in col " + desc, ioe);
  }
}
 
Example #9
Source File: DeltaBinaryPackingValuesWriterForIntegerTest.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/**
 * Writes the first {@code length} entries of {@code data}, checks that the
 * encoded page is no larger than an estimated upper bound, then reads the
 * values back and compares them with the input.
 *
 * @param data values to write
 * @param length number of values from {@code data} to write and verify
 * @throws IOException if writing or reading fails
 */
private void shouldReadAndWrite(int[] data, int length) throws IOException {
  writeData(data, length);
  reader = new DeltaBinaryPackingValuesReader();
  byte[] page = writer.getBytes().toByteArray();
  int miniBlockSize = blockSize / miniBlockNum;

  double valuesAfterFirst = (double) length - 1;
  double miniBlockFlushed = Math.ceil(valuesAfterFirst / miniBlockSize);
  double blockFlushed = Math.ceil(valuesAfterFirst / blockSize);

  double blockHeaderBytes = 4 * 5;
  double dataBytes = 4 * miniBlockFlushed * miniBlockSize; // aligned to miniBlock
  double bitWidthBytes = blockFlushed * miniBlockNum;      // bitWidth of mini blocks
  double minDeltaBytes = 5.0 * blockFlushed;               // min delta for each block
  double estimatedSize = blockHeaderBytes + dataBytes + bitWidthBytes + minDeltaBytes;
  assertTrue(estimatedSize >= page.length);

  reader.initFromPage(100, ByteBufferInputStream.wrap(ByteBuffer.wrap(page)));
  for (int idx = 0; idx < length; idx++) {
    assertEquals(data[idx], reader.readInteger());
  }
}
 
Example #10
Source File: BitPackingPerfTest.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/**
 * Decodes {@code bytes} into {@code result} ten times with the given reader,
 * prints the accumulated timing to standard output and verifies the result.
 *
 * @param bytes encoded input
 * @param result destination array; its length is the number of values read per round
 * @param r reader under measurement
 * @return the accumulated elapsed time of all rounds, in nanoseconds
 * @throws IOException if the reader fails to initialize
 */
private static long readNTimes(byte[] bytes, int[] result, ValuesReader r)
    throws IOException {
  final int rounds = 10;
  long totalNanos = 0;
  System.out.println();
  System.gc();
  System.out.print("                                             " + r.getClass().getSimpleName());
  System.out.print(" no gc <");
  for (int round = 0; round < rounds; round++) {
    long startNanos = System.nanoTime();
    r.initFromPage(result.length, ByteBufferInputStream.wrap(ByteBuffer.wrap(bytes)));
    for (int idx = 0; idx < result.length; idx++) {
      result[idx] = r.readInteger();
    }
    long endNanos = System.nanoTime();
    totalNanos += endNanos - startNanos;
  }
  long totalMicros = totalNanos / 1000;
  System.out.println("> read in " + totalMicros + "µs " + (rounds * result.length / totalMicros) + " values per µs");
  verify(result);
  return totalNanos;
}
 
Example #11
Source File: BaseVectorizedParquetValuesReader.java    From iceberg with Apache License 2.0 6 votes vote down vote up
/**
 * Initializes this reader from the given input stream.
 *
 * @param valueCount number of values; becomes the run length when the bit width is 0
 * @param in stream to decode from
 * @throws IOException if reading from the stream fails
 */
@Override
public void initFromPage(int valueCount, ByteBufferInputStream in) throws IOException {
  // The field must be assigned before any read helper is invoked.
  this.inputStream = in;
  if (!fixedWidth) {
    // initialize for values
    if (in.available() > 0) {
      init(in.read());
    }
  } else if (readLength) {
    // initialize for repetition and definition levels:
    // decoding continues on a slice of the announced length
    int sliceLength = readIntLittleEndian();
    this.inputStream = in.sliceStream(sliceLength);
  }

  if (bitWidth != 0) {
    this.currentCount = 0;
    return;
  }
  // 0 bit width, treat this as an RLE run of valueCount number of 0's.
  this.mode = Mode.RLE;
  this.currentCount = valueCount;
  this.currentValue = 0;
}
 
Example #12
Source File: PrimitiveColumnReader.java    From presto with Apache License 2.0 6 votes vote down vote up
/**
 * Creates the values reader for the given data encoding and initializes it
 * from the supplied stream.
 *
 * @param dataEncoding encoding of the page values
 * @param valueCount number of values passed to the reader's initialization
 * @param in stream the reader is initialized from
 * @return the initialized values reader
 * @throws ParquetDecodingException if the encoding uses a dictionary but none is set,
 *     or if initializing the reader raises an IOException
 */
private ValuesReader initDataReader(ParquetEncoding dataEncoding, int valueCount, ByteBufferInputStream in)
{
    final ValuesReader reader;
    if (!dataEncoding.usesDictionary()) {
        reader = dataEncoding.getValuesReader(columnDescriptor, VALUES);
    }
    else if (dictionary != null) {
        reader = dataEncoding.getDictionaryBasedValuesReader(columnDescriptor, VALUES, dictionary);
    }
    else {
        throw new ParquetDecodingException("Dictionary is missing for Page");
    }

    try {
        reader.initFromPage(valueCount, in);
    }
    catch (IOException ioe) {
        throw new ParquetDecodingException("Error reading parquet page in column " + columnDescriptor, ioe);
    }
    return reader;
}
 
Example #13
Source File: PrimitiveColumnReader.java    From presto with Apache License 2.0 6 votes vote down vote up
/**
 * Sets up the repetition and definition level readers for a V1 data page and
 * returns the values reader for the page data.
 *
 * @param page the V1 data page to read
 * @return the values reader produced by {@code initDataReader}
 * @throws ParquetDecodingException if an IOException is raised while the page is being read
 */
private ValuesReader readPageV1(DataPageV1 page)
{
    ValuesReader repetitionLevels = page.getRepetitionLevelEncoding().getValuesReader(columnDescriptor, REPETITION_LEVEL);
    ValuesReader definitionLevels = page.getDefinitionLevelEncoding().getValuesReader(columnDescriptor, DEFINITION_LEVEL);
    repetitionReader = new LevelValuesReader(repetitionLevels);
    definitionReader = new LevelValuesReader(definitionLevels);
    try {
        // One stream is shared: levels first, then the data.
        ByteBufferInputStream pageStream = toInputStream(page.getSlice());
        repetitionLevels.initFromPage(page.getValueCount(), pageStream);
        definitionLevels.initFromPage(page.getValueCount(), pageStream);
        return initDataReader(page.getValueEncoding(), page.getValueCount(), pageStream);
    }
    catch (IOException ioe) {
        throw new ParquetDecodingException("Error reading parquet page " + page + " in column " + columnDescriptor, ioe);
    }
}
 
Example #14
Source File: TestDictionary.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/**
 * Initializes a dictionary reader from a fully consumed stream, and then
 * from a byte buffer with an explicit offset; the test passes when neither
 * call throws.
 */
@Test
public void testZeroValues() throws IOException {
  FallbackValuesWriter<PlainIntegerDictionaryValuesWriter, PlainValuesWriter> dictWriter = newPlainIntegerDictionaryValuesWriter(100, 100);
  dictWriter.writeInteger(34);
  dictWriter.writeInteger(34);
  getBytesAndCheckEncoding(dictWriter, PLAIN_DICTIONARY);
  DictionaryValuesReader streamReader = initDicReader(dictWriter, INT32);

  // pretend there are 100 nulls. what matters is offset = bytes.length.
  byte[] arbitraryContent = {0x00, 0x01, 0x02, 0x03}; // data doesn't matter
  ByteBuffer bytes = ByteBuffer.wrap(arbitraryContent);
  ByteBufferInputStream exhausted = ByteBufferInputStream.wrap(bytes);
  exhausted.skipFully(exhausted.available());
  streamReader.initFromPage(100, exhausted);

  // Testing the deprecated behavior of using byte arrays directly
  DictionaryValuesReader legacyReader = initDicReader(dictWriter, INT32);
  legacyReader.initFromPage(100, bytes, bytes.remaining());
}
 
Example #15
Source File: DeltaBinaryPackingValuesWriterForLongTest.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/**
 * Writes the first {@code length} entries of {@code data}, checks that the
 * encoded page is no larger than an estimated upper bound, then reads the
 * values back and compares them with the input.
 *
 * @param data values to write
 * @param length number of values from {@code data} to write and verify
 * @throws IOException if writing or reading fails
 */
private void shouldReadAndWrite(long[] data, int length) throws IOException {
  writeData(data, length);
  reader = new DeltaBinaryPackingValuesReader();
  byte[] page = writer.getBytes().toByteArray();
  int miniBlockSize = blockSize / miniBlockNum;

  double valuesAfterFirst = (double) length - 1;
  double miniBlockFlushed = Math.ceil(valuesAfterFirst / miniBlockSize);
  double blockFlushed = Math.ceil(valuesAfterFirst / blockSize);

  double blockHeaderBytes = 3 * 5 + 1 * 10;                // 3 * int + 1 * long
  double dataBytes = 8 * miniBlockFlushed * miniBlockSize; // aligned to miniBlock
  double bitWidthBytes = blockFlushed * miniBlockNum;      // bitWidth of mini blocks
  double minDeltaBytes = 10.0 * blockFlushed;              // min delta for each block
  double estimatedSize = blockHeaderBytes + dataBytes + bitWidthBytes + minDeltaBytes;
  assertTrue(estimatedSize >= page.length);

  reader.initFromPage(100, ByteBufferInputStream.wrap(ByteBuffer.wrap(page)));
  for (int idx = 0; idx < length; idx++) {
    assertEquals(data[idx], reader.readLong());
  }
}
 
Example #16
Source File: BenchmarkDeltaByteArray.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Writes {@code sortedVals} with a PlainValuesWriter, reads the data back
 * with a BinaryPlainValuesReader and prints the final stream position.
 */
@BenchmarkOptions(benchmarkRounds = 20, warmupRounds = 4)
@Test
public void benchmarkSortedStringsWithPlainValuesWriter() throws IOException {
  final int capacity = 64 * 1024;
  PlainValuesWriter plainWriter = new PlainValuesWriter(capacity, capacity, new DirectByteBufferAllocator());
  BinaryPlainValuesReader plainReader = new BinaryPlainValuesReader();

  Utils.writeData(plainWriter, sortedVals);
  ByteBufferInputStream encoded = plainWriter.getBytes().toInputStream();
  Utils.readData(plainReader, encoded, values.length);
  System.out.println("size " + encoded.position());
}
 
Example #17
Source File: ParquetFileReader.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Reads the footer metadata from the tail of a file.
 *
 * <p>The tail is validated first (minimum length, trailing magic bytes, footer
 * position inside the file); the footer bytes are then read in one call and
 * handed to the converter.
 *
 * @param file the file whose length is queried and whose name appears in error messages
 * @param options supplies the metadata filter passed to the converter
 * @param f seekable stream the footer is read from
 * @param converter converts the footer bytes to metadata
 * @return the metadata produced by the converter
 * @throws IOException if reading from the stream fails
 * @throws RuntimeException if the file is too small, the trailing magic bytes do not match,
 *     or the computed footer position is outside the file
 */
private static final ParquetMetadata readFooter(InputFile file, ParquetReadOptions options, SeekableInputStream f, ParquetMetadataConverter converter) throws IOException {
  final int footerLengthSize = 4;
  long fileLen = file.getLength();
  LOG.debug("File length {}", fileLen);

  int minimumFileLen = MAGIC.length + footerLengthSize + MAGIC.length; // MAGIC + data + footer + footerIndex + MAGIC
  if (fileLen < minimumFileLen) {
    throw new RuntimeException(file.toString() + " is not a Parquet file (too small length: " + fileLen + ")");
  }

  long footerLengthIndex = fileLen - footerLengthSize - MAGIC.length;
  LOG.debug("reading footer index at {}", footerLengthIndex);
  f.seek(footerLengthIndex);
  int footerLength = readIntLittleEndian(f);

  byte[] tailMagic = new byte[MAGIC.length];
  f.readFully(tailMagic);
  if (!Arrays.equals(MAGIC, tailMagic)) {
    throw new RuntimeException(file.toString() + " is not a Parquet file. expected magic number at tail " + Arrays.toString(MAGIC) + " but found " + Arrays.toString(tailMagic));
  }

  long footerIndex = footerLengthIndex - footerLength;
  LOG.debug("read footer length: {}, footer index: {}", footerLength, footerIndex);
  boolean footerInsideFile = footerIndex >= MAGIC.length && footerIndex < footerLengthIndex;
  if (!footerInsideFile) {
    throw new RuntimeException("corrupted file: the footer index is not within the file: " + footerIndex);
  }

  // Read all the footer bytes in one time to avoid multiple read operations,
  // since it can be pretty time consuming for a single read operation in HDFS.
  f.seek(footerIndex);
  ByteBuffer footerBuffer = ByteBuffer.allocate(footerLength);
  f.readFully(footerBuffer);
  LOG.debug("Finished to read all footer bytes.");
  footerBuffer.flip();

  InputStream footerStream = ByteBufferInputStream.wrap(footerBuffer);
  return converter.readParquetMetadata(footerStream, options.getMetadataFilter());
}
 
Example #18
Source File: TestBitPackingColumn.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * For every packing type: encodes {@code vals}, compares the encoded bytes
 * with {@code expected}, decodes them again, and then exercises single-value
 * and n-value skipping.
 *
 * @param bitLength bit width used to derive the maximum value bound
 * @param vals values to encode
 * @param expected expected string rendering of the encoded bytes
 * @throws IOException if encoding or decoding fails
 */
private void validateEncodeDecode(int bitLength, int[] vals, String expected) throws IOException {
  for (PACKING_TYPE packing : PACKING_TYPE.values()) {
    LOG.debug("{}", packing);
    final int bound = (int)Math.pow(2, bitLength) - 1;

    // Encode
    ValuesWriter writer = packing.getWriter(bound);
    for (int idx = 0; idx < vals.length; idx++) {
      writer.writeInteger(vals[idx]);
    }
    byte[] encoded = writer.getBytes().toByteArray();
    LOG.debug("vals ("+bitLength+"): " + TestBitPacking.toString(vals));
    LOG.debug("bytes: {}", TestBitPacking.toString(encoded));
    assertEquals(packing.toString(), expected, TestBitPacking.toString(encoded));

    // Decode everything
    ValuesReader reader = packing.getReader(bound);
    reader.initFromPage(vals.length, ByteBufferInputStream.wrap(ByteBuffer.wrap(encoded)));
    int[] decoded = new int[vals.length];
    for (int idx = 0; idx < decoded.length; idx++) {
      decoded[idx] = reader.readInteger();
    }
    LOG.debug("result: {}", TestBitPacking.toString(decoded));
    assertArrayEquals(packing + " result: " + TestBitPacking.toString(decoded), vals, decoded);

    // Test skipping
    reader.initFromPage(vals.length, ByteBufferInputStream.wrap(ByteBuffer.wrap(encoded)));
    for (int idx = 0; idx < vals.length; idx += 2) {
      assertEquals(vals[idx], reader.readInteger());
      reader.skip();
    }

    // Test n-skipping
    reader.initFromPage(vals.length, ByteBufferInputStream.wrap(ByteBuffer.wrap(encoded)));
    int skipCount;
    for (int idx = 0; idx < vals.length; idx += skipCount + 1) {
      skipCount = (vals.length - idx) / 2;
      assertEquals(vals[idx], reader.readInteger());
      reader.skip(skipCount);
    }
  }
}
 
Example #19
Source File: TestCorruptDeltaByteArrays.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Writes two pages of ten strings each, resetting the writer in between, and
 * reads both back; the second page reader is linked to the first one via
 * {@code setPreviousReader}.
 */
@Test
public void testReassemblyWithoutCorruption() throws Exception {
  DeltaByteArrayWriter writer = getDeltaByteArrayWriter();

  for (int n = 0; n < 10; n++) {
    writer.writeBytes(Binary.fromString(str(n)));
  }
  ByteBuffer pageOne = writer.getBytes().toByteBuffer();

  writer.reset(); // sets previous to new byte[0]

  for (int n = 10; n < 20; n++) {
    writer.writeBytes(Binary.fromString(str(n)));
  }
  ByteBuffer pageTwo = writer.getBytes().toByteBuffer();

  DeltaByteArrayReader readerOne = new DeltaByteArrayReader();
  readerOne.initFromPage(10, ByteBufferInputStream.wrap(pageOne));
  for (int n = 0; n < 10; n++) {
    assertEquals(readerOne.readBytes().toStringUsingUTF8(), str(n));
  }

  DeltaByteArrayReader readerTwo = new DeltaByteArrayReader();
  readerTwo.initFromPage(10, ByteBufferInputStream.wrap(pageTwo));
  readerTwo.setPreviousReader(readerOne);

  for (int n = 10; n < 20; n++) {
    assertEquals(readerTwo.readBytes().toStringUsingUTF8(), str(n));
  }
}
 
Example #20
Source File: Utils.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Initializes the reader from the stream and reads {@code length} integers.
 *
 * @param reader reader used to decode the values
 * @param stream stream the reader is initialized from
 * @param length number of integers to read
 * @return the integers read, in order
 * @throws IOException if initializing the reader fails
 */
public static int[] readInts(ValuesReader reader, ByteBufferInputStream stream, int length)
    throws IOException {
  int[] decoded = new int[length];
  reader.initFromPage(length, stream);
  int next = 0;
  while (next < length) {
    decoded[next] = reader.readInteger();
    next++;
  }
  return decoded;
}
 
Example #21
Source File: Utils.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Initializes the reader from the stream and reads {@code length} binary values.
 *
 * @param reader reader used to decode the values
 * @param stream stream the reader is initialized from
 * @param length number of values to read
 * @return the values read, in order
 * @throws IOException if initializing the reader fails
 */
public static Binary[] readData(ValuesReader reader, ByteBufferInputStream stream, int length)
    throws IOException {
  Binary[] decoded = new Binary[length];
  reader.initFromPage(length, stream);
  int next = 0;
  while (next < length) {
    decoded[next] = reader.readBytes();
    next++;
  }
  return decoded;
}
 
Example #22
Source File: TestCorruptDeltaByteArrays.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Writes two pages of ten strings each, resetting the writer in between, and
 * reads both back with independent readers (no previous reader is set on the
 * second one).
 */
@Test
public void testOldReassemblyWithoutCorruption() throws Exception {
  DeltaByteArrayWriter writer = getDeltaByteArrayWriter();

  for (int n = 0; n < 10; n++) {
    writer.writeBytes(Binary.fromString(str(n)));
  }
  ByteBuffer pageOne = writer.getBytes().toByteBuffer();

  writer.reset(); // sets previous to new byte[0]

  for (int n = 10; n < 20; n++) {
    writer.writeBytes(Binary.fromString(str(n)));
  }
  ByteBuffer pageTwo = writer.getBytes().toByteBuffer();

  DeltaByteArrayReader readerOne = new DeltaByteArrayReader();
  readerOne.initFromPage(10, ByteBufferInputStream.wrap(pageOne));
  for (int n = 0; n < 10; n++) {
    assertEquals(readerOne.readBytes().toStringUsingUTF8(), str(n));
  }

  DeltaByteArrayReader readerTwo = new DeltaByteArrayReader();
  readerTwo.initFromPage(10, ByteBufferInputStream.wrap(pageTwo));

  for (int n = 10; n < 20; n++) {
    assertEquals(readerTwo.readBytes().toStringUsingUTF8(), str(n));
  }
}
 
Example #23
Source File: ByteStreamSplitValuesReaderTest.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Decodes {@code input} with a double reader and asserts that the values
 * read match {@code values} exactly, in order.
 *
 * @param input encoded bytes
 * @param values expected decoded values
 * @throws IOException if the reader fails to initialize
 */
private void testReader(byte[] input, double[] values) throws IOException {
  ByteBufferInputStream stream = ByteBufferInputStream.wrap(ByteBuffer.wrap(input));
  ByteStreamSplitValuesReaderForDouble reader = new ByteStreamSplitValuesReaderForDouble();
  reader.initFromPage(values.length, stream);
  for (int idx = 0; idx < values.length; idx++) {
    double actual = reader.readDouble();
    assertEquals(values[idx], actual, 0.0);
  }
}
 
Example #24
Source File: ByteStreamSplitValuesReaderTest.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Expects a ParquetDecodingException when a negative skip count is passed
 * to the reader.
 */
@Test
public void testSkipUnderflow() throws Exception {
  ByteBufferInputStream stream = ByteBufferInputStream.wrap(ByteBuffer.wrap(new byte[128]));

  ByteStreamSplitValuesReaderForFloat reader = new ByteStreamSplitValuesReaderForFloat();
  reader.initFromPage(32, stream);

  try {
    reader.skip(-1);
    Assert.fail("Expected an exception.");
  } catch (ParquetDecodingException expected) {
    // this exception is the outcome the test is looking for
  }
}
 
Example #25
Source File: ByteStreamSplitValuesReaderTest.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Expects a ParquetDecodingException when the skip count (33) exceeds the
 * value count the reader was initialized with (32).
 */
@Test
public void testSkipOverflow() throws Exception {
  ByteBufferInputStream stream = ByteBufferInputStream.wrap(ByteBuffer.wrap(new byte[128]));

  ByteStreamSplitValuesReaderForFloat reader = new ByteStreamSplitValuesReaderForFloat();
  reader.initFromPage(32, stream);

  try {
    reader.skip(33);
    Assert.fail("Expected an exception.");
  } catch (ParquetDecodingException expected) {
    // this exception is the outcome the test is looking for
  }
}
 
Example #26
Source File: TestDictionary.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Exercises {@code skip()} and {@code skip(n)} on a binary column, first
 * while the writer reports PLAIN_DICTIONARY and again after enough distinct
 * values have been written for it to report PLAIN.
 */
@Test
public void testSkipInBinaryDictionary() throws Exception {
  ValuesWriter writer = newPlainBinaryDictionaryValuesWriter(1000, 10000);
  writeRepeated(100, writer, "a");
  writeDistinct(100, writer, "b");
  assertEquals(PLAIN_DICTIONARY, writer.getEncoding());

  // Test skip and skip-n with dictionary encoding
  ByteBufferInputStream dictStream = writer.getBytes().toInputStream();
  DictionaryValuesReader dictReader = initDicReader(writer, BINARY);
  dictReader.initFromPage(200, dictStream);
  for (int idx = 0; idx < 100; idx += 2) {
    assertEquals(Binary.fromString("a" + idx % 10), dictReader.readBytes());
    dictReader.skip();
  }
  int skipCount;
  for (int idx = 0; idx < 100; idx += skipCount + 1) {
    skipCount = (100 - idx) / 2;
    assertEquals(Binary.fromString("b" + idx), dictReader.readBytes());
    dictReader.skip(skipCount);
  }

  // Ensure fallback
  writeDistinct(1000, writer, "c");
  assertEquals(PLAIN, writer.getEncoding());

  // Test skip and skip-n with plain encoding (after fallback)
  ValuesReader plainReader = new BinaryPlainValuesReader();
  plainReader.initFromPage(1200, writer.getBytes().toInputStream());
  plainReader.skip(200);
  for (int idx = 0; idx < 100; idx += 2) {
    assertEquals("c" + idx, plainReader.readBytes().toStringUsingUTF8());
    plainReader.skip();
  }
  for (int idx = 100; idx < 1000; idx += skipCount + 1) {
    skipCount = (1000 - idx) / 2;
    assertEquals(Binary.fromString("c" + idx), plainReader.readBytes());
    plainReader.skip(skipCount);
  }
}
 
Example #27
Source File: BenchmarkDeltaLengthByteArray.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Writes {@code values} with a PlainValuesWriter, reads the data back with a
 * BinaryPlainValuesReader and prints the final stream position.
 */
@BenchmarkOptions(benchmarkRounds = 20, warmupRounds = 4)
@Test
public void benchmarkRandomStringsWithPlainValuesWriter() throws IOException {
  final int capacity = 64 * 1024;
  PlainValuesWriter plainWriter = new PlainValuesWriter(capacity, capacity, new DirectByteBufferAllocator());
  BinaryPlainValuesReader plainReader = new BinaryPlainValuesReader();

  Utils.writeData(plainWriter, values);
  ByteBufferInputStream encoded = plainWriter.getBytes().toInputStream();
  Utils.readData(plainReader, encoded, values.length);
  System.out.println("size " + encoded.position());
}
 
Example #28
Source File: ByteStreamSplitValuesReaderTest.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Reads the single float held by a 4-byte page, then expects a
 * ParquetDecodingException on the next read.
 */
@Test
public void testExtraReads() throws Exception {
  byte[] byteData = {(byte) 0x00, (byte) 0x00, (byte) 0x10, (byte) 0x40};
  ByteBufferInputStream stream = ByteBufferInputStream.wrap(ByteBuffer.wrap(byteData));

  ByteStreamSplitValuesReaderForFloat reader = new ByteStreamSplitValuesReaderForFloat();
  reader.initFromPage(1, stream);
  float onlyValue = reader.readFloat();
  assertEquals(2.25f, onlyValue, 0.0f);

  try {
    reader.readFloat();
    Assert.fail("Expected an exception.");
  } catch (ParquetDecodingException expected) {
    // this exception is the outcome the test is looking for
  }
}
 
Example #29
Source File: ByteStreamSplitValuesReaderTest.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Decodes {@code input} with a float reader and asserts that the values read
 * match {@code values} exactly, in order.
 *
 * @param input encoded bytes
 * @param values expected decoded values
 * @throws IOException if the reader fails to initialize
 */
private void testReader(byte[] input, float[] values) throws IOException {
  ByteBufferInputStream stream = ByteBufferInputStream.wrap(ByteBuffer.wrap(input));
  ByteStreamSplitValuesReaderForFloat reader = new ByteStreamSplitValuesReaderForFloat();
  reader.initFromPage(values.length, stream);
  for (int idx = 0; idx < values.length; idx++) {
    float actual = reader.readFloat();
    assertEquals(values[idx], actual, 0.0f);
  }
}
 
Example #30
Source File: BenchmarkDeltaLengthByteArray.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Writes {@code values} with a DeltaLengthByteArrayValuesWriter, reads the
 * data back with the matching reader and prints the final stream position.
 */
@BenchmarkOptions(benchmarkRounds = 20, warmupRounds = 4)
@Test
public void benchmarkRandomStringsWithDeltaLengthByteArrayValuesWriter() throws IOException {
  final int capacity = 64 * 1024;
  DeltaLengthByteArrayValuesWriter deltaWriter = new DeltaLengthByteArrayValuesWriter(capacity, capacity, new DirectByteBufferAllocator());
  DeltaLengthByteArrayValuesReader deltaReader = new DeltaLengthByteArrayValuesReader();

  Utils.writeData(deltaWriter, values);
  ByteBufferInputStream encoded = deltaWriter.getBytes().toInputStream();
  Utils.readData(deltaReader, encoded, values.length);
  System.out.println("size " + encoded.position());
}