Java Code Examples for org.apache.parquet.column.values.ValuesReader#readInteger()

The following examples show how to use org.apache.parquet.column.values.ValuesReader#readInteger(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: BitPackingPerfTest.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/**
 * Decodes the same page ten times with the given reader, timing each
 * init-and-read pass, then prints the accumulated time and throughput and
 * hands the decoded values to {@code verify}.
 *
 * @param bytes  encoded page bytes, re-wrapped for the reader on every pass
 * @param result destination array; its length is the number of values read per pass
 * @param r      reader under test
 * @return the accumulated elapsed time of all passes, in nanoseconds
 * @throws IOException propagated from the calls made in the body
 */
private static long readNTimes(byte[] bytes, int[] result, ValuesReader r)
    throws IOException {
  System.out.println();
  long t = 0;
  int N = 10;
  System.gc();
  System.out.print("                                             " + r.getClass().getSimpleName());
  System.out.print(" no gc <");
  for (int k = 0; k < N; k++) {
    long t2 = System.nanoTime();
    r.initFromPage(result.length, ByteBufferInputStream.wrap(ByteBuffer.wrap(bytes)));
    for (int i = 0; i < result.length; i++) {
      result[i] = r.readInteger();
    }
    long t3 = System.nanoTime();
    t += t3 - t2;
  }
  final long elapsedMicros = t / 1000;
  // Widen before multiplying so a large result array cannot overflow int.
  final long totalValues = (long) N * result.length;
  // A run shorter than 1µs truncates to 0µs; clamp the divisor so the
  // throughput report cannot fail with a divide-by-zero ArithmeticException.
  final long valuesPerMicro = totalValues / Math.max(1L, elapsedMicros);
  System.out.println("> read in " + elapsedMicros + "µs " + valuesPerMicro + " values per µs");
  verify(result);
  return t;
}
 
Example 2
Source File: TestBitPackingColumn.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * For every {@code PACKING_TYPE}: writes {@code vals} with that type's writer,
 * checks the encoded bytes against {@code expected}, reads the values back and
 * checks they match, then re-reads the page twice more to exercise
 * {@code skip()} and {@code skip(int)}.
 *
 * @param bitLength bit width used to derive the bound passed to the writer and reader
 * @param vals      values to encode and decode
 * @param expected  expected string rendering of the encoded bytes
 * @throws IOException propagated from the writer/reader calls
 */
private void validateEncodeDecode(int bitLength, int[] vals, String expected) throws IOException {
  for (PACKING_TYPE type : PACKING_TYPE.values()) {
    LOG.debug("{}", type);
    final int bound = (int)Math.pow(2, bitLength) - 1;

    // Encode.
    ValuesWriter writer = type.getWriter(bound);
    for (int idx = 0; idx < vals.length; idx++) {
      writer.writeInteger(vals[idx]);
    }
    byte[] encoded = writer.getBytes().toByteArray();
    LOG.debug("vals ("+bitLength+"): " + TestBitPacking.toString(vals));
    String encodedText = TestBitPacking.toString(encoded);
    LOG.debug("bytes: {}", encodedText);
    assertEquals(type.toString(), expected, encodedText);

    // Decode everything and compare with the input.
    ValuesReader reader = type.getReader(bound);
    ByteBuffer fullPage = ByteBuffer.wrap(encoded);
    reader.initFromPage(vals.length, ByteBufferInputStream.wrap(fullPage));
    int[] decoded = new int[vals.length];
    int filled = 0;
    while (filled < decoded.length) {
      decoded[filled] = reader.readInteger();
      filled++;
    }
    String decodedText = TestBitPacking.toString(decoded);
    LOG.debug("result: {}", decodedText);
    assertArrayEquals(type + " result: " + decodedText, vals, decoded);

    // Single-value skipping: read one value, skip the next.
    ByteBuffer skipPage = ByteBuffer.wrap(encoded);
    reader.initFromPage(vals.length, ByteBufferInputStream.wrap(skipPage));
    int cursor = 0;
    while (cursor < vals.length) {
      assertEquals(vals[cursor], reader.readInteger());
      reader.skip();
      cursor += 2;
    }

    // Bulk skipping: read one value, then skip half of the distance to the end.
    ByteBuffer bulkSkipPage = ByteBuffer.wrap(encoded);
    reader.initFromPage(vals.length, ByteBufferInputStream.wrap(bulkSkipPage));
    int position = 0;
    while (position < vals.length) {
      int toSkip = (vals.length - position) / 2;
      assertEquals(vals[position], reader.readInteger());
      reader.skip(toSkip);
      position += toSkip + 1;
    }
  }
}
 
Example 3
Source File: VarLenNullableFixedEntryReader.java    From Bats with Apache License 2.0 4 votes vote down vote up
/**
 * {@inheritDoc}
 *
 * <p>Processes up to {@code readBatch} values by reading one definition level per value.
 * For each value whose definition level is 1, a 4-byte length is read from the source
 * buffer and the payload that follows it is copied into the entry's data array; every
 * other value is recorded with a length of -1.
 *
 * @param valuesToRead requested number of values; passed to
 *        {@code getFixedLengthMaxRecordsToRead} to derive the batch size
 * @return the populated bulk entry, or {@code null} when a stored length differs from
 *         {@code columnPrecInfo.precision}
 */
@Override
final VarLenColumnBulkEntry getEntry(int valuesToRead) {
  // TODO - We should not use force reload for sparse columns (values with lot of nulls)
  load(true); // load new data to process

  // Every non-null entry is expected to carry exactly `precision` payload bytes
  final int expectedDataLen = columnPrecInfo.precision;
  // Size of one stored entry: 4-byte length (read via getInt below) followed by the payload
  final int entrySz = 4 + columnPrecInfo.precision;
  final int readBatch = getFixedLengthMaxRecordsToRead(valuesToRead, entrySz);
  Preconditions.checkState(readBatch > 0, "Read batch count [%s] should be greater than zero", readBatch);

  final int[] valueLengths = entry.getValuesLength();
  final byte[] tgtBuff = entry.getInternalDataArray();
  final byte[] srcBuff = buffer.array();
  int nonNullValues = 0; // number of non-null values seen so far; drives both source and target offsets
  int idx = 0;           // index of the value (null or not) being processed

  // Fixed precision processing can directly operate on the raw definition-level reader as no peeking
  // is needed.
  final ValuesReader definitionLevels = pageInfo.definitionLevels.getUnderlyingReader();

  for ( ; idx < readBatch; ++idx) {
    // One definition level is consumed per value; 1 is treated as "value present"
    if (definitionLevels.readInteger() == 1) {

      // Only non-null values occupy space in the source buffer, hence the nonNullValues multiplier
      final int currPos = nonNullValues * entrySz;
      final int dataLen = getInt(srcBuff, currPos);

      // NOTE(review): on this early return the definition levels read so far stay consumed and
      // valueLengths/tgtBuff are partially written, while pageInfo.pageDataOff is left unchanged
      // -- presumably the caller resets that state; confirm.
      if (dataLen != expectedDataLen) {
        return null; // this is a soft error; caller needs to revert to variable length processing
      }

      valueLengths[idx] = dataLen;
      // Payloads are packed back to back in the target, without the length prefixes
      final int tgt_pos = nonNullValues * expectedDataLen;

      if (expectedDataLen > 0) {
        // Skip the 4-byte length and copy only the payload
        vlCopy(srcBuff, currPos + 4, tgtBuff, tgt_pos, dataLen);
      }

      // Increase the non null values counter
      ++nonNullValues;

    } else { // Null value
      valueLengths[idx] = -1; // to mark a null value
    }
  }

  // Update the page data buffer offset
  pageInfo.pageDataOff += nonNullValues * entrySz;

  // Now set the bulk entry
  // (arguments: 0, total payload bytes copied, values processed, non-null values;
  // presumably start offset / data length / value count / non-null count -- confirm against entry.set)
  entry.set(0, nonNullValues * expectedDataLen, idx, nonNullValues);

  return entry;
}
 
Example 4
Source File: BenchmarkReadingRandomIntegers.java    From parquet-mr with Apache License 2.0 4 votes vote down vote up
/**
 * Initialises {@code reader} from {@code deltaBytes} and reads {@code data.length}
 * integers from it, discarding each value.
 *
 * @param reader     reader to initialise and drain
 * @param deltaBytes encoded page bytes to read from
 * @throws IOException propagated from the reader calls
 */
private void readData(ValuesReader reader, byte[] deltaBytes) throws IOException {
  final int valueCount = data.length;
  final ByteBuffer page = ByteBuffer.wrap(deltaBytes);
  reader.initFromPage(valueCount, ByteBufferInputStream.wrap(page));
  // The decoded values are intentionally discarded.
  for (int remaining = data.length; remaining > 0; remaining--) {
    reader.readInteger();
  }
}