Java Code Examples for org.apache.parquet.io.SeekableInputStream#readFully()

The following examples show how to use org.apache.parquet.io.SeekableInputStream#readFully(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: ParquetFileReader.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/**
 * Reads the body of a dictionary page from the stream without decompressing it.
 *
 * <p>Exactly {@code compressed_page_size} bytes (as reported by the page header) are
 * read from {@code fin} and wrapped as-is in the returned page.
 *
 * @param pageHeader header supplying the page sizes and the dictionary page header
 * @param fin stream to read the page bytes from, starting at its current position
 * @return a dictionary page holding the raw bytes, the uncompressed size, the value
 *         count and the converted encoding
 * @throws IOException if reading the page bytes from the stream fails
 */
private DictionaryPage readCompressedDictionary(
    PageHeader pageHeader, SeekableInputStream fin) throws IOException {
  // Pull the raw page bytes off the stream in a single call.
  final byte[] rawPage = new byte[pageHeader.getCompressed_page_size()];
  fin.readFully(rawPage);

  final DictionaryPageHeader dictionaryHeader = pageHeader.getDictionary_page_header();
  return new DictionaryPage(
      BytesInput.from(rawPage),
      pageHeader.getUncompressed_page_size(),
      dictionaryHeader.getNum_values(),
      converter.getEncoding(dictionaryHeader.getEncoding()));
}
 
Example 2
Source File: ParquetFileReader.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Reads the footer metadata located at the tail of a file.
 *
 * <p>The tail is expected to end with a 4-byte little-endian footer length followed by
 * the {@code MAGIC} bytes. The footer bytes themselves sit immediately before that
 * length field and are loaded in one read before being handed to the converter.
 *
 * @param file the file being read; used for its length and for error messages
 * @param options read options supplying the metadata filter
 * @param f stream over {@code file}; it is repositioned by this method
 * @param converter converter used to decode the footer bytes
 * @return the metadata decoded from the footer
 * @throws IOException if seeking or reading the stream fails
 * @throws RuntimeException if the file is too short, the trailing magic does not match,
 *         or the computed footer start lies outside the file
 */
private static final ParquetMetadata readFooter(InputFile file, ParquetReadOptions options, SeekableInputStream f, ParquetMetadataConverter converter) throws IOException {
  final int footerLengthSize = 4;
  final long fileLen = file.getLength();
  LOG.debug("File length {}", fileLen);

  // Layout: MAGIC + data + footer + footerIndex + MAGIC
  final int minimumFileLength = MAGIC.length + footerLengthSize + MAGIC.length;
  if (fileLen < minimumFileLength) {
    throw new RuntimeException(file.toString() + " is not a Parquet file (too small length: " + fileLen + ")");
  }

  final long footerLengthIndex = fileLen - footerLengthSize - MAGIC.length;
  LOG.debug("reading footer index at {}", footerLengthIndex);
  f.seek(footerLengthIndex);
  final int footerLength = readIntLittleEndian(f);

  // The magic bytes directly follow the footer length field.
  final byte[] tailMagic = new byte[MAGIC.length];
  f.readFully(tailMagic);
  final boolean magicMatches = Arrays.equals(MAGIC, tailMagic);
  if (!magicMatches) {
    throw new RuntimeException(file.toString() + " is not a Parquet file. expected magic number at tail " + Arrays.toString(MAGIC) + " but found " + Arrays.toString(tailMagic));
  }

  final long footerIndex = footerLengthIndex - footerLength;
  LOG.debug("read footer length: {}, footer index: {}", footerLength, footerIndex);
  final boolean footerStartsInsideFile = footerIndex >= MAGIC.length && footerIndex < footerLengthIndex;
  if (!footerStartsInsideFile) {
    throw new RuntimeException("corrupted file: the footer index is not within the file: " + footerIndex);
  }

  // Load the whole footer with a single read: individual read operations
  // can be pretty time consuming in HDFS.
  f.seek(footerIndex);
  final ByteBuffer footerBytes = ByteBuffer.allocate(footerLength);
  f.readFully(footerBytes);
  LOG.debug("Finished to read all footer bytes.");
  footerBytes.flip();

  final InputStream footerStream = ByteBufferInputStream.wrap(footerBytes);
  return converter.readParquetMetadata(footerStream, options.getMetadataFilter());
}
 
Example 3
Source File: ParquetFileReader.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Reads the bytes of all chunks in this list from the stream and registers each chunk
 * with the builder.
 *
 * <p>The stream is positioned at {@code offset} and {@code length} bytes are read into
 * buffers obtained from the configured allocator, each no larger than the configured
 * maximum allocation size. Every chunk descriptor then receives a slice of
 * {@code descriptor.size} bytes taken from those buffers, in list order.
 *
 * @param f file to read the chunks from
 * @param builder used to build chunk list to read the pages for the different columns
 * @throws IOException if there is an error while reading from the stream
 */
public void readAll(SeekableInputStream f, ChunkListBuilder builder) throws IOException {
  f.seek(offset);

  // Split the total length into full-size allocations plus one trailing remainder.
  final int maxAllocationSize = options.getMaxAllocationSize();
  int fullAllocations = length / maxAllocationSize;
  int lastAllocationSize = length % maxAllocationSize;

  int numAllocations = fullAllocations + (lastAllocationSize > 0 ? 1 : 0);
  List<ByteBuffer> buffers = new ArrayList<>(numAllocations);

  for (int i = 0; i < fullAllocations; i += 1) {
    buffers.add(options.getAllocator().allocate(maxAllocationSize));
  }

  if (lastAllocationSize > 0) {
    buffers.add(options.getAllocator().allocate(lastAllocationSize));
  }

  // Fill each buffer completely, then flip it so it is ready to be read from.
  for (ByteBuffer buffer : buffers) {
    f.readFully(buffer);
    buffer.flip();
  }

  // report in a counter the data we just scanned
  BenchmarkCounter.incrementBytesRead(length);
  ByteBufferInputStream stream = ByteBufferInputStream.wrap(buffers);
  for (int i = 0; i < chunks.size(); i++) {
    ChunkDescriptor descriptor = chunks.get(i);
    builder.add(descriptor, stream.sliceBuffers(descriptor.size), f);
  }
}
 
Example 4
Source File: VectorAccessibleSerializable.java    From dremio-oss with Apache License 2.0 4 votes vote down vote up
/**
 * Reads {@code bytesToRead} bytes from the stream into the given Arrow buffer.
 *
 * <p>The data is read into the NIO buffer returned by
 * {@code outputBuffer.nioBuffer(0, bytesToRead)}; afterwards the buffer's writer index
 * is set to {@code bytesToRead}.
 *
 * @param input stream to read from, starting at its current position
 * @param outputBuffer buffer that receives the data
 * @param bytesToRead number of bytes to read
 * @throws IOException if reading from the stream fails
 */
public static void readFromStream(SeekableInputStream input, final ArrowBuf outputBuffer, final int bytesToRead) throws IOException {
  input.readFully(outputBuffer.nioBuffer(0, bytesToRead));
  // Update the writer index only after the read has completed.
  outputBuffer.writerIndex(bytesToRead);
}
 
Example 5
Source File: TestBulkInputStream.java    From dremio-oss with Apache License 2.0 4 votes vote down vote up
/**
 * Drives a seekable stream through its read, readFully and seek operations, checking
 * after every step that the data matches the expected test data and that
 * {@code getPos()} reports the expected position.
 *
 * @param inputStream stream under test; expected to start at position 0
 * @throws IOException if any stream operation fails
 */
private void testSeekableStream(SeekableInputStream inputStream) throws IOException {
  int expectedPos = 0;
  assertEquals(expectedPos, inputStream.getPos());

  // readFully(byte[], off, len) into the front of a larger scratch array
  final byte[] scratch = new byte[1000];
  final int headLen = 88;
  inputStream.readFully(scratch, 0, headLen);
  compareData(scratch, 0, expectedPos, headLen);
  expectedPos += headLen;
  assertEquals(expectedPos, inputStream.getPos());

  // readFully(byte[]) filling a whole small array
  final byte[] smallArray = new byte[17];
  inputStream.readFully(smallArray);
  compareData(smallArray, 0, expectedPos, 17);
  expectedPos += 17;
  assertEquals(expectedPos, inputStream.getPos());

  // read(ByteBuffer) with a heap buffer
  final ByteBuffer heapBuf = ByteBuffer.allocate(25);
  inputStream.read(heapBuf);
  compareData(heapBuf.array(), 0, expectedPos, 25);
  expectedPos += 25;
  assertEquals(expectedPos, inputStream.getPos());

  // read(ByteBuffer) with a direct buffer; copy into a ByteBuf for comparison
  final ByteBuffer directBuf = ByteBuffer.allocateDirect(71);
  inputStream.read(directBuf);
  final ByteBuf directCopy = Unpooled.directBuffer(100);
  directBuf.flip();
  directCopy.writeBytes(directBuf);
  compareData(directCopy, expectedPos, 71);
  expectedPos += 71;
  assertEquals(expectedPos, inputStream.getPos());

  // readFully(ByteBuffer) with a heap buffer
  final ByteBuffer heapBufFully = ByteBuffer.allocate(66);
  inputStream.readFully(heapBufFully);
  compareData(heapBufFully.array(), 0, expectedPos, 66);
  expectedPos += 66;
  assertEquals(expectedPos, inputStream.getPos());

  // Single-byte read(), three times in a row
  scratch[0] = (byte) inputStream.read();
  scratch[1] = (byte) inputStream.read();
  scratch[2] = (byte) inputStream.read();
  compareData(scratch, 0, expectedPos, 3);
  expectedPos += 3;
  assertEquals(expectedPos, inputStream.getPos());

  // Seek 50 bytes ahead, then read
  expectedPos += 50;
  inputStream.seek(expectedPos);
  final int afterSkipLen = 37;
  inputStream.readFully(scratch, 0, afterSkipLen);
  compareData(scratch, 0, expectedPos, afterSkipLen);
  expectedPos += afterSkipLen;
  assertEquals(expectedPos, inputStream.getPos());

  // Seek to 100 bytes before the end of the test data, then read those 100 bytes
  final int tailLen = 100;
  expectedPos = TEST_DATA_SIZE - 100;
  inputStream.seek(expectedPos);
  inputStream.readFully(scratch, 0, tailLen);
  compareData(scratch, 0, expectedPos, tailLen);
  expectedPos += tailLen;
  assertEquals(expectedPos, inputStream.getPos());
}