Java Code Examples for org.apache.parquet.hadoop.metadata.ColumnChunkMetaData#getTotalUncompressedSize()

The following examples show how to use org.apache.parquet.hadoop.metadata.ColumnChunkMetaData#getTotalUncompressedSize(). The examples are taken from open-source projects; follow the link above each example to see it in the context of the original project or source file.
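Before the project examples, here is a minimal, self-contained sketch of where a ColumnChunkMetaData instance typically comes from: open a file footer with ParquetFileReader and walk the row groups. The class name and file path are placeholders, not taken from any project below.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.parquet.hadoop.ParquetFileReader;
import org.apache.parquet.hadoop.metadata.BlockMetaData;
import org.apache.parquet.hadoop.metadata.ColumnChunkMetaData;
import org.apache.parquet.hadoop.metadata.ParquetMetadata;
import org.apache.parquet.hadoop.util.HadoopInputFile;

public class UncompressedSizeDemo {
  public static void main(String[] args) throws Exception {
    Path path = new Path("/tmp/example.parquet"); // placeholder path
    try (ParquetFileReader reader =
        ParquetFileReader.open(HadoopInputFile.fromPath(path, new Configuration()))) {
      ParquetMetadata footer = reader.getFooter();
      for (BlockMetaData block : footer.getBlocks()) {
        for (ColumnChunkMetaData column : block.getColumns()) {
          // getTotalSize() is the compressed on-disk size; getTotalUncompressedSize()
          // is the size of the same pages after decompression.
          System.out.printf("%s: %d bytes on disk, %d bytes uncompressed%n",
              column.getPath(), column.getTotalSize(), column.getTotalUncompressedSize());
        }
      }
    }
  }
}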
Example 1
Source File: PageReader.java    From Bats with Apache License 2.0
PageReader(org.apache.drill.exec.store.parquet.columnreaders.ColumnReader<?> parentStatus, FileSystem fs, Path path, ColumnChunkMetaData columnChunkMetaData)
  throws ExecutionSetupException {
  this.parentColumnReader = parentStatus;
  allocatedDictionaryBuffers = new ArrayList<ByteBuf>();
  codecFactory = parentColumnReader.parentReader.getCodecFactory();
  this.stats = parentColumnReader.parentReader.parquetReaderStats;
  this.fileName = path.toString();
  debugName = new StringBuilder()
     .append(this.parentColumnReader.parentReader.getFragmentContext().getFragIdString())
     .append(":")
     .append(this.parentColumnReader.parentReader.getOperatorContext().getStats().getId())
     .append(this.parentColumnReader.columnChunkMetaData.toString())
     .toString();
  try {
    inputStream = fs.open(path);
    BufferAllocator allocator = parentColumnReader.parentReader.getOperatorContext().getAllocator();
    // The return value is discarded; the bare call is kept as it appears in the original source.
    columnChunkMetaData.getTotalUncompressedSize();
    useBufferedReader = parentColumnReader.parentReader.useBufferedReader;
    scanBufferSize = parentColumnReader.parentReader.bufferedReadSize;
    useFadvise = parentColumnReader.parentReader.useFadvise;
    enforceTotalSize = parentColumnReader.parentReader.enforceTotalSize;
    if (useBufferedReader) {
      this.dataReader = new BufferedDirectBufInputStream(inputStream, allocator, path.getName(),
          columnChunkMetaData.getStartingPos(), columnChunkMetaData.getTotalSize(), scanBufferSize,
          enforceTotalSize, useFadvise);
    } else {
      this.dataReader = new DirectBufInputStream(inputStream, allocator, path.getName(),
          columnChunkMetaData.getStartingPos(), columnChunkMetaData.getTotalSize(), enforceTotalSize,
          useFadvise);
    }
  } catch (IOException e) {
    throw new ExecutionSetupException("Error opening or reading metadata for parquet file at location: "
        + path.getName(), e);
  }
}
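Note that the getTotalUncompressedSize() call in this constructor never assigns its result; the byte range handed to the (buffered) DirectBufInputStream is bounded by getStartingPos() and getTotalSize(), i.e. the compressed on-disk extent of the column chunk.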
 
Example 2
Source File: MetadataUtils.java    From parquet-mr with Apache License 2.0
private static void showDetails(PrettyPrintWriter out, ColumnChunkMetaData meta, boolean name) {
  long doff = meta.getDictionaryPageOffset();
  long foff = meta.getFirstDataPageOffset();
  long tsize = meta.getTotalSize();
  long usize = meta.getTotalUncompressedSize();
  long count = meta.getValueCount();
  double ratio = usize / (double)tsize;
  String encodings = Joiner.on(',').skipNulls().join(meta.getEncodings());

  if (name) {
    String path = Joiner.on('.').skipNulls().join(meta.getPath());
    out.format("%s: ", path);
  }

  out.format(" %s", meta.getType());
  out.format(" %s", meta.getCodec());
  out.format(" DO:%d", doff);
  out.format(" FPO:%d", foff);
  out.format(" SZ:%d/%d/%.2f", tsize, usize, ratio);
  out.format(" VC:%d", count);
  if (!encodings.isEmpty()) out.format(" ENC:%s", encodings);
  Statistics<?> stats = meta.getStatistics();
  if (stats != null) {
    out.format(" ST:[%s]", stats);
  } else {
    out.format(" ST:[none]");
  }
  out.println();
}
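The SZ field above prints compressed size, uncompressed size, and their ratio, so values above 1.0 mean the codec saved space. The same accessors extend naturally to a file-wide figure; a minimal sketch (the helper name is an assumption, and it reuses the metadata classes imported in the first sketch):

// Hypothetical helper: overall compression ratio for an entire footer.
static double overallRatio(ParquetMetadata footer) {
  long compressed = 0;
  long uncompressed = 0;
  for (BlockMetaData block : footer.getBlocks()) {
    for (ColumnChunkMetaData column : block.getColumns()) {
      compressed += column.getTotalSize();
      uncompressed += column.getTotalUncompressedSize();
    }
  }
  // Same convention as showDetails(): uncompressed bytes per compressed byte.
  return uncompressed / (double) compressed;
}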
 
Example 3
Source File: ParquetMetadataConverter.java    From parquet-mr with Apache License 2.0
private void addRowGroup(ParquetMetadata parquetMetadata, List<RowGroup> rowGroups, BlockMetaData block) {
    //rowGroup.total_byte_size = ;
    List<ColumnChunkMetaData> columns = block.getColumns();
    List<ColumnChunk> parquetColumns = new ArrayList<ColumnChunk>();
    for (ColumnChunkMetaData columnMetaData : columns) {
      ColumnChunk columnChunk = new ColumnChunk(columnMetaData.getFirstDataPageOffset()); // verify this is the right offset
      columnChunk.file_path = block.getPath(); // they are in the same file for now
      columnChunk.meta_data = new ColumnMetaData(
          getType(columnMetaData.getType()),
          toFormatEncodings(columnMetaData.getEncodings()),
          Arrays.asList(columnMetaData.getPath().toArray()),
          toFormatCodec(columnMetaData.getCodec()),
          columnMetaData.getValueCount(),
          columnMetaData.getTotalUncompressedSize(),
          columnMetaData.getTotalSize(),
          columnMetaData.getFirstDataPageOffset());
      if (columnMetaData.getEncodingStats() != null && columnMetaData.getEncodingStats().hasDictionaryPages()) {
        columnChunk.meta_data.setDictionary_page_offset(columnMetaData.getDictionaryPageOffset());
      }
      columnChunk.meta_data.setBloom_filter_offset(columnMetaData.getBloomFilterOffset());
      if (!columnMetaData.getStatistics().isEmpty()) {
        columnChunk.meta_data.setStatistics(toParquetStatistics(columnMetaData.getStatistics(), this.statisticsTruncateLength));
      }
      if (columnMetaData.getEncodingStats() != null) {
        columnChunk.meta_data.setEncoding_stats(convertEncodingStats(columnMetaData.getEncodingStats()));
      }
//      columnChunk.meta_data.index_page_offset = ;
//      columnChunk.meta_data.key_value_metadata = ; // nothing yet

      IndexReference columnIndexRef = columnMetaData.getColumnIndexReference();
      if (columnIndexRef != null) {
        columnChunk.setColumn_index_offset(columnIndexRef.getOffset());
        columnChunk.setColumn_index_length(columnIndexRef.getLength());
      }
      IndexReference offsetIndexRef = columnMetaData.getOffsetIndexReference();
      if (offsetIndexRef != null) {
        columnChunk.setOffset_index_offset(offsetIndexRef.getOffset());
        columnChunk.setOffset_index_length(offsetIndexRef.getLength());
      }

      parquetColumns.add(columnChunk);
    }
    RowGroup rowGroup = new RowGroup(parquetColumns, block.getTotalByteSize(), block.getRowCount());
    rowGroups.add(rowGroup);
  }
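Here the two size accessors map directly onto the footer that readers later see: getTotalUncompressedSize() and getTotalSize() populate the total_uncompressed_size and total_compressed_size fields of the parquet-format ColumnMetaData struct (the sixth and seventh constructor arguments above).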