Java Code Examples for org.apache.parquet.column.statistics.Statistics#createStats()

The following examples show how to use org.apache.parquet.column.statistics.Statistics#createStats(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. Related API usage is listed in the sidebar.
Example 1
Source File: PrimitiveColumnWriter.java    From presto with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a writer for a single primitive column.
 *
 * <p>Every reference argument is passed through {@code requireNonNull}; the
 * statistics holder is created from the descriptor's primitive type.
 *
 * @param type the column type
 * @param columnDescriptor descriptor of the column; supplies the max definition level and primitive type
 * @param primitiveValueWriter writer used for the column values
 * @param definitionLevelEncoder encoder for definition levels
 * @param repetitionLevelEncoder encoder for repetition levels
 * @param compressionCodecName codec used to look up the compressor
 * @param pageSizeThreshold page size threshold stored as-is
 */
public PrimitiveColumnWriter(
        Type type,
        ColumnDescriptor columnDescriptor,
        PrimitiveValueWriter primitiveValueWriter,
        RunLengthBitPackingHybridEncoder definitionLevelEncoder,
        RunLengthBitPackingHybridEncoder repetitionLevelEncoder,
        CompressionCodecName compressionCodecName,
        int pageSizeThreshold)
{
    // Plain state that needs no validation.
    this.encodings = new HashSet<>();
    this.pageSizeThreshold = pageSizeThreshold;

    // Column identity.
    this.type = requireNonNull(type, "type is null");
    this.columnDescriptor = requireNonNull(columnDescriptor, "columnDescriptor is null");
    this.maxDefinitionLevel = this.columnDescriptor.getMaxDefinitionLevel();

    // Level encoders and value writer.
    this.definitionLevelEncoder = requireNonNull(definitionLevelEncoder, "definitionLevelEncoder is null");
    this.repetitionLevelEncoder = requireNonNull(repetitionLevelEncoder, "repetitionLevelEncoder is null");
    this.primitiveValueWriter = requireNonNull(primitiveValueWriter, "primitiveValueWriter is null");

    // Compression: keep the codec name and resolve its compressor.
    this.compressionCodec = requireNonNull(compressionCodecName, "compressionCodecName is null");
    this.compressor = getCompressor(this.compressionCodec);

    // Fresh, empty statistics matching the column's primitive type.
    this.columnStatistics = Statistics.createStats(this.columnDescriptor.getPrimitiveType());
}
 
Example 2
Source File: ParquetColumnChunkPageWriteStore.java    From Bats with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a page writer for one column chunk.
 *
 * @param path descriptor of the column; its primitive type determines the statistics created
 * @param compressor compressor stored for later use
 * @param initialSlabSize first argument forwarded to the output buffer
 * @param maxCapacityHint second argument forwarded to the output buffer
 * @param allocator allocator forwarded to the output buffer
 */
private ColumnChunkPageWriter(
    ColumnDescriptor path,
    BytesCompressor compressor,
    int initialSlabSize,
    int maxCapacityHint,
    ByteBufferAllocator allocator) {
  this.path = path;
  this.compressor = compressor;
  // Output buffer sized from the caller-supplied hints.
  this.buf = new CapacityByteArrayOutputStream(initialSlabSize, maxCapacityHint, allocator);
  // Start with empty statistics for this column's primitive type.
  this.totalStatistics = Statistics.createStats(path.getPrimitiveType());
}
 
Example 3
Source File: PrimitiveColumnWriter.java    From presto with Apache License 2.0 5 votes vote down vote up
/**
 * Returns this writer to its initial state: clears the page buffer and the
 * recorded encodings, zeroes the size/row counters, clears the closed and
 * data-streams flags, and replaces the column statistics with a new instance.
 */
@Override
public void reset()
{
    // Lifecycle flags.
    closed = false;
    getDataStreamsCalled = false;

    // Running totals.
    totalRows = 0;
    totalCompressedSize = 0;
    totalUnCompressedSize = 0;

    // Buffered pages, encodings, and statistics.
    pageBuffer.clear();
    encodings.clear();
    columnStatistics = Statistics.createStats(columnDescriptor.getPrimitiveType());
}
 
Example 4
Source File: TestParquetMetadataConverter.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Builds statistics for {@code type} from two {@code int} values and checks
 * that the generic min/max accessors report them.
 *
 * @param type primitive type the statistics are created for
 * @param min value expected as the minimum
 * @param max value expected as the maximum
 * @return the populated statistics
 */
private static Statistics<?> createStatsTyped(PrimitiveType type, int min, int max) {
  Statistics<?> statistics = Statistics.createStats(type);

  // Feed the larger value first, then the smaller one.
  statistics.updateStats(max);
  statistics.updateStats(min);

  assertEquals(min, statistics.genericGetMin());
  assertEquals(max, statistics.genericGetMax());
  return statistics;
}
 
Example 5
Source File: TestParquetMetadataConverter.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Builds statistics for {@code type} from two {@code long} values and checks
 * that the generic min/max accessors report them.
 *
 * @param type primitive type the statistics are created for
 * @param min value expected as the minimum
 * @param max value expected as the maximum
 * @return the populated statistics
 */
private static Statistics<?> createStatsTyped(PrimitiveType type, long min, long max) {
  Statistics<?> statistics = Statistics.createStats(type);

  // Feed the larger value first, then the smaller one.
  statistics.updateStats(max);
  statistics.updateStats(min);

  assertEquals(min, statistics.genericGetMin());
  assertEquals(max, statistics.genericGetMax());
  return statistics;
}
 
Example 6
Source File: TestParquetMetadataConverter.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Builds statistics for {@code type} from two {@link BigInteger} values,
 * converted to binaries via {@code FixedBinaryTestUtils.getFixedBinary}, and
 * checks that the generic min/max accessors report those binaries.
 *
 * @param type primitive type the statistics are created for
 * @param min value whose binary form is expected as the minimum
 * @param max value whose binary form is expected as the maximum
 * @return the populated statistics
 */
private static Statistics<?> createStatsTyped(PrimitiveType type, BigInteger min, BigInteger max) {
  Statistics<?> statistics = Statistics.createStats(type);
  Binary lowerBound = FixedBinaryTestUtils.getFixedBinary(type, min);
  Binary upperBound = FixedBinaryTestUtils.getFixedBinary(type, max);

  // Feed the upper bound first, then the lower bound.
  statistics.updateStats(upperBound);
  statistics.updateStats(lowerBound);

  assertEquals(lowerBound, statistics.genericGetMin());
  assertEquals(upperBound, statistics.genericGetMax());
  return statistics;
}
 
Example 7
Source File: TestParquetMetadataConverter.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a three-page INT64 column index, converts it to the Parquet format
 * representation and back, and checks boundary order, null pages, null
 * counts, and min/max values. Also checks that the converter returns
 * {@code null} for a null column index and for the INT96 and INTERVAL types.
 */
@Test
public void testColumnIndexConversion() {
  PrimitiveType type = Types.required(PrimitiveTypeName.INT64).named("test_int64");
  ColumnIndexBuilder builder = ColumnIndexBuilder.getBuilder(type, Integer.MAX_VALUE);

  // Page 0: 16 nulls, values -100 and 100.
  Statistics<?> stats = Statistics.createStats(type);
  stats.incrementNumNulls(16);
  stats.updateStats(-100L);
  stats.updateStats(100L);
  builder.add(stats);

  // Page 1: 111 nulls and no values.
  stats = Statistics.createStats(type);
  stats.incrementNumNulls(111);
  builder.add(stats);

  // Page 2: no nulls, values 200 and 500.
  stats = Statistics.createStats(type);
  stats.updateStats(200L);
  stats.updateStats(500L);
  builder.add(stats);

  // Round trip through the format-level column index.
  org.apache.parquet.format.ColumnIndex parquetColumnIndex =
      ParquetMetadataConverter.toParquetColumnIndex(type, builder.build());
  ColumnIndex columnIndex = ParquetMetadataConverter.fromParquetColumnIndex(type, parquetColumnIndex);

  // assertEquals (rather than assertTrue on equals) reports expected/actual on failure.
  assertEquals(BoundaryOrder.ASCENDING, columnIndex.getBoundaryOrder());
  assertEquals(Arrays.asList(false, true, false), columnIndex.getNullPages());
  assertEquals(Arrays.asList(16L, 111L, 0L), columnIndex.getNullCounts());
  assertEquals(
      Arrays.asList(
          ByteBuffer.wrap(BytesUtils.longToBytes(-100L)),
          ByteBuffer.allocate(0),
          ByteBuffer.wrap(BytesUtils.longToBytes(200L))),
      columnIndex.getMinValues());
  assertEquals(
      Arrays.asList(
          ByteBuffer.wrap(BytesUtils.longToBytes(100L)),
          ByteBuffer.allocate(0),
          ByteBuffer.wrap(BytesUtils.longToBytes(500L))),
      columnIndex.getMaxValues());

  assertNull("Should handle null column index", ParquetMetadataConverter
      .toParquetColumnIndex(Types.required(PrimitiveTypeName.INT32).named("test_int32"), null));
  assertNull("Should ignore unsupported types", ParquetMetadataConverter
      .toParquetColumnIndex(Types.required(PrimitiveTypeName.INT96).named("test_int96"), columnIndex));
  assertNull("Should ignore unsupported types",
      ParquetMetadataConverter.fromParquetColumnIndex(Types.required(PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY)
          .length(12).as(OriginalType.INTERVAL).named("test_interval"), parquetColumnIndex));
}
 
Example 8
Source File: PrimitiveValueWriter.java    From presto with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a value writer that delegates to {@code valuesWriter} and keeps
 * statistics for {@code parquetType}.
 *
 * @param parquetType primitive type the statistics are created for; checked with {@code requireNonNull}
 * @param valuesWriter underlying values writer; checked with {@code requireNonNull}
 */
public PrimitiveValueWriter(PrimitiveType parquetType, ValuesWriter valuesWriter)
{
    this.parquetType = requireNonNull(parquetType, "parquetType is null");
    this.valuesWriter = requireNonNull(valuesWriter, "valuesWriter is null");

    // Start with empty statistics for the validated type.
    this.statistics = Statistics.createStats(this.parquetType);
}
 
Example 9
Source File: PrimitiveValueWriter.java    From presto with Apache License 2.0 4 votes vote down vote up
/**
 * Resets the underlying values writer and replaces the statistics with a
 * new instance created for this writer's Parquet type.
 */
@Override
public void reset()
{
    valuesWriter.reset();
    // Discard the old statistics by creating a new holder.
    statistics = Statistics.createStats(parquetType);
}
 
Example 10
Source File: ColumnWriterBase.java    From parquet-mr with Apache License 2.0 4 votes vote down vote up
/**
 * Replaces the current statistics with a new instance created for the
 * primitive type of this writer's column path.
 */
private void resetStatistics() {
  statistics = Statistics.createStats(path.getPrimitiveType());
}