Java Code Examples for org.apache.parquet.format.converter.ParquetMetadataConverter#MetadataFilter

The following examples show how to use org.apache.parquet.format.converter.ParquetMetadataConverter#MetadataFilter. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: ParquetFileAccessor.java    From pxf with Apache License 2.0 5 votes vote down vote up
/**
 * Reads the original schema from the parquet file.
 * <p>
 * The file is opened with a metadata filter covering the byte range
 * {@code [fileSplit.getStart(), fileSplit.getStart() + fileSplit.getLength())},
 * and the schema is taken from the file metadata exposed by the reader.
 * The reader is closed before this method returns.
 *
 * @param parquetFile the path to the parquet file
 * @param fileSplit   the file split we are accessing
 * @return the original schema from the parquet file
 * @throws IOException when there's an IOException while reading the schema;
 *                     any other exception raised while reading is wrapped
 *                     in an IOException with the original as its cause
 */
private MessageType getSchema(Path parquetFile, FileSplit fileSplit) throws IOException {

    final long then = System.nanoTime();
    ParquetMetadataConverter.MetadataFilter filter = ParquetMetadataConverter.range(
            fileSplit.getStart(), fileSplit.getStart() + fileSplit.getLength());
    ParquetReadOptions parquetReadOptions = HadoopReadOptions
            .builder(configuration)
            .withMetadataFilter(filter)
            .build();
    HadoopInputFile inputFile = HadoopInputFile.fromPath(parquetFile, configuration);
    try (ParquetFileReader parquetFileReader =
                 ParquetFileReader.open(inputFile, parquetReadOptions)) {
        FileMetaData metadata = parquetFileReader.getFileMetaData();
        // Guarded because the arguments require extra calls on the reader.
        if (LOG.isDebugEnabled()) {
            LOG.debug("{}-{}: Reading file {} with {} records in {} RowGroups",
                    context.getTransactionId(), context.getSegmentId(),
                    parquetFile.getName(), parquetFileReader.getRecordCount(),
                    parquetFileReader.getRowGroups().size());
        }
        final long millis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - then);
        LOG.debug("{}-{}: Read schema in {} ms", context.getTransactionId(),
                context.getSegmentId(), millis);
        return metadata.getSchema();
    } catch (IOException e) {
        // Already the declared exception type: rethrow untouched so callers keep
        // the concrete subtype and message instead of a generic wrapper.
        throw e;
    } catch (Exception e) {
        // Anything else is surfaced as an IOException, preserving the cause.
        throw new IOException(e);
    }
}
 
Example 2
Source File: SingletonParquetFooterCache.java    From dremio-oss with Apache License 2.0 5 votes vote down vote up
/**
 * Opens the file described by {@code attributes} and delegates to the
 * stream-based {@code readFooter} overload, closing the stream afterwards.
 * <p>
 * NOTE(review): the delegate is expected to read the entire footer without
 * knowing its length up front, reducing seek/read round trips; that logic is
 * not part of this method, so confirm against the overload.
 *
 * @param fs           file system used to open the file; also handed to the delegate
 * @param attributes   attributes of the file; supplies its path and size
 * @param filter       metadata filter passed through to the delegate
 * @param maxFooterLen passed through to the delegate (presumably an upper bound
 *                     on the footer length; verify against the overload)
 * @return the metadata returned by the stream-based overload
 * @throws IOException if opening, reading or closing the stream fails
 */
public static ParquetMetadata readFooter(
  final FileSystem fs,
  final FileAttributes attributes,
  ParquetMetadataConverter.MetadataFilter filter,
  long maxFooterLen) throws IOException {
  try (BulkInputStream input = BulkInputStream.wrap(Streams.wrap(fs.open(attributes.getPath())))) {
    final String pathName = attributes.getPath().toString();
    final long fileLength = attributes.size();
    return readFooter(input, pathName, fileLength, filter, fs, maxFooterLen);
  }
}
 
Example 3
Source File: ParquetReadOptions.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Creates the options object by storing every argument in the field of the
 * same name. The {@code properties} map is not stored directly; it is wrapped
 * with {@link Collections#unmodifiableMap(Map)} first.
 */
ParquetReadOptions(boolean useSignedStringMinMax,
                   boolean useStatsFilter,
                   boolean useDictionaryFilter,
                   boolean useRecordFilter,
                   boolean useColumnIndexFilter,
                   boolean usePageChecksumVerification,
                   boolean useBloomFilter,
                   FilterCompat.Filter recordFilter,
                   ParquetMetadataConverter.MetadataFilter metadataFilter,
                   CompressionCodecFactory codecFactory,
                   ByteBufferAllocator allocator,
                   int maxAllocationSize,
                   Map<String, String> properties) {
  // Filters.
  this.recordFilter = recordFilter;
  this.metadataFilter = metadataFilter;

  // Codec and allocation settings.
  this.codecFactory = codecFactory;
  this.allocator = allocator;
  this.maxAllocationSize = maxAllocationSize;

  // Boolean switches.
  this.useSignedStringMinMax = useSignedStringMinMax;
  this.useStatsFilter = useStatsFilter;
  this.useDictionaryFilter = useDictionaryFilter;
  this.useRecordFilter = useRecordFilter;
  this.useColumnIndexFilter = useColumnIndexFilter;
  this.usePageChecksumVerification = usePageChecksumVerification;
  this.useBloomFilter = useBloomFilter;

  // Expose the caller's map through a read-only view.
  this.properties = Collections.unmodifiableMap(properties);
}
 
Example 4
Source File: SingletonParquetFooterCache.java    From dremio-oss with Apache License 2.0 4 votes vote down vote up
/**
 * Convenience overload that takes a path instead of file attributes.
 * <p>
 * Resolves the attributes with {@code fs.getFileAttributes(file)} and forwards
 * them, together with the remaining arguments, to the attributes-based
 * {@code readFooter} overload.
 *
 * @param fs           file system used to look up the file's attributes
 * @param file         path of the file whose footer is requested
 * @param filter       metadata filter forwarded unchanged
 * @param maxFooterLen forwarded unchanged
 * @return whatever the attributes-based overload returns
 * @throws IOException if the attribute lookup or the delegate fails
 */
public static ParquetMetadata readFooter(
  final FileSystem fs,
  final Path file,
  ParquetMetadataConverter.MetadataFilter filter,
  long maxFooterLen) throws IOException {
  return readFooter(fs, fs.getFileAttributes(file), filter, maxFooterLen);
}
 
Example 5
Source File: ParquetReadOptions.java    From parquet-mr with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the metadata filter stored in this object.
 *
 * @return the value of the {@code metadataFilter} field
 */
public ParquetMetadataConverter.MetadataFilter getMetadataFilter() {
  return this.metadataFilter;
}
 
Example 6
Source File: ParquetReadOptions.java    From parquet-mr with Apache License 2.0 4 votes vote down vote up
/**
 * Stores the given metadata filter on this builder.
 *
 * @param metadataFilter the filter to store; replaces any previously stored value
 * @return this builder, so calls can be chained
 */
public Builder withMetadataFilter(ParquetMetadataConverter.MetadataFilter metadataFilter) {
  this.metadataFilter = metadataFilter;
  return this;
}