Java Code Examples for org.apache.parquet.HadoopReadOptions#builder()

The following examples show how to use org.apache.parquet.HadoopReadOptions#builder(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage in the sidebar.
Example 1
Source File: ParquetReader.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
@Deprecated
private Builder(ReadSupport<T> readSupport, Path path) {
  // Legacy Path-based constructor; the InputFile-based variant is preferred.
  // A fresh default Hadoop Configuration backs both the reader and its options.
  Configuration hadoopConf = new Configuration();
  this.readSupport = Objects.requireNonNull(readSupport, "readSupport cannot be null");
  this.path = Objects.requireNonNull(path, "path cannot be null");
  this.file = null;
  this.conf = hadoopConf;
  this.optionsBuilder = HadoopReadOptions.builder(hadoopConf);
}
 
Example 2
Source File: ParquetReader.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
@Deprecated
protected Builder(Path path) {
  this.readSupport = null;
  this.file = null;
  this.path = Objects.requireNonNull(path, "path cannot be null");
  this.conf = new Configuration();
  this.optionsBuilder = HadoopReadOptions.builder(conf);
}
 
Example 3
Source File: ParquetReader.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
protected Builder(InputFile file) {
  // InputFile-based constructor. When the file is a HadoopInputFile, reuse
  // the Configuration it already carries; otherwise fall back to a default one.
  this.file = Objects.requireNonNull(file, "file cannot be null");
  this.readSupport = null;
  this.path = null;
  this.conf = (file instanceof HadoopInputFile)
      ? ((HadoopInputFile) file).getConfiguration()
      : new Configuration();
  this.optionsBuilder = HadoopReadOptions.builder(this.conf);
}
 
Example 4
Source File: ParquetReader.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
public Builder<T> withConf(Configuration conf) {
  this.conf = Objects.requireNonNull(conf, "conf cannot be null");
  // Rebuilding the options from the new conf deliberately resets every
  // previously configured option. The record filter alone is carried over:
  // pre-builder callers could set a filter before supplying a conf, and
  // dropping it here would silently break them.
  this.optionsBuilder = HadoopReadOptions.builder(conf);
  if (this.filter != null) {
    this.optionsBuilder.withRecordFilter(this.filter);
  }
  return this;
}
 
Example 5
Source File: ParquetRecordReader.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Opens a {@link ParquetFileReader} for the given split and hands it to the
 * internal record reader.
 *
 * <p>Row-group selection: when the split carries explicit row-group offsets
 * (task-side metadata disabled), the reader is restricted to those offsets;
 * otherwise it reads the byte range [start, end) of the split.
 *
 * @param split the input split describing the file region to read
 * @param configuration job configuration used for options and file access
 * @throws IOException if the file cannot be opened or read
 * @throws IllegalStateException if an explicit offset has no matching row group
 */
private void initializeInternalReader(ParquetInputSplit split, Configuration configuration) throws IOException {
  Path path = split.getPath();
  long[] rowGroupOffsets = split.getRowGroupOffsets();

  // if task.side.metadata is set, rowGroupOffsets is null
  ParquetReadOptions.Builder optionsBuilder = HadoopReadOptions.builder(configuration);
  if (rowGroupOffsets != null) {
    optionsBuilder.withOffsets(rowGroupOffsets);
  } else {
    optionsBuilder.withRange(split.getStart(), split.getEnd());
  }

  // open a reader with the metadata filter
  ParquetFileReader reader = ParquetFileReader.open(
      HadoopInputFile.fromPath(path, configuration), optionsBuilder.build());

  try {
    if (rowGroupOffsets != null) {
      // verify a row group was found for each offset
      List<BlockMetaData> blocks = reader.getFooter().getBlocks();
      if (blocks.size() != rowGroupOffsets.length) {
        throw new IllegalStateException(
            "All of the offsets in the split should be found in the file."
            + " expected: " + Arrays.toString(rowGroupOffsets)
            + " found: " + blocks);
      }
    }

    if (!reader.getRowGroups().isEmpty()) {
      checkDeltaByteArrayProblem(
          reader.getFooter().getFileMetaData(), configuration,
          reader.getRowGroups().get(0));
    }

    // the internal reader takes ownership of the open file reader from here on
    internalReader.initialize(reader, configuration);
  } catch (IOException | RuntimeException e) {
    // Fix: close the open file reader when setup fails. Previously the
    // reader leaked if the offset check threw IllegalStateException or if
    // checkDeltaByteArrayProblem failed before initialize() took ownership.
    try {
      reader.close();
    } catch (IOException closeFailure) {
      e.addSuppressed(closeFailure);
    }
    throw e;
  }
}