Java Code Examples for org.apache.parquet.filter2.compat.FilterCompat#get()

The following examples show how to use org.apache.parquet.filter2.compat.FilterCompat#get(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: ParquetInputFormat.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Opens the given file split for reading.
 *
 * <p>Creates a {@code ParquetFileReader} for the split's path and derives the read schema from
 * the file schema via {@code getReadSchema}. If {@code skipThisSplit} is set afterwards, the
 * split is skipped and the file reader is closed here, since nothing else holds a reference to
 * it. Otherwise the {@code ParquetRecordReader} is created and initialized with the file reader.
 *
 * @param split the file split to open
 * @throws IOException if the Parquet file cannot be opened or read, or if closing the reader of
 *     a skipped split fails
 */
@Override
public void open(FileInputSplit split) throws IOException {
	// reset the flag when open a new split
	this.skipThisSplit = false;
	org.apache.hadoop.conf.Configuration configuration = new org.apache.hadoop.conf.Configuration();
	InputFile inputFile =
		HadoopInputFile.fromPath(new org.apache.hadoop.fs.Path(split.getPath().toUri()), configuration);
	ParquetReadOptions options = ParquetReadOptions.builder().build();
	ParquetFileReader fileReader = new ParquetFileReader(inputFile, options);
	MessageType fileSchema = fileReader.getFileMetaData().getSchema();
	// NOTE(review): getReadSchema presumably flips skipThisSplit on a schema mismatch — confirm
	MessageType readSchema = getReadSchema(fileSchema, split.getPath());
	if (skipThisSplit) {
		// The reader is a local that is never handed to the record reader on this path,
		// so it must be released here to avoid leaking the underlying file stream.
		fileReader.close();
		LOG.warn(String.format(
			"Escaped the file split [%s] due to mismatch of file schema to expected result schema",
			split.getPath().toString()));
	} else {
		this.parquetRecordReader = new ParquetRecordReader<>(new RowReadSupport(), readSchema,
			filterPredicate == null ? FilterCompat.NOOP : FilterCompat.get(filterPredicate));
		// From here on the file reader is passed to the record reader (assumed to own it — TODO confirm).
		this.parquetRecordReader.initialize(fileReader, configuration);
		this.parquetRecordReader.setSkipCorruptedRecord(this.skipCorruptedRecord);

		// Register the metric counter only once; later splits reuse it.
		if (this.recordConsumed == null) {
			this.recordConsumed = getRuntimeContext().getMetricGroup().counter("parquet-records-consumed");
		}

		LOG.debug(String.format("Open ParquetInputFormat with FileInputSplit [%s]", split.getPath().toString()));
	}
}
 
Example 2
Source File: ParquetFilters.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Converts an expression into a Parquet filter.
 *
 * <p>Returns {@code FilterCompat.NOOP} when the visitor yields {@code null} or
 * {@code AlwaysTrue.INSTANCE}; otherwise wraps the resulting predicate with
 * {@code FilterCompat.get}.
 *
 * @param schema schema passed to the converting visitor
 * @param expr expression to convert
 * @param caseSensitive case-sensitivity flag passed to the converting visitor
 * @return the Parquet filter for {@code expr}, or the no-op filter
 */
static FilterCompat.Filter convert(Schema schema, Expression expr, boolean caseSensitive) {
  ConvertFilterToParquet converter = new ConvertFilterToParquet(schema, caseSensitive);
  FilterPredicate predicate = ExpressionVisitors.visit(expr, converter);
  // TODO: handle AlwaysFalse.INSTANCE
  if (predicate == null || predicate == AlwaysTrue.INSTANCE) {
    return FilterCompat.NOOP;
  }
  // FilterCompat will apply LogicalInverseRewriter
  return FilterCompat.get(predicate);
}
 
Example 3
Source File: ParquetRecordFilterBuilder.java    From pxf with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the built record filter.
 *
 * <p>Takes the head of the filter queue; the queue must be empty afterwards. If the queue held
 * no predicate, the no-op filter is returned.
 *
 * @return the built record filter
 * @throws IllegalStateException if the filter queue still has entries after polling its head
 */
public FilterCompat.Filter getRecordFilter() {
    FilterPredicate head = filterQueue.poll();
    boolean leftovers = !filterQueue.isEmpty();
    if (leftovers) {
        throw new IllegalStateException("Filter queue is not empty after visiting all nodes");
    }
    if (head == null) {
        return FilterCompat.NOOP;
    }
    return FilterCompat.get(head);
}
 
Example 4
Source File: ParquetFilters.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Converts an expression into a Parquet filter.
 *
 * @param schema schema passed to the converting visitor
 * @param expr expression to convert
 * @return {@code FilterCompat.NOOP} when the visitor yields {@code null} or
 *     {@code AlwaysTrue.INSTANCE}, otherwise the predicate wrapped by {@code FilterCompat.get}
 */
static FilterCompat.Filter convert(Schema schema, Expression expr) {
  FilterPredicate converted = visit(expr, new ConvertFilterToParquet(schema));
  // TODO: handle AlwaysFalse.INSTANCE
  boolean noFilterNeeded = converted == null || converted == AlwaysTrue.INSTANCE;
  // FilterCompat will apply LogicalInverseRewriter
  return noFilterNeeded ? FilterCompat.NOOP : FilterCompat.get(converted);
}
 
Example 5
Source File: ParquetFilters.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Converts an expression into a Parquet filter using the column-specific visitor.
 *
 * @param schema schema passed to the converting visitor
 * @param column column name passed to the converting visitor
 * @param expr expression to convert
 * @return {@code FilterCompat.NOOP} when the visitor yields {@code null} or
 *     {@code AlwaysTrue.INSTANCE}, otherwise the predicate wrapped by {@code FilterCompat.get}
 */
static FilterCompat.Filter convertColumnFilter(Schema schema, String column, Expression expr) {
  FilterPredicate columnPredicate = visit(expr, new ConvertColumnFilterToParquet(schema, column));
  // TODO: handle AlwaysFalse.INSTANCE
  if (columnPredicate == null) {
    return FilterCompat.NOOP;
  }
  if (columnPredicate == AlwaysTrue.INSTANCE) {
    return FilterCompat.NOOP;
  }
  // FilterCompat will apply LogicalInverseRewriter
  return FilterCompat.get(columnPredicate);
}
 
Example 6
Source File: ParquetInputFormat.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Opens the given file split for reading.
 *
 * <p>Creates a {@code ParquetFileReader} for the split's path and derives the read schema from
 * the file schema via {@code getReadSchema}. If {@code skipThisSplit} is set afterwards, the
 * split is skipped and the file reader is closed here, since nothing else holds a reference to
 * it. Otherwise the {@code ParquetRecordReader} is created and initialized with the file reader.
 *
 * @param split the file split to open
 * @throws IOException if the Parquet file cannot be opened or read, or if closing the reader of
 *     a skipped split fails
 */
@Override
public void open(FileInputSplit split) throws IOException {
	// reset the flag when open a new split
	this.skipThisSplit = false;
	org.apache.hadoop.conf.Configuration configuration = new org.apache.hadoop.conf.Configuration();
	InputFile inputFile =
		HadoopInputFile.fromPath(new org.apache.hadoop.fs.Path(split.getPath().toUri()), configuration);
	ParquetReadOptions options = ParquetReadOptions.builder().build();
	ParquetFileReader fileReader = new ParquetFileReader(inputFile, options);
	MessageType fileSchema = fileReader.getFileMetaData().getSchema();
	// NOTE(review): getReadSchema presumably flips skipThisSplit on a schema mismatch — confirm
	MessageType readSchema = getReadSchema(fileSchema, split.getPath());
	if (skipThisSplit) {
		// The reader is a local that is never handed to the record reader on this path,
		// so it must be released here to avoid leaking the underlying file stream.
		fileReader.close();
		LOG.warn(String.format(
			"Escaped the file split [%s] due to mismatch of file schema to expected result schema",
			split.getPath().toString()));
	} else {
		this.parquetRecordReader = new ParquetRecordReader<>(new RowReadSupport(), readSchema,
			filterPredicate == null ? FilterCompat.NOOP : FilterCompat.get(filterPredicate));
		// From here on the file reader is passed to the record reader (assumed to own it — TODO confirm).
		this.parquetRecordReader.initialize(fileReader, configuration);
		this.parquetRecordReader.setSkipCorruptedRecord(this.skipCorruptedRecord);

		// Register the metric counter only once; later splits reuse it.
		if (this.recordConsumed == null) {
			this.recordConsumed = getRuntimeContext().getMetricGroup().counter("parquet-records-consumed");
		}

		LOG.debug(String.format("Open ParquetInputFormat with FileInputSplit [%s]", split.getPath().toString()));
	}
}
 
Example 7
Source File: TestMultipleWriteRead.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Validates a file against a three-part filter: {@code id >= 0}, {@code name < "b"} and
 * {@code comment != null}.
 *
 * <p>The same condition is expressed twice: once as a Parquet filter handed to
 * {@code validateFile}, and once as an in-memory predicate used to select the expected groups
 * from {@code data}.
 *
 * @param file the file to validate
 * @param data the groups from which the expected records are selected
 * @throws IOException propagated from {@code validateFile}
 */
private void validateFileWithComplexFilter(Path file, List<Group> data) throws IOException {
  Binary upperBound = fromString("b");

  // In-memory equivalent of the Parquet filter built below.
  Predicate<Group> expectedMatch = g -> {
    if (!(g.getInteger("id", 0) >= 0)) {
      return false;
    }
    if (!(BINARY_COMPARATOR.compare(g.getBinary("name", 0), upperBound) < 0)) {
      return false;
    }
    return g.getFieldRepetitionCount("comment") > 0;
  };

  Filter parquetFilter = FilterCompat.get(
      and(
          gtEq(intColumn("id"), 0),
          and(
              lt(binaryColumn("name"), upperBound),
              notEq(binaryColumn("comment"), null))));

  validateFile(file, parquetFilter, data.stream().filter(expectedMatch));
}
 
Example 8
Source File: InternalParquetRecordReader.java    From tajo with Apache License 2.0 2 votes vote down vote up
/**
 * Creates a record reader from a legacy {@link UnboundRecordFilter}.
 *
 * <p>The filter is wrapped with {@code FilterCompat.get(filter)} and the result is passed on to
 * another constructor of this class.
 *
 * @param readSupport Object which helps read files of the given type, e.g. Thrift, Avro.
 * @param filter Optional filter for only returning matching records.
 * @deprecated use {@link #InternalParquetRecordReader(ReadSupport, Filter)}
 */
@Deprecated
public InternalParquetRecordReader(ReadSupport<T> readSupport, UnboundRecordFilter filter) {
  this(readSupport, FilterCompat.get(filter));
}
 
Example 9
Source File: ParquetReader.java    From tajo with Apache License 2.0 2 votes vote down vote up
/**
 * Creates a reader for {@code file} with a newly created {@link Configuration}.
 *
 * <p>The record filter is wrapped with {@code FilterCompat.get(unboundRecordFilter)} and the
 * call is delegated to another constructor of this class.
 *
 * @param file the file to read
 * @param readSupport to materialize records
 * @param unboundRecordFilter the filter to use to filter records
 * @throws IOException propagated from the delegated constructor
 * @deprecated use {@link #builder(ReadSupport, Path)}
 */
@Deprecated
public ParquetReader(Path file, ReadSupport<T> readSupport, UnboundRecordFilter unboundRecordFilter) throws IOException {
  this(new Configuration(), file, readSupport, FilterCompat.get(unboundRecordFilter));
}
 
Example 10
Source File: ParquetReader.java    From tajo with Apache License 2.0 2 votes vote down vote up
/**
 * Creates a reader for {@code file} with the given configuration.
 *
 * <p>The record filter is wrapped with {@code FilterCompat.get(unboundRecordFilter)} and the
 * call is delegated to another constructor of this class.
 *
 * @param conf the configuration
 * @param file the file to read
 * @param readSupport to materialize records
 * @param unboundRecordFilter the filter to use to filter records
 * @throws IOException propagated from the delegated constructor
 * @deprecated use {@link #builder(ReadSupport, Path)}
 */
@Deprecated
public ParquetReader(Configuration conf, Path file, ReadSupport<T> readSupport, UnboundRecordFilter unboundRecordFilter) throws IOException {
  this(conf, file, readSupport, FilterCompat.get(unboundRecordFilter));
}
 
Example 11
Source File: InternalParquetRecordReader.java    From parquet-mr with Apache License 2.0 2 votes vote down vote up
/**
 * Creates a record reader from a legacy {@link UnboundRecordFilter}.
 *
 * <p>The filter is wrapped with {@code FilterCompat.get(filter)} and the result is passed on to
 * another constructor of this class.
 *
 * @param readSupport Object which helps read files of the given type, e.g. Thrift, Avro.
 * @param filter Optional filter for only returning matching records.
 * @deprecated use {@link #InternalParquetRecordReader(ReadSupport, Filter)}
 */
@Deprecated
public InternalParquetRecordReader(ReadSupport<T> readSupport, UnboundRecordFilter filter) {
  this(readSupport, FilterCompat.get(filter));
}
 
Example 12
Source File: ParquetReader.java    From parquet-mr with Apache License 2.0 2 votes vote down vote up
/**
 * Creates a reader for {@code file} with a newly created {@link Configuration}.
 *
 * <p>The record filter is wrapped with {@code FilterCompat.get(unboundRecordFilter)} and the
 * call is delegated to another constructor of this class.
 *
 * @param file the file to read
 * @param readSupport to materialize records
 * @param unboundRecordFilter the filter to use to filter records
 * @throws IOException if there is an error while reading
 * @deprecated use {@link #builder(ReadSupport, Path)}
 */
@Deprecated
public ParquetReader(Path file, ReadSupport<T> readSupport, UnboundRecordFilter unboundRecordFilter) throws IOException {
  this(new Configuration(), file, readSupport, FilterCompat.get(unboundRecordFilter));
}
 
Example 13
Source File: ParquetReader.java    From parquet-mr with Apache License 2.0 2 votes vote down vote up
/**
 * Creates a reader for {@code file} with the given configuration.
 *
 * <p>The record filter is wrapped with {@code FilterCompat.get(unboundRecordFilter)} and the
 * call is delegated to another constructor of this class.
 *
 * @param conf the configuration
 * @param file the file to read
 * @param readSupport to materialize records
 * @param unboundRecordFilter the filter to use to filter records
 * @throws IOException if there is an error while reading
 * @deprecated use {@link #builder(ReadSupport, Path)}
 */
@Deprecated
public ParquetReader(Configuration conf, Path file, ReadSupport<T> readSupport, UnboundRecordFilter unboundRecordFilter) throws IOException {
  this(conf, file, readSupport, FilterCompat.get(unboundRecordFilter));
}
 
Example 14
Source File: ParquetInputFormat.java    From parquet-mr with Apache License 2.0 2 votes vote down vote up
/**
 * Returns a non-null Filter, which is a wrapper around either a
 * FilterPredicate, an UnboundRecordFilter, or a no-op filter.
 *
 * <p>Both candidates are looked up from {@code conf} (via {@code getFilterPredicate} and
 * {@code getUnboundRecordFilterInstance}) and handed together to the two-argument
 * {@code FilterCompat.get}, which decides which wrapper to return.
 *
 * @param conf a configuration
 * @return a filter for the filter predicate or unbound record filter specified in conf
 */
public static Filter getFilter(Configuration conf) {
  return FilterCompat.get(getFilterPredicate(conf), getUnboundRecordFilterInstance(conf));
}
 
Example 15
Source File: ParquetRecordReader.java    From parquet-mr with Apache License 2.0 2 votes vote down vote up
/**
 * Creates a record reader from a legacy {@link UnboundRecordFilter}.
 *
 * <p>The filter is wrapped with {@code FilterCompat.get(filter)} and the result is passed on to
 * another constructor of this class.
 *
 * @param readSupport Object which helps read files of the given type, e.g. Thrift, Avro.
 * @param filter for filtering individual records
 * @deprecated will be removed in 2.0.0.
 */
@Deprecated
public ParquetRecordReader(ReadSupport<T> readSupport, UnboundRecordFilter filter) {
  this(readSupport, FilterCompat.get(filter));
}