Java Code Examples for org.apache.parquet.filter2.compat.FilterCompat#NOOP

The following examples show how to use org.apache.parquet.filter2.compat.FilterCompat#NOOP. You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You can also check out related API usage in the sidebar.
Example 1
Source File: ParquetInputFormat.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Opens the given split: creates a Parquet file reader for the split's path, derives the
 * read schema from the file schema and, unless the split has been flagged to be skipped,
 * creates and initializes the record reader for it.
 *
 * <p>The file reader is closed before returning whenever it has not been passed on to a
 * record reader (the split is skipped, or resolving the schema fails), so that the
 * underlying file is not left open.
 *
 * @param split the file split to open
 * @throws IOException if opening the file, initializing the record reader or closing an
 *                     unused file reader fails
 */
@Override
public void open(FileInputSplit split) throws IOException {
	// reset the flag when opening a new split
	this.skipThisSplit = false;
	org.apache.hadoop.conf.Configuration configuration = new org.apache.hadoop.conf.Configuration();
	InputFile inputFile =
		HadoopInputFile.fromPath(new org.apache.hadoop.fs.Path(split.getPath().toUri()), configuration);
	ParquetReadOptions options = ParquetReadOptions.builder().build();
	ParquetFileReader fileReader = new ParquetFileReader(inputFile, options);
	// Tracks whether the file reader has been passed to a record reader. Until then this
	// method is the only holder of the reader and must close it itself.
	boolean readerHandedOff = false;
	try {
		MessageType fileSchema = fileReader.getFileMetaData().getSchema();
		// NOTE(review): skipThisSplit is presumably set inside getReadSchema on a schema mismatch.
		MessageType readSchema = getReadSchema(fileSchema, split.getPath());
		if (skipThisSplit) {
			LOG.warn(String.format(
				"Escaped the file split [%s] due to mismatch of file schema to expected result schema",
				split.getPath().toString()));
		} else {
			this.parquetRecordReader = new ParquetRecordReader<>(new RowReadSupport(), readSchema,
				filterPredicate == null ? FilterCompat.NOOP : FilterCompat.get(filterPredicate));
			// From here on the record reader is assumed to be responsible for the file reader.
			readerHandedOff = true;
			this.parquetRecordReader.initialize(fileReader, configuration);
			this.parquetRecordReader.setSkipCorruptedRecord(this.skipCorruptedRecord);

			// The counter is registered only once and reused for subsequent splits.
			if (this.recordConsumed == null) {
				this.recordConsumed = getRuntimeContext().getMetricGroup().counter("parquet-records-consumed");
			}

			LOG.debug(String.format("Open ParquetInputFormat with FileInputSplit [%s]", split.getPath().toString()));
		}
	} finally {
		if (!readerHandedOff) {
			fileReader.close();
		}
	}
}
 
Example 2
Source File: ParquetFilters.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Converts an expression into a Parquet filter.
 *
 * @param schema        the schema handed to the converting visitor
 * @param expr          the expression to convert
 * @param caseSensitive case-sensitivity flag handed to the converting visitor
 * @return {@code FilterCompat.NOOP} when the conversion yields {@code null} or
 *         {@code AlwaysTrue.INSTANCE}; otherwise the filter wrapping the converted predicate
 */
static FilterCompat.Filter convert(Schema schema, Expression expr, boolean caseSensitive) {
  FilterPredicate converted =
      ExpressionVisitors.visit(expr, new ConvertFilterToParquet(schema, caseSensitive));

  // TODO: handle AlwaysFalse.INSTANCE
  if (converted == null || converted == AlwaysTrue.INSTANCE) {
    return FilterCompat.NOOP;
  }

  // FilterCompat will apply LogicalInverseRewriter
  return FilterCompat.get(converted);
}
 
Example 3
Source File: ParquetFileAccessor.java    From pxf with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the parquet record filter that corresponds to the given filter string.
 * {@code FilterCompat.NOOP} is returned when the filter string is blank or when
 * generating the filter fails (the failure is logged).
 *
 * @param filterString      the filter string
 * @param originalFieldsMap a map of field names to types
 * @param schema            the parquet schema
 * @return the parquet record filter for the given filter string
 */
private FilterCompat.Filter getRecordFilter(String filterString, Map<String, Type> originalFieldsMap, MessageType schema) {
    if (StringUtils.isBlank(filterString)) {
        return FilterCompat.NOOP;
    }

    ParquetRecordFilterBuilder recordFilterBuilder = new ParquetRecordFilterBuilder(
            context.getTupleDescription(), originalFieldsMap);
    TreeVisitor operatorPruner = new ParquetOperatorPrunerAndTransformer(
            context.getTupleDescription(), originalFieldsMap, SUPPORTED_OPERATORS);

    FilterCompat.Filter recordFilter;
    try {
        // Step 1: turn the filter string into an expression tree
        Node expressionTree = new FilterParser().parse(filterString);
        // Step 2: walk the tree, first with the pruner (restricted to the
        // supported operators) and then with the builder that produces
        // the parquet record filter
        TRAVERSER.traverse(expressionTree, operatorPruner, recordFilterBuilder);
        recordFilter = recordFilterBuilder.getRecordFilter();
    } catch (Exception ex) {
        String message = String.format("%s-%d: %s--%s Unable to generate Parquet Record Filter for filter",
                context.getTransactionId(),
                context.getSegmentId(),
                context.getDataSource(),
                context.getFilterString());
        LOG.error(message, ex);
        // Best effort: fall back to the NOOP filter instead of failing
        recordFilter = FilterCompat.NOOP;
    }
    return recordFilter;
}
 
Example 4
Source File: ParquetRecordFilterBuilder.java    From pxf with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the record filter built so far.
 *
 * @return the filter wrapping the predicate taken from the head of the filter queue,
 *         or {@code FilterCompat.NOOP} when the queue held no predicate
 * @throws IllegalStateException if predicates remain in the queue after the head is taken
 */
public FilterCompat.Filter getRecordFilter() {
    FilterPredicate head = filterQueue.poll();

    boolean leftovers = !filterQueue.isEmpty();
    if (leftovers) {
        throw new IllegalStateException("Filter queue is not empty after visiting all nodes");
    }

    if (head == null) {
        return FilterCompat.NOOP;
    }
    return FilterCompat.get(head);
}
 
Example 5
Source File: ParquetFilters.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Converts an expression into a Parquet filter.
 *
 * @param schema the schema handed to the converting visitor
 * @param expr   the expression to convert
 * @return {@code FilterCompat.NOOP} when the conversion yields {@code null} or
 *         {@code AlwaysTrue.INSTANCE}; otherwise the filter wrapping the converted predicate
 */
static FilterCompat.Filter convert(Schema schema, Expression expr) {
  FilterPredicate converted = visit(expr, new ConvertFilterToParquet(schema));

  // TODO: handle AlwaysFalse.INSTANCE
  boolean nothingToFilter = converted == null || converted == AlwaysTrue.INSTANCE;
  if (nothingToFilter) {
    return FilterCompat.NOOP;
  }

  // FilterCompat will apply LogicalInverseRewriter
  return FilterCompat.get(converted);
}
 
Example 6
Source File: ParquetFilters.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Converts an expression into a Parquet filter using the column-specific converting visitor.
 *
 * @param schema the schema handed to the converting visitor
 * @param column the column name handed to the converting visitor
 * @param expr   the expression to convert
 * @return {@code FilterCompat.NOOP} when the conversion yields {@code null} or
 *         {@code AlwaysTrue.INSTANCE}; otherwise the filter wrapping the converted predicate
 */
static FilterCompat.Filter convertColumnFilter(Schema schema, String column, Expression expr) {
  FilterPredicate columnPredicate = visit(expr, new ConvertColumnFilterToParquet(schema, column));

  // TODO: handle AlwaysFalse.INSTANCE
  if (columnPredicate == null || columnPredicate == AlwaysTrue.INSTANCE) {
    return FilterCompat.NOOP;
  }

  // FilterCompat will apply LogicalInverseRewriter
  return FilterCompat.get(columnPredicate);
}
 
Example 7
Source File: ParquetInputFormat.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Opens the given split: creates a Parquet file reader for the split's path, derives the
 * read schema from the file schema and, unless the split has been flagged to be skipped,
 * creates and initializes the record reader for it.
 *
 * <p>The file reader is closed before returning whenever it has not been passed on to a
 * record reader (the split is skipped, or resolving the schema fails), so that the
 * underlying file is not left open.
 *
 * @param split the file split to open
 * @throws IOException if opening the file, initializing the record reader or closing an
 *                     unused file reader fails
 */
@Override
public void open(FileInputSplit split) throws IOException {
	// reset the flag when opening a new split
	this.skipThisSplit = false;
	org.apache.hadoop.conf.Configuration configuration = new org.apache.hadoop.conf.Configuration();
	InputFile inputFile =
		HadoopInputFile.fromPath(new org.apache.hadoop.fs.Path(split.getPath().toUri()), configuration);
	ParquetReadOptions options = ParquetReadOptions.builder().build();
	ParquetFileReader fileReader = new ParquetFileReader(inputFile, options);
	// Tracks whether the file reader has been passed to a record reader. Until then this
	// method is the only holder of the reader and must close it itself.
	boolean readerHandedOff = false;
	try {
		MessageType fileSchema = fileReader.getFileMetaData().getSchema();
		// NOTE(review): skipThisSplit is presumably set inside getReadSchema on a schema mismatch.
		MessageType readSchema = getReadSchema(fileSchema, split.getPath());
		if (skipThisSplit) {
			LOG.warn(String.format(
				"Escaped the file split [%s] due to mismatch of file schema to expected result schema",
				split.getPath().toString()));
		} else {
			this.parquetRecordReader = new ParquetRecordReader<>(new RowReadSupport(), readSchema,
				filterPredicate == null ? FilterCompat.NOOP : FilterCompat.get(filterPredicate));
			// From here on the record reader is assumed to be responsible for the file reader.
			readerHandedOff = true;
			this.parquetRecordReader.initialize(fileReader, configuration);
			this.parquetRecordReader.setSkipCorruptedRecord(this.skipCorruptedRecord);

			// The counter is registered only once and reused for subsequent splits.
			if (this.recordConsumed == null) {
				this.recordConsumed = getRuntimeContext().getMetricGroup().counter("parquet-records-consumed");
			}

			LOG.debug(String.format("Open ParquetInputFormat with FileInputSplit [%s]", split.getPath().toString()));
		}
	} finally {
		if (!readerHandedOff) {
			fileReader.close();
		}
	}
}
 
Example 8
Source File: ThriftParquetReader.java    From parquet-mr with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a builder for the given file, starting with a new {@code Configuration},
 * the {@code FilterCompat.NOOP} filter and no thrift class.
 *
 * @param file the file to read
 * @throws NullPointerException if {@code file} is null
 */
private Builder(Path file) {
  // Validate first, then populate the fields.
  Objects.requireNonNull(file, "file cannot be null");
  this.file = file;
  this.thriftClass = null;
  this.filter = FilterCompat.NOOP;
  this.conf = new Configuration();
}
 
Example 9
Source File: ParquetRecordReader.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a record reader from a read support and a read schema by delegating to the
 * three-argument constructor with {@code FilterCompat.NOOP} as the filter.
 *
 * @param readSupport the read support passed through to the delegated constructor
 * @param readSchema  the read schema passed through to the delegated constructor
 */
public ParquetRecordReader(ReadSupport<T> readSupport, MessageType readSchema) {
	this(readSupport, readSchema, FilterCompat.NOOP);
}
 
Example 10
Source File: ParquetRecordReader.java    From parquet-mr with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a record reader by delegating to the two-argument constructor with
 * {@code FilterCompat.NOOP} as the filter.
 *
 * @param readSupport object which helps read files of the given type, e.g. Thrift, Avro.
 */
public ParquetRecordReader(ReadSupport<T> readSupport) {
  this(readSupport, FilterCompat.NOOP);
}
 
Example 11
Source File: InternalParquetRecordReader.java    From parquet-mr with Apache License 2.0 4 votes vote down vote up
/**
 * Creates an internal record reader by delegating to the two-argument constructor with
 * {@code FilterCompat.NOOP} as the filter.
 *
 * @param readSupport object which helps read files of the given type, e.g. Thrift, Avro.
 */
public InternalParquetRecordReader(ReadSupport<T> readSupport) {
  this(readSupport, FilterCompat.NOOP);
}
 
Example 12
Source File: ParquetRecordReader.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Convenience constructor that supplies {@code FilterCompat.NOOP} as the filter argument
 * of the three-argument constructor it delegates to.
 *
 * @param readSupport the read support forwarded unchanged
 * @param readSchema  the read schema forwarded unchanged
 */
public ParquetRecordReader(ReadSupport<T> readSupport, MessageType readSchema) {
	this(readSupport, readSchema, FilterCompat.NOOP);
}
 
Example 13
Source File: ParquetReader.java    From tajo with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a builder for the given path without a read support, starting with a new
 * {@code Configuration} and the {@code FilterCompat.NOOP} filter.
 *
 * @param path the file to read; checked with {@code checkNotNull}
 */
protected Builder(Path path) {
  // Validate first, then populate the fields.
  checkNotNull(path, "path");
  this.file = path;
  this.readSupport = null;
  this.filter = FilterCompat.NOOP;
  this.conf = new Configuration();
}
 
Example 14
Source File: ParquetReader.java    From tajo with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a builder for the given read support and path, starting with a new
 * {@code Configuration} and the {@code FilterCompat.NOOP} filter.
 *
 * @param readSupport the read support to use; checked with {@code checkNotNull}
 * @param path        the file to read; checked with {@code checkNotNull}
 */
private Builder(ReadSupport<T> readSupport, Path path) {
  // Validate both arguments (read support first, then path) before assigning anything.
  checkNotNull(readSupport, "readSupport");
  checkNotNull(path, "path");
  this.readSupport = readSupport;
  this.file = path;
  this.filter = FilterCompat.NOOP;
  this.conf = new Configuration();
}
 
Example 15
Source File: InternalParquetRecordReader.java    From tajo with Apache License 2.0 4 votes vote down vote up
/**
 * Convenience constructor that supplies {@code FilterCompat.NOOP} as the filter argument
 * of the two-argument constructor it delegates to.
 *
 * @param readSupport object which helps read files of the given type, e.g. Thrift, Avro.
 */
public InternalParquetRecordReader(ReadSupport<T> readSupport) {
  this(readSupport, FilterCompat.NOOP);
}
 
Example 16
Source File: ParquetReader.java    From tajo with Apache License 2.0 2 votes vote down vote up
/**
 * Creates a reader by delegating to the four-argument constructor with
 * {@code FilterCompat.NOOP} as the filter.
 *
 * @param conf the configuration
 * @param file the file to read
 * @param readSupport to materialize records
 * @throws IOException if the constructor this one delegates to throws it
 * @deprecated use {@link #builder(ReadSupport, Path)}
 */
@Deprecated
public ParquetReader(Configuration conf, Path file, ReadSupport<T> readSupport) throws IOException {
  this(conf, file, readSupport, FilterCompat.NOOP);
}
 
Example 17
Source File: ParquetReader.java    From parquet-mr with Apache License 2.0 2 votes vote down vote up
/**
 * Creates a reader by delegating to the four-argument constructor with a newly created
 * {@code Configuration} and {@code FilterCompat.NOOP} as the filter.
 *
 * @param file the file to read
 * @param readSupport to materialize records
 * @throws IOException if there is an error while reading
 * @deprecated use {@link #builder(ReadSupport, Path)}
 */
@Deprecated
public ParquetReader(Path file, ReadSupport<T> readSupport) throws IOException {
  this(new Configuration(), file, readSupport, FilterCompat.NOOP);
}
 
Example 18
Source File: ParquetReader.java    From parquet-mr with Apache License 2.0 2 votes vote down vote up
/**
 * Creates a reader by delegating to the four-argument constructor with
 * {@code FilterCompat.NOOP} as the filter.
 *
 * @param conf the configuration
 * @param file the file to read
 * @param readSupport to materialize records
 * @throws IOException if there is an error while reading
 * @deprecated use {@link #builder(ReadSupport, Path)}
 */
@Deprecated
public ParquetReader(Configuration conf, Path file, ReadSupport<T> readSupport) throws IOException {
  this(conf, file, readSupport, FilterCompat.NOOP);
}
 
Example 19
Source File: ParquetReader.java    From tajo with Apache License 2.0 2 votes vote down vote up
/**
 * Creates a reader by delegating to the four-argument constructor with a newly created
 * {@code Configuration} and {@code FilterCompat.NOOP} as the filter.
 *
 * @param file the file to read
 * @param readSupport to materialize records
 * @throws IOException if the constructor this one delegates to throws it
 * @deprecated use {@link #builder(ReadSupport, Path)}
 */
@Deprecated
public ParquetReader(Path file, ReadSupport<T> readSupport) throws IOException {
  this(new Configuration(), file, readSupport, FilterCompat.NOOP);
}