org.apache.parquet.filter.UnboundRecordFilter Java Examples

The following examples show how to use org.apache.parquet.filter.UnboundRecordFilter. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: ParquetInputFormat.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/**
 * Instantiates the {@link UnboundRecordFilter} class that
 * {@code ConfigurationUtil.getClassFromConfig} resolves for {@code UNBOUND_RECORD_FILTER}.
 * <p>
 * The class is created through its no-argument constructor. If the new instance is
 * {@link Configurable}, the configuration is handed to it before it is returned.
 *
 * @param configuration the configuration the filter class is looked up in
 * @return a new filter instance, or null when the lookup yields no class
 * @throws BadConfigurationException if the filter class cannot be instantiated, including when
 *         it has no accessible no-argument constructor or its constructor throws
 */
private static UnboundRecordFilter getUnboundRecordFilterInstance(Configuration configuration) {
  Class<?> clazz = ConfigurationUtil.getClassFromConfig(configuration, UNBOUND_RECORD_FILTER, UnboundRecordFilter.class);
  if (clazz == null) {
    return null;
  }

  try {
    // Class.newInstance() is deprecated: it rethrows checked exceptions from the constructor
    // without declaring them, so they would bypass the catch below. Going through the
    // Constructor reports them as InvocationTargetException instead.
    UnboundRecordFilter unboundRecordFilter =
        (UnboundRecordFilter) clazz.getDeclaredConstructor().newInstance();

    if (unboundRecordFilter instanceof Configurable) {
      ((Configurable) unboundRecordFilter).setConf(configuration);
    }

    return unboundRecordFilter;
  } catch (ReflectiveOperationException e) {
    // Covers InstantiationException, IllegalAccessException, NoSuchMethodException
    // and InvocationTargetException.
    throw new BadConfigurationException(
        "could not instantiate unbound record filter class", e);
  }
}
 
Example #2
Source File: FilterCompat.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Wraps whichever of the two supplied filters is non-null in a {@code Filter}.
 * <p>
 * At most one of the two arguments may be non-null; if both are provided the argument check
 * fails and an exception is thrown.
 * <p>
 * If both are null, {@code NOOP} is returned.
 *
 * @param filterPredicate a filter predicate, or null
 * @param unboundRecordFilter an unbound record filter (from the old API), or null
 * @return the result of the matching single-argument {@code get} overload, or {@code NOOP}
 *         when neither argument is provided
 */
public static Filter get(FilterPredicate filterPredicate, UnboundRecordFilter unboundRecordFilter) {
  final boolean hasPredicate = filterPredicate != null;
  final boolean hasRecordFilter = unboundRecordFilter != null;

  checkArgument(!(hasPredicate && hasRecordFilter),
      "Cannot provide both a FilterPredicate and an UnboundRecordFilter");

  // The predicate takes precedence in the order of checks, although the guard above
  // already guarantees that at most one of the two is set.
  if (hasPredicate) {
    return get(filterPredicate);
  }
  if (hasRecordFilter) {
    return get(unboundRecordFilter);
  }
  return NOOP;
}
 
Example #3
Source File: FilteredRecordReader.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a record reader whose record filter is obtained by binding the given unbound filter
 * to this reader's column readers.
 *
 * @param root               the root of the schema
 * @param recordMaterializer forwarded to the superclass constructor
 * @param validating         forwarded to the superclass constructor
 * @param columnStore        forwarded to the superclass constructor
 * @param unboundFilter      filter to bind against the column readers; when null, the record
 *                           filter of this reader is left null
 * @param recordCount        stored in the {@code recordCount} field
 */
public FilteredRecordReader(MessageColumnIO root, RecordMaterializer<T> recordMaterializer, boolean validating,
                            ColumnReadStoreImpl columnStore, UnboundRecordFilter unboundFilter, long recordCount) {
  super(root, recordMaterializer, validating, columnStore);
  this.recordCount = recordCount;
  // Binding needs the column readers, so it can only happen after super(...) has run.
  recordFilter = (unboundFilter == null) ? null : unboundFilter.bind(getColumnReaders());
}
 
Example #4
Source File: ParquetInputFormat.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Records the class of an {@link UnboundRecordFilter} in the job's configuration.
 * <p>
 * The class name is stored under {@code UNBOUND_RECORD_FILTER}. The call is rejected by the
 * argument check when {@code getFilterPredicate} already returns a predicate for that
 * configuration.
 *
 * @param job the job whose configuration is updated
 * @param filterClass the filter class whose name is recorded
 */
public static void setUnboundRecordFilter(Job job, Class<? extends UnboundRecordFilter> filterClass) {
  final Configuration jobConf = ContextUtil.getConfiguration(job);

  final boolean predicateAbsent = getFilterPredicate(jobConf) == null;
  checkArgument(predicateAbsent,
      "You cannot provide an UnboundRecordFilter after providing a FilterPredicate");

  final String filterClassName = filterClass.getName();
  jobConf.set(UNBOUND_RECORD_FILTER, filterClassName);
}
 
Example #5
Source File: FilterCompat.java    From parquet-mr with Apache License 2.0 4 votes vote down vote up
/**
 * Wraps the given unbound record filter.
 *
 * @param unboundRecordFilter the filter to wrap; must not be null
 * @throws NullPointerException if {@code unboundRecordFilter} is null
 */
private UnboundRecordFilterCompat(UnboundRecordFilter unboundRecordFilter) {
  Objects.requireNonNull(unboundRecordFilter, "unboundRecordFilter cannot be null");
  this.unboundRecordFilter = unboundRecordFilter;
}
 
Example #6
Source File: FilterCompat.java    From parquet-mr with Apache License 2.0 4 votes vote down vote up
/**
 * @return the unbound record filter held by this instance
 */
public UnboundRecordFilter getUnboundRecordFilter() {
  return this.unboundRecordFilter;
}
 
Example #7
Source File: TajoParquetReader.java    From tajo with Apache License 2.0 3 votes vote down vote up
/**
 * Creates a new TajoParquetReader.
 * <p>
 * Builds a {@code TajoReadSupport} from the two schemas and passes it, together with the
 * remaining arguments, to the superclass constructor.
 *
 * @param conf the configuration
 * @param file The file to read from.
 * @param readSchema Tajo schema of the table.
 * @param requestedSchema Tajo schema of the projection.
 * @param recordFilter Record filter, forwarded unchanged to the superclass constructor.
 * @throws IOException if constructing the underlying reader fails with an I/O error
 */
public TajoParquetReader(Configuration conf, Path file, Schema readSchema,
                         Schema requestedSchema,
                         UnboundRecordFilter recordFilter)
    throws IOException {
  super(conf, file, new TajoReadSupport(readSchema, requestedSchema),
        recordFilter);
}
 
Example #8
Source File: MessageColumnIO.java    From parquet-mr with Apache License 2.0 3 votes vote down vote up
/**
 * Creates a record reader that filters with a record filter from the old filter API.
 * <p>
 * The filter is converted with {@code FilterCompat.get(filter)} and the call is delegated to
 * the {@code getRecordReader} overload that accepts the converted filter.
 *
 * @param columns a page read store with the column data
 * @param recordMaterializer a record materializer
 * @param filter a record filter
 * @param <T> the type of records returned by the reader
 * @return a record reader
 * @deprecated use getRecordReader(PageReadStore, RecordMaterializer, Filter)
 */
@Deprecated
public <T> RecordReader<T> getRecordReader(PageReadStore columns,
                                           RecordMaterializer<T> recordMaterializer,
                                           UnboundRecordFilter filter) {
  return getRecordReader(columns, recordMaterializer, FilterCompat.get(filter));
}
 
Example #9
Source File: InternalParquetRecordReader.java    From parquet-mr with Apache License 2.0 2 votes vote down vote up
/**
 * Creates a reader from an old-API record filter by converting it with
 * {@code FilterCompat.get(filter)} and delegating to the constructor that takes the result.
 *
 * @param readSupport Object which helps read files of the given type, e.g. Thrift, Avro.
 * @param filter Optional filter for only returning matching records.
 *               NOTE(review): the value is passed straight to {@code FilterCompat.get} —
 *               confirm that a null filter is actually accepted there.
 * @deprecated use {@link #InternalParquetRecordReader(ReadSupport, Filter)}
 */
@Deprecated
public InternalParquetRecordReader(ReadSupport<T> readSupport, UnboundRecordFilter filter) {
  this(readSupport, FilterCompat.get(filter));
}
 
Example #10
Source File: AvroParquetReader.java    From parquet-mr with Apache License 2.0 2 votes vote down vote up
/**
 * Creates a reader for the given file that uses a new {@code AvroReadSupport} and the given
 * old-API record filter; all arguments are forwarded to the superclass constructor.
 *
 * @param conf a configuration
 * @param file a file path
 * @param unboundRecordFilter an unbound record filter (from the old filter API)
 * @throws IOException if there is an error while reading
 * @deprecated will be removed in 2.0.0; use {@link #builder(InputFile)} instead.
 */
@Deprecated
public AvroParquetReader(Configuration conf, Path file, UnboundRecordFilter unboundRecordFilter) throws IOException {
  super(conf, file, new AvroReadSupport<T>(), unboundRecordFilter);
}
 
Example #11
Source File: AvroParquetReader.java    From parquet-mr with Apache License 2.0 2 votes vote down vote up
/**
 * Creates a reader for the given file that uses a new {@code AvroReadSupport} and the given
 * old-API record filter. No configuration is supplied by this constructor; the arguments are
 * forwarded to the superclass constructor.
 *
 * @param file a file path
 * @param unboundRecordFilter an unbound record filter (from the old filter API)
 * @throws IOException if there is an error while reading
 * @deprecated will be removed in 2.0.0; use {@link #builder(InputFile)} instead.
 */
@Deprecated
public AvroParquetReader(Path file, UnboundRecordFilter unboundRecordFilter) throws IOException {
  super(file, new AvroReadSupport<T>(), unboundRecordFilter);
}
 
Example #12
Source File: ProtoParquetReader.java    From parquet-mr with Apache License 2.0 2 votes vote down vote up
/**
 * Creates a reader for the given file that uses a new {@code ProtoReadSupport} and the given
 * record filter; the arguments are forwarded to the superclass constructor.
 *
 * @param file a file path
 * @param recordFilter an unbound record filter
 * @throws IOException if there is an error while reading
 * @deprecated use {@link #builder(Path)}
 */
@Deprecated
@SuppressWarnings("unchecked")
public ProtoParquetReader(Path file, UnboundRecordFilter recordFilter) throws IOException {
  // ProtoReadSupport is created without type arguments here; presumably that raw use is what
  // the unchecked suppression above is for — confirm before removing it.
  super(file, new ProtoReadSupport(), recordFilter);
}
 
Example #13
Source File: ParquetRecordReader.java    From parquet-mr with Apache License 2.0 2 votes vote down vote up
/**
 * Creates a record reader from an old-API record filter by converting it with
 * {@code FilterCompat.get(filter)} and delegating to the constructor that takes the result.
 *
 * @param readSupport Object which helps read files of the given type, e.g. Thrift, Avro.
 * @param filter for filtering individual records
 * @deprecated will be removed in 2.0.0.
 */
@Deprecated
public ParquetRecordReader(ReadSupport<T> readSupport, UnboundRecordFilter filter) {
  this(readSupport, FilterCompat.get(filter));
}
 
Example #14
Source File: ParquetInputFormat.java    From parquet-mr with Apache License 2.0 2 votes vote down vote up
/**
 * Looks up the class registered under {@code UNBOUND_RECORD_FILTER} in the given configuration
 * via {@code ConfigurationUtil.getClassFromConfig}, passing {@code UnboundRecordFilter.class}
 * as the expected type.
 *
 * @param configuration a configuration
 * @return an unbound record filter class, exactly as returned by the lookup
 *         (presumably null when nothing is configured — confirm against ConfigurationUtil)
 * @deprecated use {@link #getFilter(Configuration)}
 */
@Deprecated
public static Class<?> getUnboundRecordFilter(Configuration configuration) {
  return ConfigurationUtil.getClassFromConfig(configuration, UNBOUND_RECORD_FILTER, UnboundRecordFilter.class);
}
 
Example #15
Source File: ParquetReader.java    From parquet-mr with Apache License 2.0 2 votes vote down vote up
/**
 * Creates a reader from an old-API record filter by converting it with
 * {@code FilterCompat.get(unboundRecordFilter)} and delegating to the constructor that takes
 * the converted filter.
 *
 * @param conf the configuration
 * @param file the file to read
 * @param readSupport to materialize records
 * @param unboundRecordFilter the filter to use to filter records
 * @throws IOException if there is an error while reading
 * @deprecated use {@link #builder(ReadSupport, Path)}
 */
@Deprecated
public ParquetReader(Configuration conf, Path file, ReadSupport<T> readSupport, UnboundRecordFilter unboundRecordFilter) throws IOException {
  this(conf, file, readSupport, FilterCompat.get(unboundRecordFilter));
}
 
Example #16
Source File: ParquetReader.java    From parquet-mr with Apache License 2.0 2 votes vote down vote up
/**
 * Creates a reader with a freshly created {@code Configuration} and an old-API record filter.
 * The filter is converted with {@code FilterCompat.get(unboundRecordFilter)} before delegating
 * to the constructor that takes the converted filter.
 *
 * @param file the file to read
 * @param readSupport to materialize records
 * @param unboundRecordFilter the filter to use to filter records
 * @throws IOException if there is an error while reading
 * @deprecated use {@link #builder(ReadSupport, Path)}
 */
@Deprecated
public ParquetReader(Path file, ReadSupport<T> readSupport, UnboundRecordFilter unboundRecordFilter) throws IOException {
  this(new Configuration(), file, readSupport, FilterCompat.get(unboundRecordFilter));
}
 
Example #17
Source File: InternalParquetRecordReader.java    From tajo with Apache License 2.0 2 votes vote down vote up
/**
 * Creates a reader from an old-API record filter by converting it with
 * {@code FilterCompat.get(filter)} and delegating to the constructor that takes the result.
 *
 * @param readSupport Object which helps read files of the given type, e.g. Thrift, Avro.
 * @param filter Optional filter for only returning matching records.
 *               NOTE(review): the value is passed straight to {@code FilterCompat.get} —
 *               confirm that a null filter is actually accepted there.
 * @deprecated use {@link #InternalParquetRecordReader(ReadSupport, Filter)}
 */
@Deprecated
public InternalParquetRecordReader(ReadSupport<T> readSupport, UnboundRecordFilter filter) {
  this(readSupport, FilterCompat.get(filter));
}
 
Example #18
Source File: FilterCompat.java    From parquet-mr with Apache License 2.0 2 votes vote down vote up
/**
 * Given an UnboundRecordFilter, return a Filter that wraps it.
 * <p>
 * Each call creates a new {@code UnboundRecordFilterCompat} around the argument.
 *
 * @param unboundRecordFilter an unbound record filter
 * @return a Filter for the given record filter (from the old API)
 */
public static Filter get(UnboundRecordFilter unboundRecordFilter) {
  return new UnboundRecordFilterCompat(unboundRecordFilter);
}
 
Example #19
Source File: ParquetReader.java    From tajo with Apache License 2.0 2 votes vote down vote up
/**
 * Creates a reader from an old-API record filter by converting it with
 * {@code FilterCompat.get(unboundRecordFilter)} and delegating to the constructor that takes
 * the converted filter.
 *
 * @param conf the configuration
 * @param file the file to read
 * @param readSupport to materialize records
 * @param unboundRecordFilter the filter to use to filter records
 * @throws IOException if the delegated constructor fails with an I/O error
 * @deprecated use {@link #builder(ReadSupport, Path)}
 */
@Deprecated
public ParquetReader(Configuration conf, Path file, ReadSupport<T> readSupport, UnboundRecordFilter unboundRecordFilter) throws IOException {
  this(conf, file, readSupport, FilterCompat.get(unboundRecordFilter));
}
 
Example #20
Source File: ParquetReader.java    From tajo with Apache License 2.0 2 votes vote down vote up
/**
 * @param file the file to read
 * @param readSupport to materialize records
 * @param unboundRecordFilter the filter to use to filter records
 * @throws IOException
 * @deprecated use {@link #builder(ReadSupport, Path)}
 */
@Deprecated
public ParquetReader(Path file, ReadSupport<T> readSupport, UnboundRecordFilter unboundRecordFilter) throws IOException {
  this(new Configuration(), file, readSupport, FilterCompat.get(unboundRecordFilter));
}