Java Code Examples for org.apache.iceberg.expressions.Expressions#alwaysTrue()

The following examples show how to use org.apache.iceberg.expressions.Expressions#alwaysTrue(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You may also check out the related API usage in the sidebar.
Example 1
Source File: TestResiduals.java    From iceberg with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a residual evaluator over an unpartitioned spec for a range of expressions and
 * asserts that the residual computed for an empty row equals the input expression.
 */
@Test
public void testUnpartitionedResiduals() {
  Expression[] candidates = {
      Expressions.alwaysTrue(),
      Expressions.alwaysFalse(),
      Expressions.lessThan("a", 5),
      Expressions.greaterThanOrEqual("b", 16),
      Expressions.notNull("c"),
      Expressions.isNull("d"),
      Expressions.in("e", 1, 2, 3),
      Expressions.notIn("f", 1, 2, 3)
  };

  for (int i = 0; i < candidates.length; i += 1) {
    Expression candidate = candidates[i];
    ResidualEvaluator evaluator = ResidualEvaluator.of(PartitionSpec.unpartitioned(), candidate, true);
    Expression residual = evaluator.residualFor(Row.of());
    Assert.assertEquals("Should return expression", candidate, residual);
  }
}
 
Example 2
Source File: ManifestReader.java    From iceberg with Apache License 2.0 6 votes vote down vote up
/**
 * Opens the manifest entries for this reader.
 *
 * <p>When neither the row filter nor the partition filter is set to something other than
 * {@code Expressions.alwaysTrue()}, the projected entries are returned unfiltered. Otherwise
 * entries are kept only if they are non-null and pass both the partition evaluator and the
 * metrics evaluator.
 *
 * @return an iterable over the (possibly filtered) manifest entries
 */
CloseableIterable<ManifestEntry<F>> entries() {
  boolean rowFilterIsTrivial = rowFilter == null || rowFilter == Expressions.alwaysTrue();
  boolean partFilterIsTrivial = partFilter == null || partFilter == Expressions.alwaysTrue();
  if (rowFilterIsTrivial && partFilterIsTrivial) {
    // nothing to filter on: hand back the plain projection
    return open(projection(fileSchema, fileProjection, columns, caseSensitive));
  }

  Evaluator partitionEvaluator = evaluator();
  InclusiveMetricsEvaluator statsEvaluator = metricsEvaluator();

  // the metrics evaluator needs the stats columns, so add them to the projection when required
  Collection<String> projectColumns;
  if (requireStatsProjection(rowFilter, columns)) {
    projectColumns = withStatsColumns(columns);
  } else {
    projectColumns = columns;
  }

  return CloseableIterable.filter(
      open(projection(fileSchema, fileProjection, projectColumns, caseSensitive)),
      entry -> entry != null &&
          partitionEvaluator.eval(entry.file().partition()) &&
          statsEvaluator.eval(entry.file()));
}
 
Example 3
Source File: DataFilesTable.java    From iceberg with Apache License 2.0 6 votes vote down vote up
/**
 * Plans one read task per data manifest of the given snapshot.
 *
 * <p>The residual evaluator is built from {@code rowFilter}, or from
 * {@code Expressions.alwaysTrue()} when {@code ignoreResiduals} is set.
 */
@Override
protected CloseableIterable<FileScanTask> planFiles(
    TableOperations ops, Snapshot snapshot, Expression rowFilter,
    boolean ignoreResiduals, boolean caseSensitive, boolean colStats) {
  CloseableIterable<ManifestFile> dataManifests = CloseableIterable.withNoopClose(snapshot.dataManifests());
  String schemaJson = SchemaParser.toJson(schema());
  String specJson = PartitionSpecParser.toJson(PartitionSpec.unpartitioned());

  Expression residualFilter;
  if (ignoreResiduals) {
    residualFilter = Expressions.alwaysTrue();
  } else {
    residualFilter = rowFilter;
  }
  ResidualEvaluator residualEvaluator = ResidualEvaluator.unpartitioned(residualFilter);

  // Data tasks emit the table schema rather than the projection schema; projection is left to the processing
  // engine. The table schema is used here because it may omit the partition schema, which avoids an empty struct
  // for unpartitioned tables. Some engines, like Spark, can't handle empty structs in all cases.
  return CloseableIterable.transform(dataManifests, manifest ->
      new ManifestReadTask(ops.io(), manifest, fileSchema, schemaJson, specJson, residualEvaluator));
}
 
Example 4
Source File: AllDataFilesTable.java    From iceberg with Apache License 2.0 6 votes vote down vote up
/**
 * Plans one read task per manifest returned by {@code allDataManifestFiles} for the
 * snapshots of the current table metadata.
 *
 * <p>Residuals are evaluated against {@code rowFilter} unless {@code ignoreResiduals} is set,
 * in which case {@code Expressions.alwaysTrue()} is used instead.
 */
@Override
protected CloseableIterable<FileScanTask> planFiles(
    TableOperations ops, Snapshot snapshot, Expression rowFilter,
    boolean ignoreResiduals, boolean caseSensitive, boolean colStats) {
  CloseableIterable<ManifestFile> allManifests = allDataManifestFiles(ops.current().snapshots());
  String tableSchemaJson = SchemaParser.toJson(schema());
  String unpartitionedSpecJson = PartitionSpecParser.toJson(PartitionSpec.unpartitioned());
  ResidualEvaluator residualEvaluator =
      ResidualEvaluator.unpartitioned(ignoreResiduals ? Expressions.alwaysTrue() : rowFilter);

  // Data tasks emit the table schema rather than the projection schema; projection is left to the processing
  // engine. The table schema is used here because it may omit the partition schema, which avoids an empty struct
  // for unpartitioned tables. Some engines, like Spark, can't handle empty structs in all cases.
  return CloseableIterable.transform(allManifests, manifest ->
      new DataFilesTable.ManifestReadTask(
          ops.io(), manifest, fileSchema, tableSchemaJson, unpartitionedSpecJson, residualEvaluator));
}
 
Example 5
Source File: ManifestGroup.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a manifest group over a copy of the given manifests, with every setting at its
 * initial value: all three filters are {@code Expressions.alwaysTrue()}, no ignore flag is set,
 * the column selection is {@code ManifestReader.ALL_COLUMNS}, matching is case sensitive, and
 * both predicates accept everything.
 *
 * @param io file IO stored on this group
 * @param manifests manifests to include; copied into a new hash set
 */
ManifestGroup(FileIO io, Iterable<ManifestFile> manifests) {
  // inputs
  this.io = io;
  this.manifests = Sets.newHashSet(manifests);

  // filters start out matching everything
  this.partitionFilter = Expressions.alwaysTrue();
  this.fileFilter = Expressions.alwaysTrue();
  this.dataFilter = Expressions.alwaysTrue();
  this.manifestEntryPredicate = e -> true;
  this.manifestPredicate = m -> true;

  // flags and projection
  this.ignoreResiduals = false;
  this.ignoreExisting = false;
  this.ignoreDeleted = false;
  this.caseSensitive = true;
  this.columns = ManifestReader.ALL_COLUMNS;
}
 
Example 6
Source File: ManifestReader.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the metrics evaluator, creating and caching it on first use.
 *
 * <p>The evaluator is built against the spec's schema using {@code rowFilter}, or
 * {@code Expressions.alwaysTrue()} when no row filter is set.
 */
private InclusiveMetricsEvaluator metricsEvaluator() {
  if (lazyMetricsEvaluator != null) {
    return lazyMetricsEvaluator;
  }

  Expression effectiveFilter = rowFilter != null ? rowFilter : Expressions.alwaysTrue();
  this.lazyMetricsEvaluator = new InclusiveMetricsEvaluator(spec.schema(), effectiveFilter, caseSensitive);
  return lazyMetricsEvaluator;
}
 
Example 7
Source File: ManifestReader.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the partition evaluator, creating and caching it on first use.
 *
 * <p>The row filter is projected onto the partition spec with an inclusive projection and
 * combined with the partition filter using {@code Expressions.and}. If the combined expression
 * is null, {@code Expressions.alwaysTrue()} is used in its place.
 */
private Evaluator evaluator() {
  if (lazyEvaluator != null) {
    return lazyEvaluator;
  }

  Expression projectedRowFilter = Projections.inclusive(spec, caseSensitive).project(rowFilter);
  Expression combined = Expressions.and(projectedRowFilter, partFilter);
  Expression effectiveFilter = combined != null ? combined : Expressions.alwaysTrue();
  this.lazyEvaluator = new Evaluator(spec.partitionType(), effectiveFilter, caseSensitive);
  return lazyEvaluator;
}
 
Example 8
Source File: TableScanContext.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a scan context with its initial settings: no snapshot or snapshot range, a row
 * filter of {@code Expressions.alwaysTrue()}, no selected columns, empty options, case-sensitive
 * matching, and both {@code ignoreResiduals} and {@code colStats} turned off.
 */
TableScanContext() {
  // snapshot selection is unset
  this.snapshotId = null;
  this.fromSnapshotId = null;
  this.toSnapshotId = null;

  // filtering and projection
  this.rowFilter = Expressions.alwaysTrue();
  this.selectedColumns = null;
  this.caseSensitive = true;

  // flags and options
  this.ignoreResiduals = false;
  this.colStats = false;
  this.options = ImmutableMap.of();
}
 
Example 9
Source File: ManifestReader.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Decides whether stats should be dropped for the given filter and column selection.
 *
 * <p>Stats are dropped only when the row filter is not {@code Expressions.alwaysTrue()}, the
 * selection does not contain all of {@code ManifestReader.ALL_COLUMNS}, and the selection has
 * no column in common with {@code STATS_COLUMNS}. A selection that includes even some of the
 * stats columns therefore keeps its stats.
 *
 * @param rowFilter the row filter in effect
 * @param columns the selected column names
 * @return true if stats should be dropped
 */
static boolean dropStats(Expression rowFilter, Collection<String> columns) {
  if (rowFilter == Expressions.alwaysTrue()) {
    return false;
  }

  if (columns.containsAll(ManifestReader.ALL_COLUMNS)) {
    return false;
  }

  return Sets.intersection(Sets.newHashSet(columns), STATS_COLUMNS).isEmpty();
}
 
Example 10
Source File: ParquetFilters.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Converts an unbound predicate by binding it first and then dispatching on the result.
 *
 * <p>A bound predicate is converted by the bound-predicate overload; the
 * {@code alwaysTrue} and {@code alwaysFalse} expressions map to the corresponding
 * {@code INSTANCE} constants.
 *
 * @throws UnsupportedOperationException if binding produces anything else
 */
@Override
public <T> FilterPredicate predicate(UnboundPredicate<T> pred) {
  Expression bound = bind(pred);

  if (bound instanceof BoundPredicate) {
    return predicate((BoundPredicate<?>) bound);
  }

  if (bound == Expressions.alwaysTrue()) {
    return AlwaysTrue.INSTANCE;
  }

  if (bound == Expressions.alwaysFalse()) {
    return AlwaysFalse.INSTANCE;
  }

  throw new UnsupportedOperationException("Cannot convert to Parquet filter: " + pred);
}
 
Example 11
Source File: ParquetReader.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a reader, storing each argument on the instance.
 *
 * <p>A filter that is {@code Expressions.alwaysTrue()} is stored as null so that no work is
 * spent evaluating a trivial filter.
 */
public ParquetReader(InputFile input, Schema expectedSchema, ParquetReadOptions options,
                     Function<MessageType, ParquetValueReader<?>> readerFunc, NameMapping nameMapping,
                     Expression filter, boolean reuseContainers, boolean caseSensitive) {
  this.input = input;
  this.expectedSchema = expectedSchema;
  this.options = options;
  this.readerFunc = readerFunc;
  this.nameMapping = nameMapping;
  this.reuseContainers = reuseContainers;
  this.caseSensitive = caseSensitive;

  if (filter == Expressions.alwaysTrue()) {
    // a trivial filter is dropped rather than evaluated
    this.filter = null;
  } else {
    this.filter = filter;
  }
}
 
Example 12
Source File: OrcIterable.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Creates an iterable, storing each argument on the instance.
 *
 * <p>A filter that is {@code Expressions.alwaysTrue()} is stored as null; any other filter is
 * stored unchanged.
 */
OrcIterable(InputFile file, Configuration config, Schema schema,
            Long start, Long length,
            Function<TypeDescription, OrcRowReader<?>> readerFunction, boolean caseSensitive, Expression filter) {
  this.file = file;
  this.config = config;
  this.schema = schema;
  this.start = start;
  this.length = length;
  this.readerFunction = readerFunction;
  this.caseSensitive = caseSensitive;

  if (filter == Expressions.alwaysTrue()) {
    this.filter = null;
  } else {
    this.filter = filter;
  }
}
 
Example 13
Source File: IcebergFilterGenerator.java    From metacat with Apache License 2.0 5 votes vote down vote up
/**
 * Evaluates a single-term comparison node into a constant expression.
 *
 * <p>The node's first child is visited; the result is {@code alwaysTrue} only when the visit
 * yields a non-null value whose string form parses as boolean true. Otherwise the result is
 * {@code alwaysFalse}.
 *
 * @param node comparison node whose first child is evaluated
 * @param data visitor data passed through to the child
 * @return {@code Expressions.alwaysTrue()} or {@code Expressions.alwaysFalse()}
 */
private Expression evalSingleTerm(final ASTCOMPARE node, final Object data) {
    final Object childResult = node.jjtGetChild(0).jjtAccept(this, data);
    final boolean isTrue = childResult != null && Boolean.parseBoolean(childResult.toString());
    if (isTrue) {
        return Expressions.alwaysTrue();
    } else {
        return Expressions.alwaysFalse();
    }
}
 
Example 14
Source File: SparkWriteBuilder.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Configures this builder to overwrite using the given filters.
 *
 * <p>The filters are converted to an expression. If that expression is
 * {@code Expressions.alwaysTrue()} and the overwrite mode is {@code "dynamic"}, dynamic
 * overwrite is enabled instead. Otherwise overwrite-by-filter is enabled, which fails if
 * dynamic overwrite was already set.
 *
 * @param filters filters selecting the data to overwrite
 * @return this builder
 */
@Override
public WriteBuilder overwrite(Filter[] filters) {
  this.overwriteExpr = SparkFilters.convert(filters);

  boolean matchesEverything = overwriteExpr == Expressions.alwaysTrue();
  boolean useDynamic = matchesEverything && "dynamic".equals(overwriteMode);

  if (!useDynamic) {
    Preconditions.checkState(!overwriteDynamic, "Cannot overwrite dynamically and by filter: %s", overwriteExpr);
    this.overwriteByFilter = true;
    return this;
  }

  // use the write option to override truncating the table. use dynamic overwrite instead.
  this.overwriteDynamic = true;
  return this;
}
 
Example 15
Source File: SparkStreamingWrite.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a streaming write.
 *
 * <p>Most arguments are forwarded to the superclass constructor, together with a literal
 * {@code false} and {@code Expressions.alwaysTrue()}. {@code truncateBatches} and
 * {@code queryId} are also stored on this instance.
 */
SparkStreamingWrite(Table table, Broadcast<FileIO> io, Broadcast<EncryptionManager> encryptionManager,
                    CaseInsensitiveStringMap options, boolean truncateBatches, String queryId,
                    String applicationId, String wapId, Schema writeSchema, StructType dsSchema) {
  super(
      table,
      io,
      encryptionManager,
      options,
      false,
      truncateBatches,
      Expressions.alwaysTrue(),
      applicationId,
      wapId,
      writeSchema,
      dsSchema);
  this.queryId = queryId;
  this.truncateBatches = truncateBatches;
}
 
Example 16
Source File: RewriteDataFilesAction.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a rewrite action for the given table.
 *
 * <p>The filter starts as {@code Expressions.alwaysTrue()}, and case sensitivity is read from
 * the Spark conf key {@code spark.sql.caseSensitive} (default {@code "false"}). The target size
 * is the smaller of the table's split size and write target file size properties. The split
 * lookback and open-file cost are read from table properties with their defaults.
 *
 * @param spark session supplying the Spark context and conf
 * @param table table whose data files will be rewritten
 */
RewriteDataFilesAction(SparkSession spark, Table table) {
  this.sparkContext = new JavaSparkContext(spark.sparkContext());
  this.table = table;
  this.spec = table.spec();
  this.fileIO = SparkUtil.serializableFileIO(table);
  this.encryptionManager = table.encryption();

  this.filter = Expressions.alwaysTrue();
  this.caseSensitive = Boolean.parseBoolean(spark.conf().get("spark.sql.caseSensitive", "false"));

  // the target size is capped by whichever of the two configured sizes is smaller
  long configuredSplitSize = PropertyUtil.propertyAsLong(
      table.properties(), TableProperties.SPLIT_SIZE, TableProperties.SPLIT_SIZE_DEFAULT);
  long configuredTargetFileSize = PropertyUtil.propertyAsLong(
      table.properties(),
      TableProperties.WRITE_TARGET_FILE_SIZE_BYTES,
      TableProperties.WRITE_TARGET_FILE_SIZE_BYTES_DEFAULT);
  this.targetSizeInBytes = Math.min(configuredSplitSize, configuredTargetFileSize);

  this.splitLookback = PropertyUtil.propertyAsInt(
      table.properties(), TableProperties.SPLIT_LOOKBACK, TableProperties.SPLIT_LOOKBACK_DEFAULT);
  this.splitOpenFileCost = PropertyUtil.propertyAsLong(
      table.properties(), TableProperties.SPLIT_OPEN_FILE_COST, TableProperties.SPLIT_OPEN_FILE_COST_DEFAULT);
}
 
Example 17
Source File: IcebergInputFormat.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Wraps the iterable in a residual filter when one is needed.
 *
 * <p>The iterable is returned unchanged when residual filtering is skipped via the
 * {@code SKIP_RESIDUAL_FILTERING} configuration flag, or when the residual is null or
 * {@code Expressions.alwaysTrue()}. Otherwise records are kept only if they pass an evaluator
 * built from the residual and the read schema.
 *
 * @param iter records to filter
 * @param residual residual expression to apply, may be null
 * @param readSchema schema used to build the evaluator
 * @return the original or the filtered iterable
 */
private CloseableIterable<T> applyResidualFiltering(CloseableIterable<T> iter, Expression residual,
                                                    Schema readSchema) {
  boolean skipResidual = context.getConfiguration().getBoolean(SKIP_RESIDUAL_FILTERING, false);
  if (skipResidual || residual == null || residual == Expressions.alwaysTrue()) {
    return iter;
  }

  Evaluator residualEvaluator = new Evaluator(readSchema.asStruct(), residual, caseSensitive);
  return CloseableIterable.filter(iter, record -> residualEvaluator.eval((StructLike) record));
}
 
Example 18
Source File: ScanSummary.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Combines the given expressions into one by and-ing them together in order, starting from
 * {@code Expressions.alwaysTrue()}.
 *
 * @param expressions expressions to combine
 * @return the combined expression
 */
static Expression joinFilters(List<Expression> expressions) {
  return expressions.stream().reduce(Expressions.alwaysTrue(), Expressions::and);
}
 
Example 19
Source File: ManifestReader.java    From iceberg with Apache License 2.0 4 votes vote down vote up
/**
 * Decides whether the stats columns must be added to the projection so that the metrics
 * evaluator has them available.
 *
 * <p>This is the case when the row filter is not {@code Expressions.alwaysTrue()} and the
 * selection contains neither all of {@code ManifestReader.ALL_COLUMNS} nor all of
 * {@code STATS_COLUMNS}.
 *
 * @param rowFilter the row filter in effect
 * @param columns the selected column names
 * @return true if the stats columns need to be projected
 */
private static boolean requireStatsProjection(Expression rowFilter, Collection<String> columns) {
  boolean filterIsTrivial = rowFilter == Expressions.alwaysTrue();
  return !(filterIsTrivial ||
      columns.containsAll(ManifestReader.ALL_COLUMNS) ||
      columns.containsAll(STATS_COLUMNS));
}
 
Example 20
Source File: Reader.java    From iceberg with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the configured filter expressions and-ed together into a single expression, or
 * {@code Expressions.alwaysTrue()} when no filter expressions are set.
 */
private Expression filterExpression() {
  if (filterExpressions == null) {
    return Expressions.alwaysTrue();
  }

  Expression combined = filterExpressions.stream().reduce(Expressions.alwaysTrue(), Expressions::and);
  return combined;
}