Java Code Examples for org.apache.iceberg.expressions.Expressions#alwaysTrue()

The following examples show how to use org.apache.iceberg.expressions.Expressions#alwaysTrue(). These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also check out the related API usage on the sidebar.
Example 1
Source Project: iceberg   File: TestResiduals.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Verifies that, for an unpartitioned spec, the residual computed for an empty row is the
 * original expression itself, across a sample of expression shapes.
 */
@Test
public void testUnpartitionedResiduals() {
  // samples: constants, comparisons, null checks, and set membership
  Expression[] samples = {
      Expressions.alwaysTrue(),
      Expressions.alwaysFalse(),
      Expressions.lessThan("a", 5),
      Expressions.greaterThanOrEqual("b", 16),
      Expressions.notNull("c"),
      Expressions.isNull("d"),
      Expressions.in("e", 1, 2, 3),
      Expressions.notIn("f", 1, 2, 3)
  };

  for (int i = 0; i < samples.length; i += 1) {
    Expression expected = samples[i];
    ResidualEvaluator evaluator = ResidualEvaluator.of(PartitionSpec.unpartitioned(), expected, true);
    Assert.assertEquals("Should return expression", expected, evaluator.residualFor(Row.of()));
  }
}
 
Example 2
Source Project: iceberg   File: AllDataFilesTable.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Builds one {@code DataFilesTable.ManifestReadTask} per manifest returned by
 * {@code allDataManifestFiles} for the snapshots of the current table metadata.
 *
 * <p>The {@code snapshot}, {@code caseSensitive} and {@code colStats} arguments are not read here.
 *
 * @param ops table operations used for the current metadata and file IO
 * @param rowFilter filter used as the residual unless {@code ignoreResiduals} is set
 * @param ignoreResiduals when true, an always-true expression is used as the residual
 * @return a lazily transformed iterable of scan tasks, one per manifest
 */
@Override
protected CloseableIterable<FileScanTask> planFiles(
    TableOperations ops, Snapshot snapshot, Expression rowFilter,
    boolean ignoreResiduals, boolean caseSensitive, boolean colStats) {
  CloseableIterable<ManifestFile> allManifests = allDataManifestFiles(ops.current().snapshots());
  String schemaJson = SchemaParser.toJson(schema());
  String unpartitionedSpecJson = PartitionSpecParser.toJson(PartitionSpec.unpartitioned());

  Expression residualFilter;
  if (ignoreResiduals) {
    residualFilter = Expressions.alwaysTrue();
  } else {
    residualFilter = rowFilter;
  }
  ResidualEvaluator residualEvaluator = ResidualEvaluator.unpartitioned(residualFilter);

  // Data tasks produce the table schema, not the projection schema and projection is done by processing engines.
  // This data task needs to use the table schema, which may not include a partition schema to avoid having an
  // empty struct in the schema for unpartitioned tables. Some engines, like Spark, can't handle empty structs in
  // all cases.
  return CloseableIterable.transform(
      allManifests,
      manifestFile -> new DataFilesTable.ManifestReadTask(
          ops.io(), manifestFile, fileSchema, schemaJson, unpartitionedSpecJson, residualEvaluator));
}
 
Example 3
Source Project: iceberg   File: DataFilesTable.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Builds one {@code ManifestReadTask} per data manifest of the given snapshot.
 *
 * <p>The {@code caseSensitive} and {@code colStats} arguments are not read here.
 *
 * @param ops table operations used for file IO
 * @param snapshot snapshot whose data manifests are scanned
 * @param rowFilter filter used as the residual unless {@code ignoreResiduals} is set
 * @param ignoreResiduals when true, an always-true expression is used as the residual
 * @return a lazily transformed iterable of scan tasks, one per manifest
 */
@Override
protected CloseableIterable<FileScanTask> planFiles(
    TableOperations ops, Snapshot snapshot, Expression rowFilter,
    boolean ignoreResiduals, boolean caseSensitive, boolean colStats) {
  CloseableIterable<ManifestFile> dataManifests = CloseableIterable.withNoopClose(snapshot.dataManifests());
  String schemaJson = SchemaParser.toJson(schema());
  String unpartitionedSpecJson = PartitionSpecParser.toJson(PartitionSpec.unpartitioned());

  Expression residualFilter;
  if (ignoreResiduals) {
    residualFilter = Expressions.alwaysTrue();
  } else {
    residualFilter = rowFilter;
  }
  ResidualEvaluator residualEvaluator = ResidualEvaluator.unpartitioned(residualFilter);

  // Data tasks produce the table schema, not the projection schema and projection is done by processing engines.
  // This data task needs to use the table schema, which may not include a partition schema to avoid having an
  // empty struct in the schema for unpartitioned tables. Some engines, like Spark, can't handle empty structs in
  // all cases.
  return CloseableIterable.transform(
      dataManifests,
      manifestFile -> new ManifestReadTask(
          ops.io(), manifestFile, fileSchema, schemaJson, unpartitionedSpecJson, residualEvaluator));
}
 
Example 4
Source Project: iceberg   File: ManifestReader.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Opens the manifest and returns its entries, filtered by the partition and metrics evaluators
 * when either the row filter or the partition filter is set to something other than always-true.
 *
 * @return all entries when no effective filter is set, otherwise only the non-null entries that
 *         pass both evaluators
 */
CloseableIterable<ManifestEntry<F>> entries() {
  boolean hasRowFilter = rowFilter != null && rowFilter != Expressions.alwaysTrue();
  boolean hasPartFilter = partFilter != null && partFilter != Expressions.alwaysTrue();

  // nothing to evaluate: read the requested columns as-is
  if (!hasRowFilter && !hasPartFilter) {
    return open(projection(fileSchema, fileProjection, columns, caseSensitive));
  }

  Evaluator partitionEval = evaluator();
  InclusiveMetricsEvaluator metricsEval = metricsEvaluator();

  // ensure stats columns are present for metrics evaluation
  Collection<String> projectColumns = columns;
  if (requireStatsProjection(rowFilter, columns)) {
    projectColumns = withStatsColumns(columns);
  }

  return CloseableIterable.filter(
      open(projection(fileSchema, fileProjection, projectColumns, caseSensitive)),
      entry -> entry != null &&
          partitionEval.eval(entry.file().partition()) &&
          metricsEval.eval(entry.file()));
}
 
Example 5
Source Project: iceberg   File: OrcIterable.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Stores the read configuration for an ORC file.
 *
 * <p>A filter that is the always-true expression instance is stored as {@code null}; any other
 * filter is stored unchanged.
 */
OrcIterable(InputFile file, Configuration config, Schema schema,
            Long start, Long length,
            Function<TypeDescription, OrcRowReader<?>> readerFunction, boolean caseSensitive, Expression filter) {
  // what to read
  this.file = file;
  this.start = start;
  this.length = length;

  // how to read it
  this.config = config;
  this.schema = schema;
  this.readerFunction = readerFunction;
  this.caseSensitive = caseSensitive;

  if (filter == Expressions.alwaysTrue()) {
    this.filter = null;
  } else {
    this.filter = filter;
  }
}
 
Example 6
Source Project: iceberg   File: ScanSummary.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Combines the given expressions with {@code Expressions.and}, folding left to right and
 * starting from the always-true expression.
 *
 * @param expressions expressions to combine, in order
 * @return the folded expression; the always-true seed alone when the list is empty
 */
static Expression joinFilters(List<Expression> expressions) {
  return expressions.stream().reduce(Expressions.alwaysTrue(), Expressions::and);
}
 
Example 7
Source Project: iceberg   File: IcebergInputFormat.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Wraps {@code iter} with a filter that evaluates {@code residual} against each record, unless
 * residual filtering is skipped by configuration or the residual is null or always-true.
 *
 * @param iter records to filter
 * @param residual residual expression to evaluate per record
 * @param readSchema schema whose struct type is used to build the evaluator
 * @return the filtered iterable, or {@code iter} itself when no filtering applies
 */
private CloseableIterable<T> applyResidualFiltering(CloseableIterable<T> iter, Expression residual,
                                                    Schema readSchema) {
  boolean skipResidual = context.getConfiguration().getBoolean(SKIP_RESIDUAL_FILTERING, false);

  if (skipResidual || residual == null || residual == Expressions.alwaysTrue()) {
    return iter;
  }

  Evaluator residualEval = new Evaluator(readSchema.asStruct(), residual, caseSensitive);
  return CloseableIterable.filter(iter, record -> residualEval.eval((StructLike) record));
}
 
Example 8
Source Project: iceberg   File: RewriteDataFilesAction.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Initializes the action from the Spark session and the table's properties.
 *
 * <p>The filter starts as the always-true expression, case sensitivity is read from the
 * {@code spark.sql.caseSensitive} session conf (defaulting to {@code "false"}), and the target
 * size is the smaller of the table's split size and write target file size properties.
 */
RewriteDataFilesAction(SparkSession spark, Table table) {
  this.sparkContext = new JavaSparkContext(spark.sparkContext());
  this.table = table;
  this.spec = table.spec();
  this.filter = Expressions.alwaysTrue();

  String caseSensitiveConf = spark.conf().get("spark.sql.caseSensitive", "false");
  this.caseSensitive = Boolean.parseBoolean(caseSensitiveConf);

  long configuredSplitSize = PropertyUtil.propertyAsLong(
      table.properties(), TableProperties.SPLIT_SIZE, TableProperties.SPLIT_SIZE_DEFAULT);
  long configuredTargetFileSize = PropertyUtil.propertyAsLong(
      table.properties(),
      TableProperties.WRITE_TARGET_FILE_SIZE_BYTES,
      TableProperties.WRITE_TARGET_FILE_SIZE_BYTES_DEFAULT);
  this.targetSizeInBytes = Math.min(configuredSplitSize, configuredTargetFileSize);

  this.splitLookback = PropertyUtil.propertyAsInt(
      table.properties(), TableProperties.SPLIT_LOOKBACK, TableProperties.SPLIT_LOOKBACK_DEFAULT);
  this.splitOpenFileCost = PropertyUtil.propertyAsLong(
      table.properties(), TableProperties.SPLIT_OPEN_FILE_COST, TableProperties.SPLIT_OPEN_FILE_COST_DEFAULT);

  this.fileIO = SparkUtil.serializableFileIO(table);
  this.encryptionManager = table.encryption();
}
 
Example 9
Source Project: iceberg   File: SparkStreamingWrite.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates a streaming write by delegating to the parent constructor and recording the
 * truncate flag and query id.
 *
 * <p>The parent receives a literal {@code false} and {@code Expressions.alwaysTrue()} in the
 * positions between {@code options} and {@code applicationId}; all other arguments are passed
 * through unchanged.
 */
SparkStreamingWrite(Table table, Broadcast<FileIO> io, Broadcast<EncryptionManager> encryptionManager,
                    CaseInsensitiveStringMap options, boolean truncateBatches, String queryId,
                    String applicationId, String wapId, Schema writeSchema, StructType dsSchema) {
  super(
      table,
      io,
      encryptionManager,
      options,
      false,
      truncateBatches,
      Expressions.alwaysTrue(),
      applicationId,
      wapId,
      writeSchema,
      dsSchema);
  this.queryId = queryId;
  this.truncateBatches = truncateBatches;
}
 
Example 10
Source Project: iceberg   File: SparkWriteBuilder.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Converts the Spark filters to an expression and records which overwrite mode to use.
 *
 * <p>When the converted expression is always-true and the overwrite mode is {@code "dynamic"},
 * dynamic overwrite is selected. Otherwise overwrite-by-filter is selected, which fails if
 * dynamic overwrite was already set.
 *
 * @param filters Spark filters describing the rows to overwrite
 * @return this builder
 */
@Override
public WriteBuilder overwrite(Filter[] filters) {
  this.overwriteExpr = SparkFilters.convert(filters);

  boolean matchesEverything = overwriteExpr == Expressions.alwaysTrue();
  if (!matchesEverything || !"dynamic".equals(overwriteMode)) {
    Preconditions.checkState(!overwriteDynamic, "Cannot overwrite dynamically and by filter: %s", overwriteExpr);
    this.overwriteByFilter = true;
  } else {
    // use the write option to override truncating the table. use dynamic overwrite instead.
    this.overwriteDynamic = true;
  }

  return this;
}
 
Example 11
Source Project: metacat   File: IcebergFilterGenerator.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Evaluates the first child of the node and maps its result to a constant expression.
 *
 * @param node compare node whose first child is visited
 * @param data visitor data passed through to the child
 * @return always-true when the child's value is non-null and its string form parses as
 *         boolean {@code true}; always-false otherwise
 */
private Expression evalSingleTerm(final ASTCOMPARE node, final Object data) {
    final Object value = node.jjtGetChild(0).jjtAccept(this, data);
    if (value == null) {
        return Expressions.alwaysFalse();
    }

    if (Boolean.parseBoolean(value.toString())) {
        return Expressions.alwaysTrue();
    } else {
        return Expressions.alwaysFalse();
    }
}
 
Example 12
Source Project: iceberg   File: ManifestGroup.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates a manifest group over a copy of the given manifests with permissive defaults: every
 * filter is always-true, both predicates accept everything, nothing is ignored, all columns are
 * selected, and matching is case sensitive.
 */
ManifestGroup(FileIO io, Iterable<ManifestFile> manifests) {
  this.io = io;
  this.manifests = Sets.newHashSet(manifests);

  // filters: accept everything by default
  this.dataFilter = Expressions.alwaysTrue();
  this.fileFilter = Expressions.alwaysTrue();
  this.partitionFilter = Expressions.alwaysTrue();
  this.manifestPredicate = m -> true;
  this.manifestEntryPredicate = e -> true;

  // flags: nothing is ignored by default
  this.ignoreDeleted = false;
  this.ignoreExisting = false;
  this.ignoreResiduals = false;

  // projection and binding defaults
  this.columns = ManifestReader.ALL_COLUMNS;
  this.caseSensitive = true;
}
 
Example 13
Source Project: iceberg   File: ParquetReader.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Stores the read configuration for a Parquet file.
 *
 * <p>A filter that is the always-true expression instance is stored as {@code null}; any other
 * filter is stored unchanged.
 */
public ParquetReader(InputFile input, Schema expectedSchema, ParquetReadOptions options,
                     Function<MessageType, ParquetValueReader<?>> readerFunc, NameMapping nameMapping,
                     Expression filter, boolean reuseContainers, boolean caseSensitive) {
  this.input = input;
  this.expectedSchema = expectedSchema;
  this.options = options;
  this.readerFunc = readerFunc;
  this.nameMapping = nameMapping;

  // replace alwaysTrue with null to avoid extra work evaluating a trivial filter
  if (filter == Expressions.alwaysTrue()) {
    this.filter = null;
  } else {
    this.filter = filter;
  }

  this.reuseContainers = reuseContainers;
  this.caseSensitive = caseSensitive;
}
 
Example 14
Source Project: iceberg   File: ParquetFilters.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Binds the unbound predicate and converts the result to a Parquet filter predicate.
 *
 * @param pred unbound predicate to convert
 * @return the converted bound predicate, or the always-true / always-false Parquet predicate
 *         when binding yields the corresponding constant expression
 * @throws UnsupportedOperationException if the bound result is none of the above
 */
@Override
public <T> FilterPredicate predicate(UnboundPredicate<T> pred) {
  Expression boundExpr = bind(pred);

  if (boundExpr instanceof BoundPredicate) {
    return predicate((BoundPredicate<?>) boundExpr);
  }

  // binding may produce a constant expression instead of a predicate
  if (boundExpr == Expressions.alwaysTrue()) {
    return AlwaysTrue.INSTANCE;
  }
  if (boundExpr == Expressions.alwaysFalse()) {
    return AlwaysFalse.INSTANCE;
  }

  throw new UnsupportedOperationException("Cannot convert to Parquet filter: " + pred);
}
 
Example 15
Source Project: iceberg   File: ManifestReader.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Decides whether stats should be dropped for the given filter and column selection.
 *
 * @param rowFilter row filter; the always-true instance never triggers a drop
 * @param columns selected column names
 * @return true only when the filter is not always-true, the selection does not contain all of
 *         {@code ManifestReader.ALL_COLUMNS}, and it shares no column with {@code STATS_COLUMNS}
 */
static boolean dropStats(Expression rowFilter, Collection<String> columns) {
  // Make sure we only drop all stats if we had projected all stats
  // We do not drop stats even if we had partially added some stats columns
  if (rowFilter == Expressions.alwaysTrue()) {
    return false;
  }

  if (columns.containsAll(ManifestReader.ALL_COLUMNS)) {
    return false;
  }

  return Sets.intersection(Sets.newHashSet(columns), STATS_COLUMNS).isEmpty();
}
 
Example 16
Source Project: iceberg   File: TableScanContext.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates a context with default settings: no snapshot ids, an always-true row filter, no
 * column selection, empty options, case-sensitive matching, and residuals and column stats
 * flags both off.
 */
TableScanContext() {
  // snapshot selection: none by default
  this.snapshotId = null;
  this.fromSnapshotId = null;
  this.toSnapshotId = null;

  // filtering and projection
  this.rowFilter = Expressions.alwaysTrue();
  this.selectedColumns = null;

  // flags and options
  this.ignoreResiduals = false;
  this.caseSensitive = true;
  this.colStats = false;
  this.options = ImmutableMap.of();
}
 
Example 17
Source Project: iceberg   File: ManifestReader.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Returns the partition evaluator, creating it on first use.
 *
 * <p>The evaluator is built from the inclusive projection of the row filter combined with the
 * partition filter via {@code Expressions.and}; if that combination is null, an always-true
 * expression is used instead.
 */
private Evaluator evaluator() {
  if (lazyEvaluator != null) {
    return lazyEvaluator;
  }

  Expression projectedRowFilter = Projections.inclusive(spec, caseSensitive).project(rowFilter);
  Expression combined = Expressions.and(projectedRowFilter, partFilter);
  Expression effectiveFilter = combined != null ? combined : Expressions.alwaysTrue();
  this.lazyEvaluator = new Evaluator(spec.partitionType(), effectiveFilter, caseSensitive);

  return lazyEvaluator;
}
 
Example 18
Source Project: iceberg   File: ManifestReader.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Returns the metrics evaluator, creating it on first use from the row filter, or from an
 * always-true expression when the row filter is null.
 */
private InclusiveMetricsEvaluator metricsEvaluator() {
  if (lazyMetricsEvaluator != null) {
    return lazyMetricsEvaluator;
  }

  Expression effectiveFilter = rowFilter != null ? rowFilter : Expressions.alwaysTrue();
  this.lazyMetricsEvaluator = new InclusiveMetricsEvaluator(spec.schema(), effectiveFilter, caseSensitive);

  return lazyMetricsEvaluator;
}
 
Example 19
Source Project: iceberg   File: Reader.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Combines the pushed filter expressions into a single expression.
 *
 * @return the always-true expression when there are no filter expressions; otherwise the
 *         result of folding them with {@code Expressions.and}, seeded with always-true
 */
private Expression filterExpression() {
  if (filterExpressions == null) {
    return Expressions.alwaysTrue();
  }

  Expression seed = Expressions.alwaysTrue();
  return filterExpressions.stream().reduce(seed, Expressions::and);
}
 
Example 20
Source Project: iceberg   File: ManifestReader.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Decides whether stats columns must be added to the projection for metrics evaluation.
 *
 * @param rowFilter row filter; the always-true instance never requires stats
 * @param columns selected column names
 * @return true only when the filter is not always-true and the selection contains neither all of
 *         {@code ManifestReader.ALL_COLUMNS} nor all of {@code STATS_COLUMNS}
 */
private static boolean requireStatsProjection(Expression rowFilter, Collection<String> columns) {
  // Make sure we have all stats columns for metrics evaluator
  if (rowFilter == Expressions.alwaysTrue()) {
    return false;
  }

  if (columns.containsAll(ManifestReader.ALL_COLUMNS)) {
    return false;
  }

  return !columns.containsAll(STATS_COLUMNS);
}