org.apache.iceberg.StructLike Java Examples

The following examples show how to use org.apache.iceberg.StructLike. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: ManifestFileUtil.java    From iceberg with Apache License 2.0 6 votes vote down vote up
/**
 * Checks whether a manifest may hold data for at least one of the given partitions.
 *
 * <p>A manifest that reports no partition field summaries is always treated as a possible match.
 *
 * @param manifest the manifest whose partition field summaries are consulted
 * @param partitions candidate partition tuples to test
 * @param specLookup resolves the manifest's partition spec ID to a {@link PartitionSpec}
 * @return {@code true} if the manifest has no summaries or any candidate passes {@code canContain}
 */
public static boolean canContainAny(ManifestFile manifest,
                                    Iterable<StructLike> partitions,
                                    Function<Integer, PartitionSpec> specLookup) {
  if (manifest.partitions() == null) {
    return true;
  }

  Types.StructType structType = specLookup.apply(manifest.partitionSpecId()).partitionType();
  List<ManifestFile.PartitionFieldSummary> rawSummaries = manifest.partitions();
  List<Types.NestedField> partitionFields = structType.fields();

  // pair each raw summary with the primitive type of the partition field at the same position
  int summaryCount = rawSummaries.size();
  List<FieldSummary<?>> summaries = Lists.newArrayListWithExpectedSize(summaryCount);
  int index = 0;
  while (index < summaryCount) {
    Type.PrimitiveType fieldType = partitionFields.get(index).type().asPrimitiveType();
    summaries.add(new FieldSummary<>(fieldType, rawSummaries.get(index)));
    index += 1;
  }

  // one matching candidate is enough
  for (StructLike candidate : partitions) {
    if (canContain(summaries, candidate)) {
      return true;
    }
  }

  return false;
}
 
Example #2
Source File: InternalRecordWrapper.java    From iceberg with Apache License 2.0 6 votes vote down vote up
/**
 * Picks the function that converts a value of the given type.
 *
 * @param type the type whose values need converting
 * @return a converter for DATE, TIME, TIMESTAMP, FIXED and STRUCT types; {@code null} for every
 *     other type ID
 */
private static Function<Object, Object> converter(Type type) {
  switch (type.typeId()) {
    case DATE:
      return value -> DateTimeUtil.daysFromDate((LocalDate) value);
    case TIME:
      return value -> DateTimeUtil.microsFromTime((LocalTime) value);
    case TIMESTAMP: {
      // the expected Java class depends on whether the timestamp type adjusts to UTC
      boolean adjustToUtc = ((Types.TimestampType) type).shouldAdjustToUTC();
      if (adjustToUtc) {
        return value -> DateTimeUtil.microsFromTimestamptz((OffsetDateTime) value);
      }
      return value -> DateTimeUtil.microsFromTimestamp((LocalDateTime) value);
    }
    case FIXED:
      return value -> ByteBuffer.wrap((byte[]) value);
    case STRUCT: {
      // a single nested wrapper is created here and reused by every call of the returned function
      InternalRecordWrapper nested = new InternalRecordWrapper(type.asStructType());
      return value -> nested.wrap((StructLike) value);
    }
    default:
      return null;
  }
}
 
Example #3
Source File: TestIcebergPartitionData.java    From dremio-oss with Apache License 2.0 5 votes vote down vote up
/**
 * Commits a single data file carrying {@code partitionData} to a table created under the test
 * folder, then checks the first partition value of every planned file.
 *
 * @param partitionSpec spec used to build the data file
 * @param partitionData partition tuple attached to the data file
 * @param columnName partition column passed to the create-table committer
 * @param expectedClass Java class used to read the partition value at position 0
 * @param expectedValue expected partition value; a {@code byte[]} when {@code expectedClass} is
 *     {@code ByteBuffer.class}
 * @throws Exception if creating the files or committing the table fails
 */
private void verifyPartitionValue(PartitionSpec partitionSpec, IcebergPartitionData partitionData,
                                  String columnName, Class<?> expectedClass, Object expectedValue) throws Exception {
  File tableFolder = new File(folder.getRoot(), "icebergPartitionTest");
  try {
    tableFolder.mkdir();
    File dataFile = new File(folder.getRoot(), "a.parquet");

    dataFile.createNewFile();

    DataFile d1 = DataFiles.builder(partitionSpec)
      .withInputFile(Files.localInput(dataFile))
      .withRecordCount(50)
      .withFormat(FileFormat.PARQUET)
      .withPartition(partitionData)
      .build();

    IcebergOpCommitter committer = IcebergOperation.getCreateTableCommitter(Path.of(tableFolder.toPath().toString()),
      (new SchemaConverter()).fromIceberg(schema), Lists.newArrayList(columnName), new Configuration());
    committer.consumeData(Lists.newArrayList(d1));
    committer.commit();

    Table table = new HadoopTables(new Configuration()).load(tableFolder.getPath());
    for (FileScanTask fileScanTask : table.newScan().planFiles()) {
      StructLike structLike = fileScanTask.file().partition();
      Object actualValue = structLike.get(0, expectedClass);
      if (expectedClass == ByteBuffer.class) {
        // compare buffer contents, not hash codes: equal hashes do not prove equal bytes
        Assert.assertEquals(ByteBuffer.wrap((byte[]) expectedValue), actualValue);
      } else {
        // assertEquals(expected, actual) is null-safe and reports both values on failure
        Assert.assertEquals(expectedValue, actualValue);
      }
    }
  } finally {
    tableFolder.delete();
  }
}
 
Example #4
Source File: IcebergSplitSource.java    From presto with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a read-only map from source column ID to the string form of the partition value,
 * covering the fields returned by {@code getIdentityPartitions} for the task's spec.
 *
 * @param scanTask the file scan task whose partition tuple is read
 * @return unmodifiable map of source column ID to partition value; a null partition value is
 *     stored as a null map value
 */
private static Map<Integer, String> getPartitionKeys(FileScanTask scanTask)
{
    StructLike partition = scanTask.file().partition();
    PartitionSpec spec = scanTask.spec();
    Map<Integer, String> partitionKeys = new HashMap<>();

    for (Map.Entry<PartitionField, Integer> identity : getIdentityPartitions(spec).entrySet()) {
        int sourceId = identity.getKey().sourceId();
        Type sourceType = spec.schema().findType(sourceId);
        Class<?> valueClass = sourceType.typeId().javaClass();
        Object value = partition.get(identity.getValue(), valueClass);

        String rendered;
        if (value == null) {
            rendered = null;
        }
        else if (sourceType.typeId() == FIXED || sourceType.typeId() == BINARY) {
            // this is safe because Iceberg PartitionData directly wraps the byte array
            rendered = new String(((ByteBuffer) value).array(), UTF_8);
        }
        else {
            rendered = value.toString();
        }
        partitionKeys.put(sourceId, rendered);
    }

    return Collections.unmodifiableMap(partitionKeys);
}
 
Example #5
Source File: TestIcebergCTASWithPartition.java    From dremio-oss with Apache License 2.0 5 votes vote down vote up
/**
 * Loads the table at {@code tableFolder} and asserts that the first partition value of every
 * planned file equals {@code expectedValue}.
 *
 * @param tableFolder location of the table to load
 * @param expectedClass Java class used to read the partition value at position 0
 * @param expectedValue value every file's first partition field must equal
 */
private void verifyPartitionValue(String tableFolder, Class<?> expectedClass, Object expectedValue) {
  Table table = new HadoopTables(new Configuration()).load(tableFolder);
  for (FileScanTask fileScanTask : table.newScan().planFiles()) {
    StructLike structLike = fileScanTask.file().partition();
    // expected value goes first so a failure message labels the two values correctly
    Assert.assertEquals(expectedValue, structLike.get(0, expectedClass));
  }
}
 
Example #6
Source File: TestInsertIntoTable.java    From dremio-oss with Apache License 2.0 5 votes vote down vote up
/**
 * Loads the table stored in {@code tableFolder} and asserts that the first partition value of
 * every planned file equals {@code expectedValue}.
 *
 * @param tableFolder folder holding the table to load
 * @param expectedClass Java class used to read the partition value at position 0
 * @param expectedValue value every file's first partition field must equal
 */
private void checkSinglePartitionValue(File tableFolder, Class<?> expectedClass, Object expectedValue) {
  Table table = new HadoopTables(new Configuration()).load(tableFolder.getPath());
  for (FileScanTask fileScanTask : table.newScan().planFiles()) {
    StructLike structLike = fileScanTask.file().partition();
    // assertEquals is null-safe and prints both values on failure, unlike assertTrue(a.equals(b))
    Assert.assertEquals(expectedValue, structLike.get(0, expectedClass));
  }
}
 
Example #7
Source File: ManifestFileUtil.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Tests a struct against per-field summaries, position by position.
 *
 * @param summaries one summary per field position
 * @param struct the tuple to test
 * @return {@code false} if the sizes differ or any field value is rejected by its summary;
 *     {@code true} otherwise
 */
private static boolean canContain(List<FieldSummary<?>> summaries, StructLike struct) {
  if (struct.size() != summaries.size()) {
    return false;
  }

  // a single rejected field rules out the whole struct, so stop at the first one
  int pos = 0;
  for (FieldSummary<?> summary : summaries) {
    Object fieldValue = struct.get(pos, Object.class);
    if (!summary.canContain(fieldValue)) {
      return false;
    }
    pos += 1;
  }

  return true;
}
 
Example #8
Source File: IcebergInputFormat.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Optionally filters records with the residual expression.
 *
 * <p>The input is returned untouched when {@code SKIP_RESIDUAL_FILTERING} is set to true in the
 * configuration, when {@code residual} is null, or when it is the {@code Expressions.alwaysTrue()}
 * instance.
 *
 * @param iter records to filter
 * @param residual residual expression to evaluate per record, may be null
 * @param readSchema schema whose struct type the evaluator is bound to
 * @return the filtered iterable, or {@code iter} itself when no filtering applies
 */
private CloseableIterable<T> applyResidualFiltering(CloseableIterable<T> iter, Expression residual,
                                                    Schema readSchema) {
  boolean skipResidual = context.getConfiguration().getBoolean(SKIP_RESIDUAL_FILTERING, false);
  if (skipResidual || residual == null || residual == Expressions.alwaysTrue()) {
    return iter;
  }

  Evaluator residualEvaluator = new Evaluator(readSchema.asStruct(), residual, caseSensitive);
  return CloseableIterable.filter(iter, row -> residualEvaluator.eval((StructLike) row));
}
 
Example #9
Source File: PartitionTable.java    From presto with Apache License 2.0 5 votes vote down vote up
public Partition(
        StructLike values,
        long recordCount,
        long size,
        Map<Integer, Object> minValues,
        Map<Integer, Object> maxValues,
        Map<Integer, Long> nullCounts)
{
    this.values = requireNonNull(values, "values is null");
    this.recordCount = recordCount;
    this.fileCount = 1;
    this.size = size;
    if (minValues == null || maxValues == null || nullCounts == null) {
        this.minValues = null;
        this.maxValues = null;
        this.nullCounts = null;
        corruptedStats = null;
    }
    else {
        this.minValues = new HashMap<>(minValues);
        this.maxValues = new HashMap<>(maxValues);
        // we are assuming if minValues is not present, max will be not be present either.
        this.corruptedStats = nonPartitionPrimitiveColumns.stream()
                .map(Types.NestedField::fieldId)
                .filter(id -> !minValues.containsKey(id) && (!nullCounts.containsKey(id) || nullCounts.get(id) != recordCount))
                .collect(toImmutableSet());
        this.nullCounts = new HashMap<>(nullCounts);
        hasValidColumnMetrics = true;
    }
}
 
Example #10
Source File: StructInternalRow.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Converts a collection of values into {@link ArrayData}, choosing the per-element conversion
 * from the element type.
 *
 * @param elementType type of each value in {@code values}
 * @param values the values to convert
 * @return array data produced by {@code fillArray} with the matching element conversion
 * @throws UnsupportedOperationException if the element type ID is not handled below
 */
private ArrayData collectionToArrayData(Type elementType, Collection<?> values) {
  switch (elementType.typeId()) {
    case BOOLEAN:
    case INTEGER:
    case DATE:
    case TIME:
    case LONG:
    case TIMESTAMP:
    case FLOAT:
    case DOUBLE:
      // these values are stored as-is
      return fillArray(values, array -> (pos, value) -> array[pos] = value);
    case STRING:
      return fillArray(values, array ->
          (BiConsumer<Integer, CharSequence>) (pos, seq) -> array[pos] = UTF8String.fromString(seq.toString()));
    case FIXED:
    case BINARY:
      return fillArray(values, array ->
          (BiConsumer<Integer, ByteBuffer>) (pos, buf) -> array[pos] = ByteBuffers.toByteArray(buf));
    case DECIMAL:
      return fillArray(values, array ->
          (BiConsumer<Integer, BigDecimal>) (pos, dec) -> array[pos] = Decimal.apply(dec));
    case STRUCT:
      return fillArray(values, array -> (BiConsumer<Integer, StructLike>) (pos, tuple) ->
          array[pos] = new StructInternalRow(elementType.asStructType(), tuple));
    case LIST:
      // each value is itself a list, so recurse with that list's element type; passing the list
      // type here would dispatch to LIST again and cast the nested elements to Collection
      return fillArray(values, array -> (BiConsumer<Integer, Collection<?>>) (pos, list) ->
          array[pos] = collectionToArrayData(elementType.asListType().elementType(), list));
    case MAP:
      return fillArray(values, array -> (BiConsumer<Integer, Map<?, ?>>) (pos, map) ->
          array[pos] = mapToMapData(elementType.asMapType(), map));
    default:
      throw new UnsupportedOperationException("Unsupported array element type: " + elementType);
  }
}
 
Example #11
Source File: StructInternalRow.java    From iceberg with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a row that keeps the given struct together with the struct type describing it.
 *
 * @param type struct type stored on this row
 * @param struct struct stored on this row
 */
private StructInternalRow(Types.StructType type, StructLike struct) {
  this.struct = struct;
  this.type = type;
}
 
Example #12
Source File: InternalRecordWrapper.java    From iceberg with Apache License 2.0 4 votes vote down vote up
/**
 * Points this wrapper at the given record.
 *
 * @param record the record to wrap
 * @return this wrapper, so calls can be chained
 */
public InternalRecordWrapper wrap(StructLike record) {
  wrapped = record;
  return this;
}
 
Example #13
Source File: StructInternalRow.java    From iceberg with Apache License 2.0 4 votes vote down vote up
/**
 * Replaces the struct this row reads from.
 *
 * @param newStruct the struct to use from now on
 * @return this row, so calls can be chained
 */
public StructInternalRow setStruct(StructLike newStruct) {
  struct = newStruct;
  return this;
}
 
Example #14
Source File: StructInternalRow.java    From iceberg with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the nested struct at {@code ordinal} wrapped in a new {@link StructInternalRow}.
 *
 * @param ordinal position of the struct field
 * @param numFields not used by this implementation
 * @return a new row over the nested struct, typed by the field's struct type
 */
@Override
public InternalRow getStruct(int ordinal, int numFields) {
  Types.StructType nestedType = type.fields().get(ordinal).type().asStructType();
  StructLike nestedStruct = struct.get(ordinal, StructLike.class);
  return new StructInternalRow(nestedType, nestedStruct);
}
 
Example #15
Source File: SparkDataFile.java    From iceberg with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the partition tuple held in {@code wrappedPartition}.
 *
 * @return the value of {@code wrappedPartition}
 */
@Override
public StructLike partition() {
  return wrappedPartition;
}
 
Example #16
Source File: TestSparkDataFile.java    From iceberg with Apache License 2.0 4 votes vote down vote up
/**
 * Asserts that two structs have the same size and equal values at every position.
 *
 * @param expected the reference struct
 * @param actual the struct under test
 */
private void checkStructLike(StructLike expected, StructLike actual) {
  Assert.assertEquals("Struct size should match", expected.size(), actual.size());

  int pos = 0;
  while (pos < expected.size()) {
    Object expectedValue = expected.get(pos, Object.class);
    Object actualValue = actual.get(pos, Object.class);
    Assert.assertEquals("Struct values must match", expectedValue, actualValue);
    pos += 1;
  }
}
 
Example #17
Source File: StructLikeWrapper.java    From iceberg with Apache License 2.0 4 votes vote down vote up
/**
 * Static factory that creates a wrapper around the given struct.
 *
 * @param struct the struct to wrap
 * @return a new wrapper holding {@code struct}
 */
public static StructLikeWrapper wrap(StructLike struct) {
  StructLikeWrapper wrapper = new StructLikeWrapper(struct);
  return wrapper;
}
 
Example #18
Source File: StructLikeWrapper.java    From iceberg with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a wrapper that holds the given struct.
 *
 * @param struct the struct stored in this wrapper
 */
private StructLikeWrapper(StructLike struct) {
  this.struct = struct;
}
 
Example #19
Source File: StructLikeWrapper.java    From iceberg with Apache License 2.0 4 votes vote down vote up
/**
 * Replaces the wrapped struct.
 *
 * @param newStruct the struct to hold from now on
 * @return this wrapper, so calls can be chained
 */
public StructLikeWrapper set(StructLike newStruct) {
  struct = newStruct;
  return this;
}
 
Example #20
Source File: StructLikeWrapper.java    From iceberg with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the struct currently held by this wrapper.
 *
 * @return the wrapped struct
 */
public StructLike get() {
  return this.struct;
}
 
Example #21
Source File: TestIcebergInputFormat.java    From iceberg with Apache License 2.0 4 votes vote down vote up
/**
 * Writes {@code records} to a fresh temp file in the requested format and builds the
 * {@link DataFile} describing it.
 *
 * @param table table supplying the schema and partition spec
 * @param partitionData partition tuple for the file, or null to leave the partition unset
 * @param fileFormat format used both to write the file and to describe it in the data file
 * @param records records to write
 * @return the data file built for the written file
 * @throws IOException if creating, writing or closing the file fails
 * @throws UnsupportedOperationException if {@code fileFormat} is not AVRO, PARQUET or ORC
 */
private DataFile writeFile(
    Table table, StructLike partitionData, FileFormat fileFormat, List<Record> records) throws IOException {
  File file = temp.newFile();
  // only the path is needed; the test fails here if the placeholder file cannot be removed
  Assert.assertTrue(file.delete());
  FileAppender<Record> appender;
  switch (fileFormat) {
    case AVRO:
      appender = Avro.write(Files.localOutput(file))
          .schema(table.schema())
          .createWriterFunc(DataWriter::create)
          .named(fileFormat.name())
          .build();
      break;
    case PARQUET:
      appender = Parquet.write(Files.localOutput(file))
          .schema(table.schema())
          .createWriterFunc(GenericParquetWriter::buildWriter)
          .named(fileFormat.name())
          .build();
      break;
    case ORC:
      appender = ORC.write(Files.localOutput(file))
          .schema(table.schema())
          .createWriterFunc(GenericOrcWriter::buildWriter)
          .build();
      break;
    default:
      throw new UnsupportedOperationException("Cannot write format: " + fileFormat);
  }

  try {
    appender.addAll(records);
  } finally {
    appender.close();
  }

  // describe the file with the same format that selected the writer above, not a name from
  // outside this method; length and metrics are read after the appender has been closed
  DataFiles.Builder builder = DataFiles.builder(table.spec())
      .withPath(file.toString())
      .withFormat(fileFormat)
      .withFileSizeInBytes(file.length())
      .withMetrics(appender.metrics());
  if (partitionData != null) {
    builder.withPartition(partitionData);
  }
  return builder.build();
}
 
Example #22
Source File: ResidualEvaluator.java    From iceberg with Apache License 2.0 4 votes vote down vote up
/**
 * Stores the struct on this instance and visits {@code expr} with this object as the visitor.
 *
 * @param dataStruct the struct made available to the visitor through the {@code struct} field
 * @return the expression produced by the visit
 */
private Expression eval(StructLike dataStruct) {
  struct = dataStruct;
  Expression result = ExpressionVisitors.visit(expr, this);
  return result;
}
 
Example #23
Source File: ResidualEvaluator.java    From iceberg with Apache License 2.0 4 votes vote down vote up
/**
 * Returns {@code expr} unchanged, regardless of the struct passed in.
 *
 * @param ignored not used
 * @return the value of {@code expr}
 */
@Override
public Expression residualFor(StructLike ignored) {
  return expr;
}
 
Example #24
Source File: BoundReference.java    From iceberg with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the accessor held by this reference.
 *
 * @return the accessor used to read values out of a struct
 */
public Accessor<StructLike> accessor() {
  return this.accessor;
}
 
Example #25
Source File: BoundReference.java    From iceberg with Apache License 2.0 4 votes vote down vote up
/**
 * Reads this reference's value from the given struct through the accessor.
 *
 * @param struct the struct to read from
 * @return the accessor's result, cast unchecked to {@code T}
 */
@Override
@SuppressWarnings("unchecked")
public T eval(StructLike struct) {
  Object value = accessor.get(struct);
  return (T) value;
}
 
Example #26
Source File: BoundReference.java    From iceberg with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a reference that stores a field together with the accessor for it.
 *
 * @param field the field stored on this reference
 * @param accessor the accessor stored on this reference
 */
BoundReference(Types.NestedField field, Accessor<StructLike> accessor) {
  this.accessor = accessor;
  this.field = field;
}
 
Example #27
Source File: BoundTransform.java    From iceberg with Apache License 2.0 4 votes vote down vote up
/**
 * Evaluates {@code ref} against the struct and applies {@code transform} to the result.
 *
 * @param struct the struct handed to {@code ref.eval}
 * @return the value returned by {@code transform.apply}
 */
@Override
public T eval(StructLike struct) {
  return transform.apply(ref.eval(struct));
}
 
Example #28
Source File: BoundPredicate.java    From iceberg with Apache License 2.0 4 votes vote down vote up
/**
 * Evaluates this predicate's term against the struct and passes the resulting value to
 * {@code test}.
 *
 * @param struct the struct handed to the term
 * @return the result of {@code test} on the term's value
 */
@Override
public Boolean eval(StructLike struct) {
  return test(term().eval(struct));
}
 
Example #29
Source File: BoundPredicate.java    From iceberg with Apache License 2.0 4 votes vote down vote up
/**
 * Evaluates this predicate's term against the struct and tests the resulting value.
 *
 * @param struct the struct handed to the term
 * @return the result of {@code test} on the term's value
 */
public boolean test(StructLike struct) {
  // NOTE(review): the inner call presumably resolves to a value-taking overload of test that is
  // not visible here — confirm
  return test(term().eval(struct));
}
 
Example #30
Source File: Evaluator.java    From iceberg with Apache License 2.0 4 votes vote down vote up
/**
 * Stores the row on this instance and visits {@code expr} with this object as the visitor.
 *
 * @param row the row made available to the visitor through the {@code struct} field
 * @return the result of the visit
 */
private boolean eval(StructLike row) {
  struct = row;
  boolean outcome = ExpressionVisitors.visitEvaluator(expr, this);
  return outcome;
}