org.apache.iceberg.transforms.Transforms Java Examples

The following examples show how to use org.apache.iceberg.transforms.Transforms. You can vote up the examples you find useful or vote down the ones you don't, and you can go to the original project or source file by following the links above each example. You may also check out the related API usage in the sidebar.
Example #1
Source File: UnboundTransform.java    From iceberg with Apache License 2.0 8 votes vote down vote up
@SuppressWarnings("unchecked")
@Override
public BoundTransform<S, T> bind(Types.StructType struct, boolean caseSensitive) {
  // Resolve the named reference against the struct to obtain the concrete source type.
  BoundReference<S> boundRef = ref.bind(struct, caseSensitive);

  Transform<S, T> typeTransform;
  try {
    // TODO: Avoid using toString/fromString
    // Re-parse the transform against the bound column type so the transform
    // instance is specialized for the resolved source type.
    typeTransform = (Transform<S, T>) Transforms.fromString(boundRef.type(), transform.toString());
    ValidationException.check(typeTransform.canTransform(boundRef.type()),
        "Cannot bind: %s cannot transform %s values from '%s'", transform, boundRef.type(), ref.name());
  } catch (IllegalArgumentException e) {
    // Preserve the parse failure as the cause instead of silently discarding it,
    // so the underlying reason for the failed bind survives in the stack trace.
    throw new ValidationException(
        e, "Cannot bind: %s cannot transform %s values from '%s'", transform, boundRef.type(), ref.name());
  }

  return new BoundTransform<>(boundRef, typeTransform);
}
 
Example #2
Source File: TestFilteredScan.java    From iceberg with Apache License 2.0 7 votes vote down vote up
@BeforeClass
public static void startSpark() {
  TestFilteredScan.spark = SparkSession.builder().master("local[2]").getOrCreate();

  // Register the UDFs that the partition-filtering tests reference by name.
  Transform<Long, Integer> bucketTransform = Transforms.bucket(Types.LongType.get(), 4);
  spark.udf().register("bucket4", (UDF1<Long, Integer>) bucketTransform::apply, IntegerType$.MODULE$);

  Transform<Long, Integer> dayTransform = Transforms.day(Types.TimestampType.withZone());
  spark.udf().register("ts_day",
      (UDF1<Timestamp, Integer>) ts -> dayTransform.apply((Long) fromJavaTimestamp(ts)),
      IntegerType$.MODULE$);

  Transform<Long, Integer> hourTransform = Transforms.hour(Types.TimestampType.withZone());
  spark.udf().register("ts_hour",
      (UDF1<Timestamp, Integer>) ts -> hourTransform.apply((Long) fromJavaTimestamp(ts)),
      IntegerType$.MODULE$);

  // Identity UDFs used to exercise identity-partition pruning.
  spark.udf().register("data_ident", (UDF1<String, String>) value -> value, StringType$.MODULE$);
  spark.udf().register("id_ident", (UDF1<Long, Long>) value -> value, LongType$.MODULE$);
}
 
Example #3
Source File: TestFilteredScan.java    From iceberg with Apache License 2.0 6 votes vote down vote up
@BeforeClass
public static void startSpark() {
  // Spin up a small local Spark session shared by all tests in this class.
  TestFilteredScan.spark = SparkSession.builder().master("local[2]").getOrCreate();

  // define UDFs used by partition tests
  // Each UDF mirrors an Iceberg partition transform so SQL in the tests can
  // compute the same partition values the table uses.
  Transform<Long, Integer> bucket4 = Transforms.bucket(Types.LongType.get(), 4);
  spark.udf().register("bucket4", (UDF1<Long, Integer>) bucket4::apply, IntegerType$.MODULE$);

  // Timestamps are converted to Iceberg's internal long representation before
  // the transform is applied.
  Transform<Long, Integer> day = Transforms.day(Types.TimestampType.withZone());
  spark.udf().register("ts_day",
      (UDF1<Timestamp, Integer>) timestamp -> day.apply((Long) fromJavaTimestamp(timestamp)),
      IntegerType$.MODULE$);

  Transform<Long, Integer> hour = Transforms.hour(Types.TimestampType.withZone());
  spark.udf().register("ts_hour",
      (UDF1<Timestamp, Integer>) timestamp -> hour.apply((Long) fromJavaTimestamp(timestamp)),
      IntegerType$.MODULE$);

  // Identity transforms: the UDFs simply pass the value through.
  spark.udf().register("data_ident", (UDF1<String, String>) data -> data, StringType$.MODULE$);
  spark.udf().register("id_ident", (UDF1<Long, Long>) id -> id, LongType$.MODULE$);
}
 
Example #4
Source File: PartitionSpec.java    From iceberg with Apache License 2.0 5 votes vote down vote up
// Adds an identity-partition field whose values mirror the source column.
Builder identity(String sourceName, String targetName) {
  Types.NestedField column = findSourceColumn(sourceName);
  checkAndAddPartitionName(targetName, column.fieldId());
  PartitionField identityField = new PartitionField(
      column.fieldId(), nextFieldId(), targetName, Transforms.identity(column.type()));
  fields.add(identityField);
  return this;
}
 
Example #5
Source File: PartitionSpec.java    From iceberg with Apache License 2.0 5 votes vote down vote up
// Adds a partition field with explicit ids and a transform parsed from its
// string representation (used when rebuilding a spec from metadata).
Builder add(int sourceId, int fieldId, String name, String transform) {
  Types.NestedField column = schema.findField(sourceId);
  // Validate the lookup BEFORE dereferencing column: the original checked after
  // calling column.fieldId(), so a missing column threw NPE instead of the
  // intended "Cannot find source column" message.
  Preconditions.checkNotNull(column, "Cannot find source column: %s", sourceId);
  checkAndAddPartitionName(name, column.fieldId());
  fields.add(new PartitionField(sourceId, fieldId, name, Transforms.fromString(column.type(), transform)));
  // Track the highest field id ever assigned so later fields never collide.
  lastAssignedFieldId.getAndAccumulate(fieldId, Math::max);
  return this;
}
 
Example #6
Source File: PartitionSpec.java    From iceberg with Apache License 2.0 5 votes vote down vote up
// Adds a truncate[width] partition field over the named source column.
public Builder truncate(String sourceName, int width, String targetName) {
  checkAndAddPartitionName(targetName);
  Types.NestedField column = findSourceColumn(sourceName);
  PartitionField truncateField = new PartitionField(
      column.fieldId(), nextFieldId(), targetName, Transforms.truncate(column.type(), width));
  fields.add(truncateField);
  return this;
}
 
Example #7
Source File: PartitionSpec.java    From iceberg with Apache License 2.0 5 votes vote down vote up
// Adds a bucket[numBuckets] partition field over the named source column.
public Builder bucket(String sourceName, int numBuckets, String targetName) {
  checkAndAddPartitionName(targetName);
  Types.NestedField column = findSourceColumn(sourceName);
  PartitionField bucketField = new PartitionField(
      column.fieldId(), nextFieldId(), targetName, Transforms.bucket(column.type(), numBuckets));
  fields.add(bucketField);
  return this;
}
 
Example #8
Source File: PartitionSpec.java    From iceberg with Apache License 2.0 5 votes vote down vote up
// Adds an hour-granularity time partition over the named source column.
public Builder hour(String sourceName, String targetName) {
  checkAndAddPartitionName(targetName);
  Types.NestedField column = findSourceColumn(sourceName);
  PartitionField hourField = new PartitionField(
      column.fieldId(), nextFieldId(), targetName, Transforms.hour(column.type()));
  // Reject overlapping time-based partitions on the same source column.
  checkForRedundantPartitions(hourField);
  fields.add(hourField);
  return this;
}
 
Example #9
Source File: PartitionSpec.java    From iceberg with Apache License 2.0 5 votes vote down vote up
// Adds a day-granularity time partition over the named source column.
public Builder day(String sourceName, String targetName) {
  checkAndAddPartitionName(targetName);
  Types.NestedField sourceColumn = findSourceColumn(sourceName);
  PartitionField field = new PartitionField(
      sourceColumn.fieldId(), nextFieldId(), targetName, Transforms.day(sourceColumn.type()));
  // Guards against redundant time-based partitions on the same column.
  checkForRedundantPartitions(field);
  fields.add(field);
  return this;
}
 
Example #10
Source File: PartitionSpec.java    From iceberg with Apache License 2.0 5 votes vote down vote up
// Adds a month-granularity time partition over the named source column.
public Builder month(String sourceName, String targetName) {
  checkAndAddPartitionName(targetName);
  Types.NestedField column = findSourceColumn(sourceName);
  PartitionField monthField = new PartitionField(
      column.fieldId(), nextFieldId(), targetName, Transforms.month(column.type()));
  // Reject overlapping time-based partitions on the same source column.
  checkForRedundantPartitions(monthField);
  fields.add(monthField);
  return this;
}
 
Example #11
Source File: PartitionSpec.java    From iceberg with Apache License 2.0 5 votes vote down vote up
// Adds a year-granularity time partition over the named source column.
public Builder year(String sourceName, String targetName) {
  checkAndAddPartitionName(targetName);
  Types.NestedField column = findSourceColumn(sourceName);
  PartitionField yearField = new PartitionField(
      column.fieldId(), nextFieldId(), targetName, Transforms.year(column.type()));
  // Reject overlapping time-based partitions on the same source column.
  checkForRedundantPartitions(yearField);
  fields.add(yearField);
  return this;
}
 
Example #12
Source File: TestPartitionTransforms.java    From presto with Apache License 2.0 5 votes vote down vote up
@Test
public void testToStringMatchesSpecification()
{
    // The Iceberg spec names each partition transform; toString() must emit
    // exactly those names because they are serialized into table metadata.
    assertEquals(Transforms.identity(StringType.get()).toString(), "identity");
    // Parameterized transforms embed their argument in square brackets.
    assertEquals(Transforms.bucket(StringType.get(), 13).toString(), "bucket[13]");
    assertEquals(Transforms.truncate(StringType.get(), 19).toString(), "truncate[19]");
    assertEquals(Transforms.year(DateType.get()).toString(), "year");
    assertEquals(Transforms.month(DateType.get()).toString(), "month");
    assertEquals(Transforms.day(DateType.get()).toString(), "day");
    assertEquals(Transforms.hour(TimestampType.withoutZone()).toString(), "hour");
}
 
Example #13
Source File: FilesTable.java    From presto with Apache License 2.0 4 votes vote down vote up
/**
 * Builds result pages for the $files system table: one row per data file in the
 * scan, with per-column stats appended when present (null stats append NULL via
 * checkNonNull).
 */
private static List<Page> buildPages(ConnectorTableMetadata tableMetadata, ConnectorSession session, Table icebergTable, Optional<Long> snapshotId)
{
    PageListBuilder pagesBuilder = PageListBuilder.forTable(tableMetadata);
    // includeColumnStats is required so planFiles exposes bounds/counts below.
    TableScan tableScan = getTableScan(session, TupleDomain.all(), snapshotId, icebergTable).includeColumnStats();
    Map<Integer, Type> idToTypeMapping = getIcebergIdToTypeMapping(icebergTable.schema());

    tableScan.planFiles().forEach(fileScanTask -> {
        DataFile dataFile = fileScanTask.file();

        pagesBuilder.beginRow();
        pagesBuilder.appendVarchar(dataFile.path().toString());
        pagesBuilder.appendVarchar(dataFile.format().name());
        pagesBuilder.appendBigint(dataFile.recordCount());
        pagesBuilder.appendBigint(dataFile.fileSizeInBytes());
        if (checkNonNull(dataFile.columnSizes(), pagesBuilder)) {
            pagesBuilder.appendIntegerBigintMap(dataFile.columnSizes());
        }
        if (checkNonNull(dataFile.valueCounts(), pagesBuilder)) {
            pagesBuilder.appendIntegerBigintMap(dataFile.valueCounts());
        }
        if (checkNonNull(dataFile.nullValueCounts(), pagesBuilder)) {
            pagesBuilder.appendIntegerBigintMap(dataFile.nullValueCounts());
        }
        // Lower and upper bounds share the same decoding logic; see helper below.
        if (checkNonNull(dataFile.lowerBounds(), pagesBuilder)) {
            pagesBuilder.appendIntegerVarcharMap(toHumanReadableBounds(dataFile.lowerBounds(), idToTypeMapping));
        }
        if (checkNonNull(dataFile.upperBounds(), pagesBuilder)) {
            pagesBuilder.appendIntegerVarcharMap(toHumanReadableBounds(dataFile.upperBounds(), idToTypeMapping));
        }
        if (checkNonNull(dataFile.keyMetadata(), pagesBuilder)) {
            pagesBuilder.appendVarbinary(Slices.wrappedBuffer(dataFile.keyMetadata()));
        }
        if (checkNonNull(dataFile.splitOffsets(), pagesBuilder)) {
            pagesBuilder.appendBigintArray(dataFile.splitOffsets());
        }
        pagesBuilder.endRow();
    });

    return pagesBuilder.build();
}

// Decodes per-column bound buffers into human-readable strings keyed by field id.
// Extracted because the original duplicated this pipeline for lower and upper bounds.
private static Map<Integer, String> toHumanReadableBounds(Map<Integer, ByteBuffer> bounds, Map<Integer, Type> idToTypeMapping)
{
    return bounds.entrySet().stream()
            .collect(toImmutableMap(
                    Map.Entry::getKey,
                    entry -> Transforms.identity(idToTypeMapping.get(entry.getKey())).toHumanString(
                            Conversions.fromByteBuffer(idToTypeMapping.get(entry.getKey()), entry.getValue()))));
}
 
Example #14
Source File: Expressions.java    From iceberg with Apache License 2.0 4 votes vote down vote up
// Creates an unbound truncate[width] term on the named column.
// NOTE(review): LongType here looks like a placeholder — binding appears to
// re-parse the transform against the actual column type (see UnboundTransform.bind,
// which calls Transforms.fromString with the bound type); confirm before relying on it.
public static <T> UnboundTerm<T> truncate(String name, int width) {
  return new UnboundTransform<>(ref(name), Transforms.truncate(Types.LongType.get(), width));
}
 
Example #15
Source File: Expressions.java    From iceberg with Apache License 2.0 4 votes vote down vote up
// Creates an unbound hour-transform term on the named column.
@SuppressWarnings("unchecked")
public static <T> UnboundTerm<T> hour(String name) {
  Transform<?, T> hourTransform = (Transform<?, T>) Transforms.hour(Types.TimestampType.withZone());
  return new UnboundTransform<>(ref(name), hourTransform);
}
 
Example #16
Source File: Expressions.java    From iceberg with Apache License 2.0 4 votes vote down vote up
// Creates an unbound day-transform term on the named column.
@SuppressWarnings("unchecked")
public static <T> UnboundTerm<T> day(String name) {
  Transform<?, T> dayTransform = (Transform<?, T>) Transforms.day(Types.TimestampType.withZone());
  return new UnboundTransform<>(ref(name), dayTransform);
}
 
Example #17
Source File: Expressions.java    From iceberg with Apache License 2.0 4 votes vote down vote up
// Creates an unbound month-transform term on the named column.
@SuppressWarnings("unchecked")
public static <T> UnboundTerm<T> month(String name) {
  Transform<?, T> monthTransform = (Transform<?, T>) Transforms.month(Types.TimestampType.withZone());
  return new UnboundTransform<>(ref(name), monthTransform);
}
 
Example #18
Source File: Expressions.java    From iceberg with Apache License 2.0 4 votes vote down vote up
// Creates an unbound year-transform term on the named column.
@SuppressWarnings("unchecked")
public static <T> UnboundTerm<T> year(String name) {
  Transform<?, T> yearTransform = (Transform<?, T>) Transforms.year(Types.TimestampType.withZone());
  return new UnboundTransform<>(ref(name), yearTransform);
}
 
Example #19
Source File: PartitionSpec.java    From iceberg with Apache License 2.0 4 votes vote down vote up
// Adds a void (always-null) partition field; used when dropping a partition
// field while keeping its id slot occupied.
public Builder alwaysNull(String sourceName, String targetName) {
  checkAndAddPartitionName(targetName);
  Types.NestedField column = findSourceColumn(sourceName);
  PartitionField nullField =
      new PartitionField(column.fieldId(), nextFieldId(), targetName, Transforms.alwaysNull());
  fields.add(nullField);
  return this;
}
 
Example #20
Source File: Expressions.java    From iceberg with Apache License 2.0 4 votes vote down vote up
// Creates an unbound bucket[numBuckets] term on the named column.
@SuppressWarnings("unchecked")
public static <T> UnboundTerm<T> bucket(String name, int numBuckets) {
  return new UnboundTransform<>(
      ref(name), (Transform<?, T>) Transforms.bucket(Types.StringType.get(), numBuckets));
}