org.apache.iceberg.types.Types Java Examples

The following examples show how to use org.apache.iceberg.types.Types. The examples are extracted from open source projects; the source file, originating project, and license are noted above each example.
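Most of the examples below build Iceberg schemas from the factory methods on Types. As a quick orientation, here is a minimal, self-contained sketch (not taken from any of the projects below) that combines the most common factories:

import org.apache.iceberg.Schema;
import org.apache.iceberg.types.Types;

public class TypesQuickStart {
  public static void main(String[] args) {
    // each field carries a unique id, a name, a type, and whether it is required
    Schema schema = new Schema(
        Types.NestedField.required(1, "id", Types.LongType.get()),
        Types.NestedField.optional(2, "data", Types.StringType.get()),
        Types.NestedField.optional(3, "tags",
            Types.ListType.ofOptional(4, Types.StringType.get())),
        Types.NestedField.optional(5, "properties",
            Types.MapType.ofOptional(6, 7, Types.StringType.get(), Types.StringType.get())));

    System.out.println(schema);                          // pretty-prints the schema tree
    System.out.println(schema.findField("tags").type()); // list<string>
  }
}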
Example #1
Source File: UnboundTransform.java    From iceberg with Apache License 2.0
@SuppressWarnings("unchecked")
@Override
public BoundTransform<S, T> bind(Types.StructType struct, boolean caseSensitive) {
  BoundReference<S> boundRef = ref.bind(struct, caseSensitive);

  Transform<S, T> typeTransform;
  try {
    // TODO: Avoid using toString/fromString
    typeTransform = (Transform<S, T>) Transforms.fromString(boundRef.type(), transform.toString());
    ValidationException.check(typeTransform.canTransform(boundRef.type()),
        "Cannot bind: %s cannot transform %s values from '%s'", transform, boundRef.type(), ref.name());
  } catch (IllegalArgumentException e) {
    throw new ValidationException(
        "Cannot bind: %s cannot transform %s values from '%s'", transform, boundRef.type(), ref.name());
  }

  return new BoundTransform<>(boundRef, typeTransform);
}
 
Example #2
Source File: AvroSchemaWithTypeVisitor.java    From iceberg with Apache License 2.0
public static <T> T visit(Type iType, Schema schema, AvroSchemaWithTypeVisitor<T> visitor) {
  switch (schema.getType()) {
    case RECORD:
      return visitRecord(iType != null ? iType.asStructType() : null, schema, visitor);

    case UNION:
      return visitUnion(iType, schema, visitor);

    case ARRAY:
      return visitArray(iType, schema, visitor);

    case MAP:
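      // Avro map keys are always strings, so only the value type is paired with an Iceberg type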
      Types.MapType map = iType != null ? iType.asMapType() : null;
      return visitor.map(map, schema,
          visit(map != null ? map.valueType() : null, schema.getValueType(), visitor));

    default:
      return visitor.primitive(iType != null ? iType.asPrimitiveType() : null, schema);
  }
}
 
Example #3
Source File: ArrowSchemaUtilTest.java    From iceberg with Apache License 2.0
@Test
public void convertPrimitive() {
  Schema iceberg = new Schema(
      Types.NestedField.optional(0, INTEGER_FIELD, IntegerType.get()),
      Types.NestedField.optional(1, BOOLEAN_FIELD, BooleanType.get()),
      Types.NestedField.required(2, DOUBLE_FIELD, DoubleType.get()),
      Types.NestedField.required(3, STRING_FIELD, StringType.get()),
      Types.NestedField.optional(4, DATE_FIELD, DateType.get()),
      Types.NestedField.optional(5, TIMESTAMP_FIELD, TimestampType.withZone()),
      Types.NestedField.optional(6, LONG_FIELD, LongType.get()),
      Types.NestedField.optional(7, FLOAT_FIELD, FloatType.get()),
      Types.NestedField.optional(8, TIME_FIELD, TimeType.get()),
      Types.NestedField.optional(9, BINARY_FIELD, Types.BinaryType.get()),
      Types.NestedField.optional(10, DECIMAL_FIELD, Types.DecimalType.of(1, 1)),
      Types.NestedField.optional(12, LIST_FIELD, Types.ListType.ofOptional(13, Types.IntegerType.get())),
      Types.NestedField.required(14, MAP_FIELD, Types.MapType.ofOptional(15, 16,
          StringType.get(), IntegerType.get())),
      Types.NestedField.optional(17, FIXED_WIDTH_BINARY_FIELD, Types.FixedType.ofLength(10)));

  org.apache.arrow.vector.types.pojo.Schema arrow = ArrowSchemaUtil.convert(iceberg);

  validate(iceberg, arrow);
}
 
Example #4
Source File: TestReadProjection.java    From iceberg with Apache License 2.0
@Test
public void testEmptyProjection() throws Exception {
  Schema schema = new Schema(
      Types.NestedField.required(0, "id", Types.LongType.get()),
      Types.NestedField.optional(1, "data", Types.StringType.get())
  );

  Record record = GenericRecord.create(schema);
  record.setField("id", 34L);
  record.setField("data", "test");

  Record projected = writeAndRead("empty_projection", schema, schema.select(), record);

  Assert.assertNotNull("Should read a non-null record", projected);
  try {
    projected.get(0);
    Assert.fail("Should not retrieve value with ordinal 0");
  } catch (ArrayIndexOutOfBoundsException e) {
    // this is expected because there are no values
  }
}
 
Example #5
Source File: PartitionTable.java    From presto with Apache License 2.0
private static Object convert(Object value, Type type)
{
    if (value == null) {
        return null;
    }
    if (type instanceof Types.StringType) {
        return value.toString();
    }
    if (type instanceof Types.BinaryType) {
        // TODO the client sees the byte array's toString output instead of the actual bytes; needs to be fixed.
        return ((ByteBuffer) value).array();
    }
    if (type instanceof Types.TimestampType) {
        long utcMillis = TimeUnit.MICROSECONDS.toMillis((Long) value);
        Types.TimestampType timestampType = (Types.TimestampType) type;
        if (timestampType.shouldAdjustToUTC()) {
            return packDateTimeWithZone(utcMillis, TimeZoneKey.UTC_KEY);
        }
        return utcMillis;
    }
    if (type instanceof Types.FloatType) {
        return Float.floatToIntBits((Float) value);
    }
    return value;
}
 
Example #6
Source File: TestPredicateBinding.java    From iceberg with Apache License 2.0
@Test
@SuppressWarnings("unchecked")
public void testIsNull() {
  StructType optional = StructType.of(optional(19, "s", Types.StringType.get()));

  UnboundPredicate<?> unbound = new UnboundPredicate<>(IS_NULL, ref("s"));
  Expression expr = unbound.bind(optional);
  BoundPredicate<?> bound = assertAndUnwrap(expr);
  Assert.assertEquals("Should use the same operation", IS_NULL, bound.op());
  Assert.assertEquals("Should use the correct field", 19, bound.ref().fieldId());
  Assert.assertTrue("Should be a unary predicate", bound.isUnaryPredicate());

  StructType required = StructType.of(required(20, "s", Types.StringType.get()));
  Assert.assertEquals("IsNull inclusive a required field should be alwaysFalse",
      Expressions.alwaysFalse(), unbound.bind(required));
}
 
Example #7
Source File: Literals.java    From iceberg with Apache License 2.0
@Override
@SuppressWarnings("unchecked")
public <T> Literal<T> to(Type type) {
  switch (type.typeId()) {
    case INTEGER:
      return (Literal<T>) this;
    case LONG:
      return (Literal<T>) new LongLiteral(value().longValue());
    case FLOAT:
      return (Literal<T>) new FloatLiteral(value().floatValue());
    case DOUBLE:
      return (Literal<T>) new DoubleLiteral(value().doubleValue());
    case DATE:
      return (Literal<T>) new DateLiteral(value());
    case DECIMAL:
      int scale = ((Types.DecimalType) type).scale();
      // rounding mode isn't necessary, but pass one to avoid warnings
      return (Literal<T>) new DecimalLiteral(
          BigDecimal.valueOf(value()).setScale(scale, RoundingMode.HALF_UP));
    default:
      return null;
  }
}
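The switch above is the machinery behind Iceberg's public Literal API. A minimal sketch of the equivalent public calls, assuming org.apache.iceberg.expressions.Literal (the variable names are illustrative, not from the source file):

Literal<Integer> intLit = Literal.of(34);
Literal<Long> asLong = intLit.to(Types.LongType.get());                // 34L
Literal<Double> asDouble = intLit.to(Types.DoubleType.get());          // 34.0
Literal<BigDecimal> asDecimal = intLit.to(Types.DecimalType.of(9, 2)); // 34.00
Literal<CharSequence> asString = intLit.to(Types.StringType.get());    // null: unsupported conversion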
 
Example #8
Source File: TestFilteredScan.java    From iceberg with Apache License 2.0
@BeforeClass
public static void startSpark() {
  TestFilteredScan.spark = SparkSession.builder().master("local[2]").getOrCreate();

  // define UDFs used by partition tests
  Transform<Long, Integer> bucket4 = Transforms.bucket(Types.LongType.get(), 4);
  spark.udf().register("bucket4", (UDF1<Long, Integer>) bucket4::apply, IntegerType$.MODULE$);

  Transform<Long, Integer> day = Transforms.day(Types.TimestampType.withZone());
  spark.udf().register("ts_day",
      (UDF1<Timestamp, Integer>) timestamp -> day.apply((Long) fromJavaTimestamp(timestamp)),
      IntegerType$.MODULE$);

  Transform<Long, Integer> hour = Transforms.hour(Types.TimestampType.withZone());
  spark.udf().register("ts_hour",
      (UDF1<Timestamp, Integer>) timestamp -> hour.apply((Long) fromJavaTimestamp(timestamp)),
      IntegerType$.MODULE$);

  spark.udf().register("data_ident", (UDF1<String, String>) data -> data, StringType$.MODULE$);
  spark.udf().register("id_ident", (UDF1<Long, Long>) id -> id, LongType$.MODULE$);
}
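The same transforms can be applied outside Spark. A minimal sketch, assuming the Transform.apply(...) call style used throughout this codebase:

Transform<Long, Integer> bucket4 = Transforms.bucket(Types.LongType.get(), 4);
Integer bucket = bucket4.apply(34L);                  // deterministic bucket in [0, 4)

Transform<Long, Integer> day = Transforms.day(Types.TimestampType.withZone());
Integer daysFromEpoch = day.apply(1595529600000000L); // microseconds from epoch -> days from epoch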
 
Example #9
Source File: TestTruncatesProjection.java    From iceberg with Apache License 2.0
@Test
public void testBinaryInclusive() throws Exception {
  ByteBuffer value = ByteBuffer.wrap("abcdefg".getBytes("UTF-8"));
  Schema schema = new Schema(optional(1, "value", Types.BinaryType.get()));
  PartitionSpec spec = PartitionSpec.builderFor(schema).truncate("value", 5).build();
  String expectedValue = TransformUtil.base64encode(ByteBuffer.wrap("abcde".getBytes("UTF-8")));

  assertProjectionInclusive(spec, lessThan("value", value), Expression.Operation.LT_EQ, expectedValue);
  assertProjectionInclusive(spec, lessThanOrEqual("value", value), Expression.Operation.LT_EQ, expectedValue);
  assertProjectionInclusive(spec, greaterThan("value", value), Expression.Operation.GT_EQ, expectedValue);
  assertProjectionInclusive(spec, greaterThanOrEqual("value", value), Expression.Operation.GT_EQ, expectedValue);
  assertProjectionInclusive(spec, equal("value", value), Expression.Operation.EQ, expectedValue);
  assertProjectionInclusiveValue(spec, notEqual("value", value), Expression.Operation.TRUE);

  ByteBuffer anotherValue = ByteBuffer.wrap("abcdehij".getBytes("UTF-8"));
  assertProjectionInclusive(spec, in("value", value, anotherValue),
      Expression.Operation.IN, String.format("[%s, %s]", expectedValue, expectedValue));
  assertProjectionInclusiveValue(spec, notIn("value", value, anotherValue), Expression.Operation.TRUE);
}
 
Example #10
Source File: ParquetSchemaUtil.java    From iceberg with Apache License 2.0
/**
 * Prunes columns from a Parquet file schema that was written without field ids.
 * <p>
 * Files that were written without field ids are read assuming that schema evolution preserved
 * column order. Deleting columns was not allowed.
 * <p>
 * The order of columns in the resulting Parquet schema matches the Parquet file.
 *
 * @param fileSchema schema from a Parquet file that does not have field ids.
 * @param expectedSchema expected schema
 * @return a parquet schema pruned using the expected schema
 */
public static MessageType pruneColumnsFallback(MessageType fileSchema, Schema expectedSchema) {
  Set<Integer> selectedIds = Sets.newHashSet();

  for (Types.NestedField field : expectedSchema.columns()) {
    selectedIds.add(field.fieldId());
  }

  MessageTypeBuilder builder = org.apache.parquet.schema.Types.buildMessage();

  int ordinal = 1;
  for (Type type : fileSchema.getFields()) {
    if (selectedIds.contains(ordinal)) {
      builder.addField(type.withId(ordinal));
    }
    ordinal += 1;
  }

  return builder.named(fileSchema.getName());
}
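A hedged usage sketch of the ordinal matching above: field ids in the expected schema are treated as 1-based column positions in the file schema, so selecting ids 1 and 3 keeps the first and third columns (the schema names below are illustrative; assumes a static import of PrimitiveType.PrimitiveTypeName):

MessageType fileSchema = org.apache.parquet.schema.Types.buildMessage()
    .required(INT64).named("id")
    .optional(BINARY).named("data")
    .optional(INT32).named("count")
    .named("table");

Schema expected = new Schema(
    Types.NestedField.required(1, "id", Types.LongType.get()),
    Types.NestedField.optional(3, "count", Types.IntegerType.get()));

MessageType pruned = ParquetSchemaUtil.pruneColumnsFallback(fileSchema, expected);
// keeps columns 1 and 3: message table { required int64 id = 1; optional int32 count = 3; }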
 
Example #11
Source File: TestHelpers.java    From iceberg with Apache License 2.0
private static void assertEqualsSafe(Types.MapType map,
                                     Map<?, ?> expected, Map<?, ?> actual) {
  Type keyType = map.keyType();
  Type valueType = map.valueType();

  for (Object expectedKey : expected.keySet()) {
    Object matchingKey = null;
    for (Object actualKey : actual.keySet()) {
      try {
        assertEqualsSafe(keyType, expectedKey, actualKey);
        matchingKey = actualKey;
      } catch (AssertionError e) {
        // failed
      }
    }

    Assert.assertNotNull("Should have a matching key", matchingKey);
    assertEqualsSafe(valueType, expected.get(expectedKey), actual.get(matchingKey));
  }
}
 
Example #12
Source File: TestTruncatesProjection.java    From iceberg with Apache License 2.0
@Test
public void testLongStrictUpperBound() {
  Long value = 99L;
  Schema schema = new Schema(optional(1, "value", Types.LongType.get()));
  PartitionSpec spec = PartitionSpec.builderFor(schema).truncate("value", 10).build();

  assertProjectionStrict(spec, lessThan("value", value), Expression.Operation.LT, "90");
  assertProjectionStrict(spec, lessThanOrEqual("value", value), Expression.Operation.LT, "100");
  assertProjectionStrict(spec, greaterThan("value", value), Expression.Operation.GT, "90");
  assertProjectionStrict(spec, greaterThanOrEqual("value", value), Expression.Operation.GT, "90");
  assertProjectionStrict(spec, notEqual("value", value), Expression.Operation.NOT_EQ, "90");
  assertProjectionStrictValue(spec, equal("value", value), Expression.Operation.FALSE);

  assertProjectionStrict(spec, notIn("value", value - 1, value, value + 1),
      Expression.Operation.NOT_IN, "[90, 90, 100]");
  assertProjectionStrictValue(spec, in("value", value, value - 1), Expression.Operation.FALSE);
}
 
Example #13
Source File: TestBucketingProjection.java    From iceberg with Apache License 2.0
@Test
public void testBucketUUIDStrict() {
  UUID value = new UUID(123L, 456L);
  Schema schema = new Schema(optional(1, "value", Types.UUIDType.get()));
  PartitionSpec spec = PartitionSpec.builderFor(schema).bucket("value", 10).build();

  // the bucket number of the value (i.e. UUID(123L, 456L)) is 4
  assertProjectionStrict(spec, notEqual("value", value), Expression.Operation.NOT_EQ, "4");
  assertProjectionStrictValue(spec, equal("value", value), Expression.Operation.FALSE);
  assertProjectionStrictValue(spec, lessThan("value", value), Expression.Operation.FALSE);
  assertProjectionStrictValue(spec, lessThanOrEqual("value", value), Expression.Operation.FALSE);
  assertProjectionStrictValue(spec, greaterThan("value", value), Expression.Operation.FALSE);
  assertProjectionStrictValue(spec, greaterThanOrEqual("value", value), Expression.Operation.FALSE);

  UUID anotherValue = new UUID(456L, 123L);
  assertProjectionStrict(spec, notIn("value", value, anotherValue),
      Expression.Operation.NOT_IN, "[4, 6]");
  assertProjectionStrictValue(spec, in("value", value, anotherValue), Expression.Operation.FALSE);
}
 
Example #14
Source File: OrcSchemaWithTypeVisitor.java    From iceberg with Apache License 2.0
public static <T> T visit(Type iType, TypeDescription schema, OrcSchemaWithTypeVisitor<T> visitor) {
  switch (schema.getCategory()) {
    case STRUCT:
      return visitRecord(iType != null ? iType.asStructType() : null, schema, visitor);

    case UNION:
      throw new UnsupportedOperationException("Cannot handle " + schema);

    case LIST:
      // guard against a null list type, matching the null handling in the MAP case
      Types.ListType list = iType != null ? iType.asListType() : null;
      return visitor.list(
          list, schema,
          visit(list != null ? list.elementType() : null, schema.getChildren().get(0), visitor));

    case MAP:
      Types.MapType map = iType != null ? iType.asMapType() : null;
      return visitor.map(
          map, schema,
          visit(map != null ? map.keyType() : null, schema.getChildren().get(0), visitor),
          visit(map != null ? map.valueType() : null, schema.getChildren().get(1), visitor));

    default:
      return visitor.primitive(iType != null ? iType.asPrimitiveType() : null, schema);
  }
}
 
Example #15
Source File: GenericManifestFile.java    From iceberg with Apache License 2.0
/**
 * Used by Avro reflection to instantiate this class when reading manifest files.
 */
public GenericManifestFile(org.apache.avro.Schema avroSchema) {
  this.avroSchema = avroSchema;

  List<Types.NestedField> fields = AvroSchemaUtil.convert(avroSchema).asStructType().fields();
  List<Types.NestedField> allFields = ManifestFile.schema().asStruct().fields();

  this.fromProjectionPos = new int[fields.size()];
  for (int i = 0; i < fromProjectionPos.length; i += 1) {
    boolean found = false;
    for (int j = 0; j < allFields.size(); j += 1) {
      if (fields.get(i).fieldId() == allFields.get(j).fieldId()) {
        found = true;
        fromProjectionPos[i] = j;
      }
    }

    if (!found) {
      throw new IllegalArgumentException("Cannot find projected field: " + fields.get(i));
    }
  }
}
 
Example #16
Source File: TestSchemaUpdate.java    From iceberg with Apache License 2.0
@Test
public void testMoveNestedFieldBeforeFirst() {
  Schema schema = new Schema(
      required(1, "id", Types.LongType.get()),
      required(2, "struct", Types.StructType.of(
          required(3, "count", Types.LongType.get()),
          required(4, "data", Types.StringType.get()))));
  Schema expected = new Schema(
      required(1, "id", Types.LongType.get()),
      required(2, "struct", Types.StructType.of(
          required(4, "data", Types.StringType.get()),
          required(3, "count", Types.LongType.get()))));

  Schema actual = new SchemaUpdate(schema, 4)
      .moveBefore("struct.data", "struct.count")
      .apply();

  Assert.assertEquals("Should move data first", expected.asStruct(), actual.asStruct());
}
 
Example #17
Source File: TestPredicateBinding.java    From iceberg with Apache License 2.0
@Test
public void testNotInPredicateBindingConversionDedupToNotEq() {
  StructType struct = StructType.of(required(15, "d", Types.DecimalType.of(9, 2)));
  UnboundPredicate<Double> unbound = Expressions.notIn("d", 12.40, 12.401, 12.402);
  Assert.assertEquals("Should create a NOT_IN unbound predicate", NOT_IN, unbound.op());

  Expression expr = unbound.bind(struct);
  BoundPredicate<BigDecimal> bound = assertAndUnwrap(expr);
  Assert.assertTrue("Should be a literal predicate", bound.isLiteralPredicate());
  Assert.assertEquals("Should convert literal set values to a single decimal",
      new BigDecimal("12.40"), bound.asLiteralPredicate().literal().value());
  Assert.assertEquals("Should reference correct field ID", 15, bound.ref().fieldId());
  Assert.assertEquals("Should change the NOT_IN operation to NOT_EQ", NOT_EQ, bound.op());
}
 
Example #18
Source File: TestHelpers.java    From iceberg with Apache License 2.0
public static void assertEqualsSafe(Types.StructType struct, Record rec, Row row) {
  List<Types.NestedField> fields = struct.fields();
  for (int i = 0; i < fields.size(); i += 1) {
    Type fieldType = fields.get(i).type();

    Object expectedValue = rec.get(i);
    Object actualValue = row.get(i);

    assertEqualsSafe(fieldType, expectedValue, actualValue);
  }
}
 
Example #19
Source File: TestReadProjection.java    From iceberg with Apache License 2.0
@Test
public void testMapProjection() throws IOException {
  Schema writeSchema = new Schema(
      Types.NestedField.required(0, "id", Types.LongType.get()),
      Types.NestedField.optional(5, "properties",
          Types.MapType.ofOptional(6, 7, Types.StringType.get(), Types.StringType.get()))
  );

  Map<String, String> properties = ImmutableMap.of("a", "A", "b", "B");

  Record record = GenericRecord.create(writeSchema.asStruct());
  record.setField("id", 34L);
  record.setField("properties", properties);

  Schema idOnly = new Schema(
      Types.NestedField.required(0, "id", Types.LongType.get())
  );

  Record projected = writeAndRead("id_only", writeSchema, idOnly, record);
  Assert.assertEquals("Should contain the correct id value", 34L, (long) projected.getField("id"));
  Assert.assertNull("Should not project properties map", projected.getField("properties"));

  Schema keyOnly = writeSchema.select("properties.key");
  projected = writeAndRead("key_only", writeSchema, keyOnly, record);
  Assert.assertNull("Should not project id", projected.getField("id"));
  Assert.assertEquals("Should project entire map",
      properties, toStringMap((Map) projected.getField("properties")));

  Schema valueOnly = writeSchema.select("properties.value");
  projected = writeAndRead("value_only", writeSchema, valueOnly, record);
  Assert.assertNull("Should not project id", projected.getField("id"));
  Assert.assertEquals("Should project entire map",
      properties, toStringMap((Map) projected.getField("properties")));

  Schema mapOnly = writeSchema.select("properties");
  projected = writeAndRead("map_only", writeSchema, mapOnly, record);
  Assert.assertNull("Should not project id", projected.getField("id"));
  Assert.assertEquals("Should project entire map",
      properties, toStringMap((Map) projected.getField("properties")));
}
 
Example #20
Source File: TestBuildOrcProjection.java    From iceberg with Apache License 2.0
@Test
public void testProjectionNested() {
  Types.StructType nestedStructType = Types.StructType.of(
      optional(2, "b", Types.StringType.get()),
      optional(3, "c", Types.DateType.get())
  );
  Schema originalSchema = new Schema(
      optional(1, "a", nestedStructType)
  );

  // Original mapping (stored in ORC)
  TypeDescription orcSchema = ORCSchemaUtil.convert(originalSchema);

  // Evolve schema
  Types.StructType newNestedStructType = Types.StructType.of(
      optional(3, "cc", Types.DateType.get()),
      optional(2, "bb", Types.StringType.get())
  );
  Schema evolveSchema = new Schema(
      optional(1, "aa", newNestedStructType)
  );

  TypeDescription newOrcSchema = ORCSchemaUtil.buildOrcProjection(evolveSchema, orcSchema);
  assertEquals(1, newOrcSchema.getChildren().size());
  assertEquals(TypeDescription.Category.STRUCT, newOrcSchema.findSubtype("a").getCategory());
  TypeDescription nestedCol = newOrcSchema.findSubtype("a");
  assertEquals(2, nestedCol.findSubtype("c").getId());
  assertEquals(TypeDescription.Category.DATE, nestedCol.findSubtype("c").getCategory());
  assertEquals(3, nestedCol.findSubtype("b").getId());
  assertEquals(TypeDescription.Category.STRING, nestedCol.findSubtype("b").getCategory());
}
 
Example #21
Source File: SchemaEvolutionTest.java    From iceberg with Apache License 2.0
@Test
public void floatToDouble() throws IOException {
  // Set up a new table to test this conversion
  Schema schema = new Schema(optional(1, "float", Types.FloatType.get()));
  File location = Files.createTempDirectory("temp").toFile();
  HadoopTables tables = new HadoopTables(spark.sparkContext().hadoopConfiguration());
  Table floatTable = tables.create(schema, location.toString());

  floatTable.updateSchema().updateColumn("float", Types.DoubleType.get()).commit();

  log.info("Promote float type to double type:\n" + floatTable.schema().toString());
}
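Float-to-double is one of the lossless promotions Iceberg permits; a hedged sketch of the other legal updateColumn calls (the column names are assumptions, not from the test):

// int -> long and widening decimal precision at the same scale are also allowed;
// narrowing changes (e.g. long -> int) fail validation
table.updateSchema()
    .updateColumn("id", Types.LongType.get())            // int -> long
    .updateColumn("price", Types.DecimalType.of(12, 2))  // decimal(9,2) -> decimal(12,2)
    .commit();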
 
Example #22
Source File: FlinkTypeToType.java    From iceberg with Apache License 2.0
@SuppressWarnings("checkstyle:CyclomaticComplexity")
@Override
public Type atomic(AtomicDataType type) {
  LogicalType inner = type.getLogicalType();
  if (inner instanceof VarCharType ||
      inner instanceof CharType) {
    return Types.StringType.get();
  } else if (inner instanceof BooleanType) {
    return Types.BooleanType.get();
  } else if (inner instanceof IntType ||
      inner instanceof SmallIntType ||
      inner instanceof TinyIntType) {
    return Types.IntegerType.get();
  } else if (inner instanceof BigIntType) {
    return Types.LongType.get();
  } else if (inner instanceof VarBinaryType) {
    return Types.BinaryType.get();
  } else if (inner instanceof BinaryType) {
    BinaryType binaryType = (BinaryType) inner;
    return Types.FixedType.ofLength(binaryType.getLength());
  } else if (inner instanceof FloatType) {
    return Types.FloatType.get();
  } else if (inner instanceof DoubleType) {
    return Types.DoubleType.get();
  } else if (inner instanceof DateType) {
    return Types.DateType.get();
  } else if (inner instanceof TimeType) {
    return Types.TimeType.get();
  } else if (inner instanceof TimestampType) {
    return Types.TimestampType.withoutZone();
  } else if (inner instanceof LocalZonedTimestampType) {
    return Types.TimestampType.withZone();
  } else if (inner instanceof DecimalType) {
    DecimalType decimalType = (DecimalType) inner;
    return Types.DecimalType.of(decimalType.getPrecision(), decimalType.getScale());
  } else {
    throw new UnsupportedOperationException("Not a supported type: " + type.toString());
  }
}
 
Example #23
Source File: TestLocalScan.java    From iceberg with Apache License 2.0
@Test
public void testFilterWithDateAndTimestamp() throws IOException {
  // TODO: Add multiple timestamp tests - there's an issue with ORC caching TZ in ThreadLocal, so it's not possible
  //   to change TZ and test with ORC as they will produce incompatible values.
  Schema schema = new Schema(
      required(1, "timestamp_with_zone", Types.TimestampType.withZone()),
      required(2, "timestamp_without_zone", Types.TimestampType.withoutZone()),
      required(3, "date", Types.DateType.get()),
      required(4, "time", Types.TimeType.get())
  );

  File tableLocation = temp.newFolder("complex_filter_table");
  Assert.assertTrue(tableLocation.delete());

  Table table = TABLES.create(
      schema, PartitionSpec.unpartitioned(),
      ImmutableMap.of(TableProperties.DEFAULT_FILE_FORMAT, format.name()),
      tableLocation.getAbsolutePath());

  List<Record> expected = RandomGenericData.generate(schema, 100, 435691832918L);
  DataFile file = writeFile(tableLocation.toString(), format.addExtension("record-file"), schema, expected);
  table.newFastAppend().appendFile(file).commit();

  for (Record r : expected) {
    Iterable<Record> filterResult = IcebergGenerics.read(table)
        .where(equal("timestamp_with_zone", r.getField("timestamp_with_zone").toString()))
        .where(equal("timestamp_without_zone", r.getField("timestamp_without_zone").toString()))
        .where(equal("date", r.getField("date").toString()))
        .where(equal("time", r.getField("time").toString()))
        .build();

    Assert.assertTrue(filterResult.iterator().hasNext());
    Record readRecord = filterResult.iterator().next();
    Assert.assertEquals(r.getField("timestamp_with_zone"), readRecord.getField("timestamp_with_zone"));
  }
}
 
Example #24
Source File: SnapshotFunctionalityTest.java    From iceberg with Apache License 2.0
@Before
public void before() throws IOException {
  Schema schema = new Schema(
      optional(1, "id", Types.IntegerType.get()),
      optional(2, "data", Types.StringType.get())
  );

  spark = SparkSession.builder().master("local[2]").getOrCreate();

  tableLocation = Files.createTempDirectory("temp").toFile();

  HadoopTables tables = new HadoopTables(spark.sparkContext().hadoopConfiguration());
  PartitionSpec spec = PartitionSpec.unpartitioned();
  table = tables.create(schema, spec, tableLocation.toString());

  List<SimpleRecord> expected = Lists.newArrayList(
      new SimpleRecord(1, "a"),
      new SimpleRecord(2, "b"),
      new SimpleRecord(3, "c")
  );

  Dataset<Row> df = spark.createDataFrame(expected, SimpleRecord.class);

  for (int i = 0; i < 5; i++) {
    df.select("id", "data").write()
        .format("iceberg")
        .mode("append")
        .save(tableLocation.toString());
  }
  table.refresh();
}
 
Example #25
Source File: TestSchemaUpdate.java    From iceberg with Apache License 2.0
@Test
public void testMoveTopLevelColumnAfterLast() {
  Schema schema = new Schema(
      required(1, "id", Types.LongType.get()),
      required(2, "data", Types.StringType.get()));
  Schema expected = new Schema(
      required(2, "data", Types.StringType.get()),
      required(1, "id", Types.LongType.get()));

  Schema actual = new SchemaUpdate(schema, 2)
      .moveAfter("id", "data")
      .apply();

  Assert.assertEquals("Should move data first", expected.asStruct(), actual.asStruct());
}
 
Example #26
Source File: TestDataTableScan.java    From iceberg with Apache License 2.0
@Test
public void testTableScanHonorsSelect() {
  TableScan scan = table.newScan().select("id");

  Schema expectedSchema = new Schema(required(1, "id", Types.IntegerType.get()));

  assertEquals("A tableScan.select() should prune the schema",
      expectedSchema.asStruct(),
      scan.schema().asStruct());
}
 
Example #27
Source File: RewriteManifestsAction.java    From iceberg with Apache License 2.0
private static ManifestFile writeManifest(
    List<Row> rows, int startIndex, int endIndex, Broadcast<FileIO> io,
    String location, int format, PartitionSpec spec, StructType sparkType) throws IOException {

  String manifestName = "optimized-m-" + UUID.randomUUID();
  Path manifestPath = new Path(location, manifestName);
  OutputFile outputFile = io.value().newOutputFile(FileFormat.AVRO.addExtension(manifestPath.toString()));

  Types.StructType dataFileType = DataFile.getType(spec.partitionType());
  SparkDataFile wrapper = new SparkDataFile(dataFileType, sparkType);

  ManifestWriter writer = ManifestFiles.write(format, spec, outputFile, null);

  try {
    for (int index = startIndex; index < endIndex; index++) {
      Row row = rows.get(index);
      long snapshotId = row.getLong(0);
      long sequenceNumber = row.getLong(1);
      Row file = row.getStruct(2);
      writer.existing(wrapper.wrap(file), snapshotId, sequenceNumber);
    }
  } finally {
    writer.close();
  }

  return writer.toManifestFile();
}
 
Example #28
Source File: DataTestHelpers.java    From iceberg with Apache License 2.0
public static void assertEquals(Types.ListType list, List<?> expected, List<?> actual) {
  Type elementType = list.elementType();

  Assert.assertEquals("List size should match", expected.size(), actual.size());

  for (int i = 0; i < expected.size(); i += 1) {
    Object expectedValue = expected.get(i);
    Object actualValue = actual.get(i);

    assertEquals(elementType, expectedValue, actualValue);
  }
}
 
Example #29
Source File: StrictMetricsEvaluator.java    From iceberg with Apache License 2.0
@Override
public <T> Boolean in(BoundReference<T> ref, Set<T> literalSet) {
  Integer id = ref.fieldId();
  Types.NestedField field = struct.field(id);
  Preconditions.checkNotNull(field, "Cannot filter by nested column: %s", schema.findField(id));

  if (canContainNulls(id)) {
    return ROWS_MIGHT_NOT_MATCH;
  }

  if (lowerBounds != null && lowerBounds.containsKey(id) &&
      upperBounds != null && upperBounds.containsKey(id)) {
    // similar to the implementation in eq, first check if the lower bound is in the set
    T lower = Conversions.fromByteBuffer(struct.field(id).type(), lowerBounds.get(id));
    if (!literalSet.contains(lower)) {
      return ROWS_MIGHT_NOT_MATCH;
    }

    // check if the upper bound is in the set
    T upper = Conversions.fromByteBuffer(field.type(), upperBounds.get(id));
    if (!literalSet.contains(upper)) {
      return ROWS_MIGHT_NOT_MATCH;
    }

    // finally check if the lower bound and the upper bound are equal
    if (ref.comparator().compare(lower, upper) != 0) {
      return ROWS_MIGHT_NOT_MATCH;
    }

    // All values must be in the set if the lower bound and the upper bound are in the set and are equal.
    return ROWS_MUST_MATCH;
  }

  return ROWS_MIGHT_NOT_MATCH;
}
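For intuition, the strict evaluator only answers ROWS_MUST_MATCH when the column is constant within the file and that constant is in the set; a hedged illustration:

// file metrics for column "x": lower = 5, upper = 5, no nulls
// in("x", 5, 7) -> ROWS_MUST_MATCH       (every row is exactly 5, and 5 is in the set)
// in("x", 7, 9) -> ROWS_MIGHT_NOT_MATCH  (5 is not in the set)
// lower = 5, upper = 9, in("x", 5, 9) -> ROWS_MIGHT_NOT_MATCH (rows between the bounds, e.g. 7, may fall outside the set)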
 
Example #30
Source File: ParquetAvroValueReaders.java    From iceberg with Apache License 2.0
@Override
public ParquetValueReader<?> list(Types.ListType expectedList, GroupType array,
                                  ParquetValueReader<?> elementReader) {
  GroupType repeated = array.getFields().get(0).asGroupType();
  String[] repeatedPath = currentPath();

  int repeatedD = type.getMaxDefinitionLevel(repeatedPath) - 1;
  int repeatedR = type.getMaxRepetitionLevel(repeatedPath) - 1;

  Type elementType = repeated.getType(0);
  int elementD = type.getMaxDefinitionLevel(path(elementType.getName())) - 1;

  return new ListReader<>(repeatedD, repeatedR, ParquetValueReaders.option(elementType, elementD, elementReader));
}