org.apache.iceberg.types.Type Java Examples

The following examples show how to use org.apache.iceberg.types.Type. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: SchemaParser.java    From iceberg with Apache License 2.0 6 votes vote down vote up
/**
 * Writes {@code type} as JSON by delegating to the overload that matches its kind.
 *
 * <p>Primitive types go to the primitive overload; struct, list and map types go to the
 * corresponding nested-type overloads.
 *
 * @param type the type to serialize
 * @param generator the generator that receives the output
 * @throws IOException declared for the delegated writers
 * @throws IllegalArgumentException if the type is not primitive and is not a struct, list or map
 */
static void toJson(Type type, JsonGenerator generator) throws IOException {
  if (type.isPrimitiveType()) {
    toJson(type.asPrimitiveType(), generator);
    return;
  }

  Type.NestedType nestedType = type.asNestedType();
  switch (type.typeId()) {
    case STRUCT:
      toJson(nestedType.asStructType(), generator);
      return;
    case LIST:
      toJson(nestedType.asListType(), generator);
      return;
    case MAP:
      toJson(nestedType.asMapType(), generator);
      return;
    default:
      throw new IllegalArgumentException("Cannot write unknown type: " + type);
  }
}
 
Example #2
Source File: MessageTypeToType.java    From iceberg with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the list type for a list group whose element type has already been converted.
 *
 * <p>The element is taken as the first field of the group's first (repeated) field. Its id is
 * registered as an alias under the element's name, and the resulting list is optional or
 * required according to the element's repetition.
 *
 * @param array the group describing the list
 * @param elementType the already-converted element type
 * @return a list type whose element id comes from the element field
 * @throws IllegalArgumentException if the element itself has repetition REPEATED
 */
@Override
public Type list(GroupType array, Type elementType) {
  org.apache.parquet.schema.Type element = array.getType(0).asGroupType().getType(0);

  Preconditions.checkArgument(
      !element.isRepetition(Repetition.REPEATED),
      "Elements cannot have repetition REPEATED: %s", element);

  int id = getId(element);
  addAlias(element.getName(), id);

  boolean elementIsOptional = element.isRepetition(Repetition.OPTIONAL);
  return elementIsOptional
      ? Types.ListType.ofOptional(id, elementType)
      : Types.ListType.ofRequired(id, elementType);
}
 
Example #3
Source File: ExpressionToSearchArgument.java    From iceberg with Apache License 2.0 6 votes vote down vote up
/**
 * Maps an Iceberg type to the {@code PredicateLeaf.Type} used for it.
 *
 * <p>INTEGER, LONG and TIME all map to LONG; FLOAT and DOUBLE both map to FLOAT.
 *
 * @param icebergType the type to translate
 * @return the matching predicate leaf type
 * @throws UnsupportedOperationException if the type has no mapping
 */
private PredicateLeaf.Type type(Type icebergType) {
  final PredicateLeaf.Type leafType;
  switch (icebergType.typeId()) {
    case BOOLEAN:
      leafType = PredicateLeaf.Type.BOOLEAN;
      break;
    case INTEGER:
    case LONG:
    case TIME:
      leafType = PredicateLeaf.Type.LONG;
      break;
    case FLOAT:
    case DOUBLE:
      leafType = PredicateLeaf.Type.FLOAT;
      break;
    case DATE:
      leafType = PredicateLeaf.Type.DATE;
      break;
    case TIMESTAMP:
      leafType = PredicateLeaf.Type.TIMESTAMP;
      break;
    case STRING:
      leafType = PredicateLeaf.Type.STRING;
      break;
    case DECIMAL:
      leafType = PredicateLeaf.Type.DECIMAL;
      break;
    default:
      throw new UnsupportedOperationException("Type " + icebergType + " not supported in ORC SearchArguments");
  }
  return leafType;
}
 
Example #4
Source File: Literals.java    From iceberg with Apache License 2.0 6 votes vote down vote up
/**
 * Converts this literal to {@code type}.
 *
 * <p>FLOAT returns this literal unchanged, DOUBLE widens the value, and DECIMAL rescales the
 * value to the target scale using HALF_UP rounding.
 *
 * @param type the target type
 * @return the converted literal, or {@code null} when the target type is not handled here
 */
@Override
@SuppressWarnings("unchecked")
public <T> Literal<T> to(Type type) {
  switch (type.typeId()) {
    case FLOAT:
      return (Literal<T>) this;
    case DOUBLE: {
      DoubleLiteral widened = new DoubleLiteral(value().doubleValue());
      return (Literal<T>) widened;
    }
    case DECIMAL: {
      Types.DecimalType decimalType = (Types.DecimalType) type;
      BigDecimal rescaled =
          BigDecimal.valueOf(value()).setScale(decimalType.scale(), RoundingMode.HALF_UP);
      return (Literal<T>) new DecimalLiteral(rescaled);
    }
    default:
      return null;
  }
}
 
Example #5
Source File: InternalRecordWrapper.java    From iceberg with Apache License 2.0 6 votes vote down vote up
/**
 * Returns a function that converts a value of the given type, or {@code null} when the type has
 * no conversion here.
 *
 * <p>Dates, times and timestamps are passed to the matching {@code DateTimeUtil} method, fixed
 * byte arrays are wrapped in a {@code ByteBuffer}, and structs are wrapped by a nested
 * {@code InternalRecordWrapper}.
 *
 * @param type the type of the values to convert
 * @return the conversion function, or {@code null} for any other type
 */
private static Function<Object, Object> converter(Type type) {
  switch (type.typeId()) {
    case DATE:
      return value -> DateTimeUtil.daysFromDate((LocalDate) value);
    case TIME:
      return value -> DateTimeUtil.microsFromTime((LocalTime) value);
    case TIMESTAMP: {
      boolean adjustToUtc = ((Types.TimestampType) type).shouldAdjustToUTC();
      if (adjustToUtc) {
        return value -> DateTimeUtil.microsFromTimestamptz((OffsetDateTime) value);
      }
      return value -> DateTimeUtil.microsFromTimestamp((LocalDateTime) value);
    }
    case FIXED:
      return value -> ByteBuffer.wrap((byte[]) value);
    case STRUCT: {
      // the nested wrapper is created once and reused by every call of the returned function
      InternalRecordWrapper nestedWrapper = new InternalRecordWrapper(type.asStructType());
      return value -> nestedWrapper.wrap((StructLike) value);
    }
    default:
      return null;
  }
}
 
Example #6
Source File: Literals.java    From iceberg with Apache License 2.0 6 votes vote down vote up
/**
 * Converts this literal to {@code type}.
 *
 * <p>FIXED returns this literal only when the number of remaining bytes equals the fixed
 * length; BINARY wraps the same value in a binary literal.
 *
 * @param type the target type
 * @return the converted literal, or {@code null} on a length mismatch or an unhandled type
 */
@Override
@SuppressWarnings("unchecked")
public <T> Literal<T> to(Type type) {
  switch (type.typeId()) {
    case FIXED: {
      Types.FixedType fixedType = (Types.FixedType) type;
      boolean lengthMatches = value().remaining() == fixedType.length();
      return lengthMatches ? (Literal<T>) this : null;
    }
    case BINARY: {
      BinaryLiteral binary = new BinaryLiteral(value());
      return (Literal<T>) binary;
    }
    default:
      return null;
  }
}
 
Example #7
Source File: TestDates.java    From iceberg with Apache License 2.0 6 votes vote down vote up
/**
 * Checks the result types reported for a date source: the year and month transforms report
 * integer, the day transform reports date.
 */
@Test
public void testDatesReturnType() {
  Types.DateType dateType = Types.DateType.get();

  Transform<Integer, Integer> yearTransform = Transforms.year(dateType);
  Assert.assertEquals(Types.IntegerType.get(), yearTransform.getResultType(dateType));

  Transform<Integer, Integer> monthTransform = Transforms.month(dateType);
  Assert.assertEquals(Types.IntegerType.get(), monthTransform.getResultType(dateType));

  Transform<Integer, Integer> dayTransform = Transforms.day(dateType);
  Assert.assertEquals(Types.DateType.get(), dayTransform.getResultType(dateType));
}
 
Example #8
Source File: ManifestsTable.java    From presto with Apache License 2.0 6 votes vote down vote up
/**
 * Writes one block entry per partition field summary.
 *
 * <p>Each entry holds, in order: whether the summary contains null, the lower bound and the
 * upper bound. Both bounds are decoded with the partition field's type and rendered through the
 * field transform's human-readable form. Summaries are matched to partition fields by position.
 */
private static void writePartitionSummaries(BlockBuilder blockBuilder, List<PartitionFieldSummary> summaries, PartitionSpec partitionSpec)
{
    int position = 0;
    for (PartitionFieldSummary fieldSummary : summaries) {
        PartitionField partitionField = partitionSpec.fields().get(position);
        Type boundType = partitionSpec.partitionType().fields().get(position).type();

        BlockBuilder entryBuilder = blockBuilder.beginBlockEntry();
        BOOLEAN.writeBoolean(entryBuilder, fieldSummary.containsNull());
        VARCHAR.writeString(
                entryBuilder,
                partitionField.transform().toHumanString(Conversions.fromByteBuffer(boundType, fieldSummary.lowerBound())));
        VARCHAR.writeString(
                entryBuilder,
                partitionField.transform().toHumanString(Conversions.fromByteBuffer(boundType, fieldSummary.upperBound())));
        blockBuilder.closeEntry();

        position++;
    }
}
 
Example #9
Source File: ParquetUtil.java    From iceberg with Apache License 2.0 6 votes vote down vote up
/**
 * Records {@code min} as the lower bound for column {@code id} when it is smaller than the bound
 * already stored, or when no bound is stored yet.
 *
 * <p>In full metrics mode the literal is stored as is. Otherwise the mode is treated as a
 * truncate mode: string, fixed and binary bounds are truncated to its length, and every other
 * type is stored untruncated.
 *
 * @param lowerBounds bounds collected so far, keyed by field id; updated in place
 * @param id the field id of the column
 * @param type the column type, used to pick the truncation strategy
 * @param min the candidate lower bound
 * @param metricsMode the metrics mode in effect for the column
 */
@SuppressWarnings("unchecked")
private static <T> void updateMin(Map<Integer, Literal<?>> lowerBounds, int id, Type type,
                                  Literal<T> min, MetricsMode metricsMode) {
  Literal<T> existing = (Literal<T>) lowerBounds.get(id);
  boolean isNewMin = existing == null || min.comparator().compare(min.value(), existing.value()) < 0;
  if (!isNewMin) {
    return;
  }

  if (metricsMode == MetricsModes.Full.get()) {
    lowerBounds.put(id, min);
    return;
  }

  int maxLength = ((MetricsModes.Truncate) metricsMode).length();
  switch (type.typeId()) {
    case STRING:
      lowerBounds.put(id, UnicodeUtil.truncateStringMin((Literal<CharSequence>) min, maxLength));
      return;
    case FIXED:
    case BINARY:
      lowerBounds.put(id, BinaryUtil.truncateBinaryMin((Literal<ByteBuffer>) min, maxLength));
      return;
    default:
      lowerBounds.put(id, min);
  }
}
 
Example #10
Source File: SchemaParser.java    From iceberg with Apache License 2.0 6 votes vote down vote up
/**
 * Parses a type from its JSON representation.
 *
 * <p>A textual node is parsed as a primitive type string. An object node is dispatched on its
 * type field to the struct, list or map parser.
 *
 * @param json the node to parse
 * @return the parsed type
 * @throws IllegalArgumentException if the node is neither text nor an object, or if the object
 *     has no type field or an unrecognized one
 */
private static Type typeFromJson(JsonNode json) {
  if (json.isTextual()) {
    return Types.fromPrimitiveString(json.asText());
  }

  if (json.isObject()) {
    // get() returns null when the field is absent; check it so a missing type field is reported
    // by the descriptive exception below instead of a NullPointerException
    JsonNode typeNode = json.get(TYPE);
    if (typeNode != null) {
      String type = typeNode.asText();
      if (STRUCT.equals(type)) {
        return structFromJson(json);
      } else if (LIST.equals(type)) {
        return listFromJson(json);
      } else if (MAP.equals(type)) {
        return mapFromJson(json);
      }
    }
  }

  throw new IllegalArgumentException("Cannot parse type from json: " + json);
}
 
Example #11
Source File: AvroSchemaWithTypeVisitor.java    From iceberg with Apache License 2.0 6 votes vote down vote up
/**
 * Visits an Avro schema together with the matching Iceberg type, dispatching on the Avro schema
 * type.
 *
 * <p>{@code iType} may be {@code null}; in that case {@code null} is passed wherever the
 * corresponding Iceberg type would be.
 *
 * @param iType the Iceberg type paired with the schema, or {@code null}
 * @param schema the Avro schema to visit
 * @param visitor the visitor that produces results
 * @return the visitor's result for this schema
 */
public static <T> T visit(Type iType, Schema schema, AvroSchemaWithTypeVisitor<T> visitor) {
  switch (schema.getType()) {
    case RECORD: {
      Types.StructType struct = null;
      if (iType != null) {
        struct = iType.asStructType();
      }
      return visitRecord(struct, schema, visitor);
    }

    case UNION:
      return visitUnion(iType, schema, visitor);

    case ARRAY:
      return visitArray(iType, schema, visitor);

    case MAP: {
      Types.MapType mapType = null;
      if (iType != null) {
        mapType = iType.asMapType();
      }

      Type valueType = null;
      if (mapType != null) {
        valueType = mapType.valueType();
      }

      // only the value schema is visited here; the key schema is not
      T valueResult = visit(valueType, schema.getValueType(), visitor);
      return visitor.map(mapType, schema, valueResult);
    }

    default: {
      Type.PrimitiveType primitive = null;
      if (iType != null) {
        primitive = iType.asPrimitiveType();
      }
      return visitor.primitive(primitive, schema);
    }
  }
}
 
Example #12
Source File: GenericsHelpers.java    From iceberg with Apache License 2.0 6 votes vote down vote up
/**
 * Asserts that the key and value arrays of {@code actual} match the entries of {@code expected},
 * compared position by position in the iteration order of {@code expected}.
 *
 * @param map the map type describing the key and value types
 * @param expected the expected entries
 * @param actual the map data under test
 */
private static void assertEqualsUnsafe(Types.MapType map, Map<?, ?> expected, MapData actual) {
  Type keyType = map.keyType();
  Type valueType = map.valueType();

  List<Map.Entry<?, ?>> expectedElements = Lists.newArrayList(expected.entrySet());
  ArrayData actualKeys = actual.keyArray();
  ArrayData actualValues = actual.valueArray();

  for (int i = 0; i < expectedElements.size(); i += 1) {
    Map.Entry<?, ?> expectedPair = expectedElements.get(i);
    Object actualKey = actualKeys.get(i, convert(keyType));
    // values must be read with the value type; reading them with the key type is wrong whenever
    // the two types differ
    Object actualValue = actualValues.get(i, convert(valueType));

    assertEqualsUnsafe(keyType, expectedPair.getKey(), actualKey);
    assertEqualsUnsafe(valueType, expectedPair.getValue(), actualValue);
  }
}
 
Example #13
Source File: TestMiscLiteralConversions.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that converting a literal to the type it already has returns the same instance, for
 * one literal of each listed type.
 */
@Test
public void testIdentityConversions() {
  List<Pair<Literal<?>, Type>> pairs = Arrays.asList(
      Pair.of(Literal.of(true), Types.BooleanType.get()),
      Pair.of(Literal.of(34), Types.IntegerType.get()),
      Pair.of(Literal.of(34L), Types.LongType.get()),
      Pair.of(Literal.of(34.11F), Types.FloatType.get()),
      Pair.of(Literal.of(34.55D), Types.DoubleType.get()),
      Pair.of(Literal.of("34.55"), Types.DecimalType.of(9, 2)),
      Pair.of(Literal.of("2017-08-18"), Types.DateType.get()),
      Pair.of(Literal.of("14:21:01.919"), Types.TimeType.get()),
      Pair.of(Literal.of("2017-08-18T14:21:01.919"), Types.TimestampType.withoutZone()),
      Pair.of(Literal.of("abc"), Types.StringType.get()),
      Pair.of(Literal.of(UUID.randomUUID()), Types.UUIDType.get()),
      Pair.of(Literal.of(new byte[] {0, 1, 2}), Types.FixedType.ofLength(3)),
      Pair.of(Literal.of(ByteBuffer.wrap(new byte[] {0, 1, 2})), Types.BinaryType.get())
  );

  for (Pair<Literal<?>, Type> conversion : pairs) {
    Type targetType = conversion.second();

    // bring the literal into the target type first (date/times start out as strings)
    Literal<?> converted = conversion.first().to(targetType);

    // a second conversion to the same type must hand back the very same instance
    Assert.assertSame("Converting twice should produce identical values",
        converted, converted.to(targetType));
  }
}
 
Example #14
Source File: TestSchemaConversions.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that a required map of long to binary converts to the equivalent Avro map schema, and
 * back.
 */
@Test
public void testMapOfLongToBytes() {
  Schema keySchema = Schema.create(Schema.Type.LONG);
  Schema valueSchema = Schema.create(Schema.Type.BYTES);
  Schema avroMap = AvroSchemaUtil.createMap(33, keySchema, 34, valueSchema);

  Type icebergMap = Types.MapType.ofRequired(33, 34, Types.LongType.get(), Types.BinaryType.get());

  Assert.assertEquals("Avro schema to map", icebergMap, AvroSchemaUtil.convert(avroMap));
  Assert.assertEquals("Map to Avro schema", avroMap, AvroSchemaUtil.convert(icebergMap));
}
 
Example #15
Source File: BaseFile.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Used by Avro reflection to instantiate this class when reading manifest files.
 *
 * <p>Converts the Avro schema to a struct type, takes the partition type from its "partition"
 * field (or the empty struct type when that field is absent), and records for every projected
 * field the position of the field with the same id in the full data file type.
 *
 * @param avroSchema the Avro schema of the projection being read
 * @throws IllegalArgumentException if a projected field id is not present in the full type
 */
BaseFile(Schema avroSchema) {
  this.avroSchema = avroSchema;

  Types.StructType projection = AvroSchemaUtil.convert(avroSchema).asNestedType().asStructType();

  // partition type may be null if the field was not projected
  Type partitionFieldType = projection.fieldType("partition");
  this.partitionType = partitionFieldType != null
      ? partitionFieldType.asNestedType().asStructType()
      : EMPTY_STRUCT_TYPE;

  List<Types.NestedField> projectedFields = projection.fields();
  List<Types.NestedField> fullFields = DataFile.getType(partitionType).fields();
  this.fromProjectionPos = new int[projectedFields.size()];
  for (int pos = 0; pos < fromProjectionPos.length; pos += 1) {
    Types.NestedField projectedField = projectedFields.get(pos);

    // scan every candidate without stopping early, so the last field with a matching id wins
    int fullPos = -1;
    for (int candidate = 0; candidate < fullFields.size(); candidate += 1) {
      if (projectedField.fieldId() == fullFields.get(candidate).fieldId()) {
        fullPos = candidate;
      }
    }

    if (fullPos < 0) {
      throw new IllegalArgumentException("Cannot find projected field: " + projectedField);
    }
    fromProjectionPos[pos] = fullPos;
  }

  this.partitionData = new PartitionData(partitionType);
}
 
Example #16
Source File: IcebergInputFormat.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the constants map for {@code task}, or an empty map when the expected schema selects
 * none of the spec's identity partition source columns.
 *
 * @param task the scan task whose partition spec is inspected
 * @param converter passed through to {@code PartitionUtil.constantsMap}
 * @return the constants map, or an empty map
 */
private Map<Integer, ?> constantsMap(FileScanTask task, BiFunction<Type, Object, Object> converter) {
  Set<Integer> identitySourceIds = task.spec().identitySourceIds();
  Schema identityProjection = TypeUtil.select(expectedSchema, identitySourceIds);

  if (identityProjection.columns().isEmpty()) {
    return Collections.emptyMap();
  }
  return PartitionUtil.constantsMap(task, converter);
}
 
Example #17
Source File: Literals.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Converts this literal to {@code type}; only TIME is accepted.
 *
 * @param type the target type
 * @return this literal when the target is TIME, otherwise {@code null}
 */
@Override
@SuppressWarnings("unchecked")
public <T> Literal<T> to(Type type) {
  boolean targetIsTime = type.typeId() == Type.TypeID.TIME;
  return targetIsTime ? (Literal<T>) this : null;
}
 
Example #18
Source File: Schema.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Looks up the {@link Type} of the sub-field with the given field id.
 *
 * @param id a field id
 * @return the sub-field's Type, or null when no field has that id
 */
public Type findType(int id) {
  NestedField match = lazyIdToField().get(id);
  return match == null ? null : match.type();
}
 
Example #19
Source File: Literals.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Converts this literal to {@code type}.
 *
 * <p>LONG returns this literal unchanged. INTEGER and DATE first check that the value fits in an
 * {@code int} and return the above-max or below-min marker when it does not. FLOAT, DOUBLE,
 * TIME, TIMESTAMP and DECIMAL build a new literal from the value.
 *
 * @param type the target type
 * @return the converted literal, an out-of-range marker, or {@code null} for an unhandled type
 */
@Override
@SuppressWarnings("unchecked")
public <T> Literal<T> to(Type type) {
  switch (type.typeId()) {
    case INTEGER: {
      if (value() > Integer.MAX_VALUE) {
        return aboveMax();
      }
      if (value() < Integer.MIN_VALUE) {
        return belowMin();
      }
      return (Literal<T>) new IntegerLiteral(value().intValue());
    }
    case LONG:
      return (Literal<T>) this;
    case FLOAT:
      return (Literal<T>) new FloatLiteral(value().floatValue());
    case DOUBLE:
      return (Literal<T>) new DoubleLiteral(value().doubleValue());
    case TIME:
      return (Literal<T>) new TimeLiteral(value());
    case TIMESTAMP:
      return (Literal<T>) new TimestampLiteral(value());
    case DATE: {
      if (value() > Integer.MAX_VALUE) {
        return aboveMax();
      }
      if (value() < Integer.MIN_VALUE) {
        return belowMin();
      }
      return (Literal<T>) new DateLiteral(value().intValue());
    }
    case DECIMAL: {
      Types.DecimalType decimalType = (Types.DecimalType) type;
      // a rounding mode is not needed for an integral value, but one is passed to avoid warnings
      BigDecimal rescaled =
          BigDecimal.valueOf(value()).setScale(decimalType.scale(), RoundingMode.HALF_UP);
      return (Literal<T>) new DecimalLiteral(rescaled);
    }
    default:
      return null;
  }
}
 
Example #20
Source File: RandomData.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Generates the next random primitive value and counts the call.
 *
 * <p>While the running count is at most {@code dictionaryEncodedRows} the
 * dictionary-encodable generator is used; after that the general generator is used.
 *
 * @param primitive the primitive type to generate a value for
 * @param rand the source of randomness
 * @return the generated value
 */
@Override
protected Object randomValue(Type.PrimitiveType primitive, Random rand) {
  this.rowCount += 1;
  boolean pastDictionaryRows = rowCount > dictionaryEncodedRows;
  return pastDictionaryRows
      ? RandomUtil.generatePrimitive(primitive, rand)
      : RandomUtil.generateDictionaryEncodablePrimitive(primitive, rand);
}
 
Example #21
Source File: PartitionSpec.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the struct of partition values described by this spec: one optional field per partition
 * field, typed with the result type of that field's transform applied to its source column type.
 *
 * @return a {@link StructType} for partition data defined by this spec.
 */
public StructType partitionType() {
  List<Types.NestedField> partitionFields = Lists.newArrayListWithExpectedSize(fields.length);

  for (PartitionField partitionField : fields) {
    Type inputType = schema.findType(partitionField.sourceId());
    Type outputType = partitionField.transform().getResultType(inputType);
    Types.NestedField partitionColumn =
        Types.NestedField.optional(partitionField.fieldId(), partitionField.name(), outputType);
    partitionFields.add(partitionColumn);
  }

  return Types.StructType.of(partitionFields);
}
 
Example #22
Source File: ParquetUtil.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Decides whether bounds should be stored for the column at {@code columnPath}.
 *
 * <p>The path is resolved name by name through nested structs, starting at the schema's root
 * struct. Bounds are stored only when every step resolves through a struct and the final type is
 * primitive.
 *
 * @param columnPath the path of field names leading to the column
 * @param schema the schema the path is resolved against
 * @return {@code true} when the path ends at a primitive field
 */
private static boolean shouldStoreBounds(ColumnPath columnPath, Schema schema) {
  Iterator<String> remainingNames = columnPath.iterator();
  Type current = schema.asStruct();

  while (remainingNames.hasNext()) {
    boolean canDescend = current != null && current.isStructType();
    if (!canDescend) {
      return false;
    }
    current = current.asStructType().fieldType(remainingNames.next());
  }

  if (current == null) {
    return false;
  }
  return current.isPrimitiveType();
}
 
Example #23
Source File: MessageTypeToType.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Maps an integer logical type annotation to an integer or long type.
 *
 * <p>Widths below 32 bits, and signed 32-bit, map to integer; everything else maps to long.
 *
 * @param intType the integer annotation to convert
 * @return the matching type, always present
 * @throws IllegalArgumentException if the annotation is unsigned with a bit width of 64 or more
 */
@Override
public Optional<Type> visit(LogicalTypeAnnotation.IntLogicalTypeAnnotation intType) {
  Preconditions.checkArgument(intType.isSigned() || intType.getBitWidth() < 64,
      "Cannot use uint64: not a supported Java type");

  int bitWidth = intType.getBitWidth();
  boolean fitsInInteger = bitWidth < 32 || (bitWidth == 32 && intType.isSigned());
  if (fitsInInteger) {
    return Optional.of(Types.IntegerType.get());
  }
  return Optional.of(Types.LongType.get());
}
 
Example #24
Source File: StructInternalRow.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Converts a collection of values into {@code ArrayData}, choosing the per-element conversion
 * from the element type.
 *
 * <p>Boolean, numeric, date, time and timestamp values are stored as they are. Strings become
 * {@code UTF8String}, fixed and binary buffers become byte arrays, decimals become
 * {@code Decimal}, and structs, lists and maps are converted recursively.
 *
 * @param elementType the type of each value in {@code values}
 * @param values the values to convert
 * @return the converted array data
 * @throws UnsupportedOperationException if the element type is not handled
 */
private ArrayData collectionToArrayData(Type elementType, Collection<?> values) {
  switch (elementType.typeId()) {
    case BOOLEAN:
    case INTEGER:
    case DATE:
    case TIME:
    case LONG:
    case TIMESTAMP:
    case FLOAT:
    case DOUBLE:
      return fillArray(values, array -> (pos, value) -> array[pos] = value);
    case STRING:
      return fillArray(values, array ->
          (BiConsumer<Integer, CharSequence>) (pos, seq) -> array[pos] = UTF8String.fromString(seq.toString()));
    case FIXED:
    case BINARY:
      return fillArray(values, array ->
          (BiConsumer<Integer, ByteBuffer>) (pos, buf) -> array[pos] = ByteBuffers.toByteArray(buf));
    case DECIMAL:
      return fillArray(values, array ->
          (BiConsumer<Integer, BigDecimal>) (pos, dec) -> array[pos] = Decimal.apply(dec));
    case STRUCT:
      return fillArray(values, array -> (BiConsumer<Integer, StructLike>) (pos, tuple) ->
          array[pos] = new StructInternalRow(elementType.asStructType(), tuple));
    case LIST:
      // recurse with the nested list's own element type; passing the list type itself would
      // hit this LIST case again and cast the nested list's members to Collection
      return fillArray(values, array -> (BiConsumer<Integer, Collection<?>>) (pos, list) ->
          array[pos] = collectionToArrayData(elementType.asListType().elementType(), list));
    case MAP:
      return fillArray(values, array -> (BiConsumer<Integer, Map<?, ?>>) (pos, map) ->
          array[pos] = mapToMapData(elementType.asMapType(), map));
    default:
      throw new UnsupportedOperationException("Unsupported array element type: " + elementType);
  }
}
 
Example #25
Source File: ManifestFileUtil.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a summary for one partition field.
 *
 * <p>The Java class and comparator come from the primitive type; the lower and upper bounds are
 * decoded from the summary's byte buffers using that same type.
 *
 * @param primitive the primitive type of the partition field
 * @param summary the manifest's summary for the field
 */
@SuppressWarnings("unchecked")
FieldSummary(Type.PrimitiveType primitive, ManifestFile.PartitionFieldSummary summary) {
  // derived from the type alone
  this.javaClass = (Class<T>) primitive.typeId().javaClass();
  this.comparator = Comparators.forType(primitive);

  // derived from the summary
  this.containsNull = summary.containsNull();
  this.lowerBound = Conversions.fromByteBuffer(primitive, summary.lowerBound());
  this.upperBound = Conversions.fromByteBuffer(primitive, summary.upperBound());
}
 
Example #26
Source File: TestHelpers.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Asserts that every field of {@code row} matches the value at the same position in {@code rec}.
 *
 * <p>A position that is null in the row is compared as {@code null}; otherwise the value is read
 * using the converted field type.
 *
 * @param struct the struct type describing the fields
 * @param rec the expected record
 * @param row the row under test
 */
public static void assertEqualsUnsafe(Types.StructType struct, Record rec, InternalRow row) {
  int position = 0;
  for (Types.NestedField field : struct.fields()) {
    Type fieldType = field.type();

    Object expected = rec.get(position);
    Object actual = null;
    if (!row.isNullAt(position)) {
      actual = row.get(position, convert(fieldType));
    }

    assertEqualsUnsafe(fieldType, expected, actual);
    position += 1;
  }
}
 
Example #27
Source File: ParquetMetricsRowGroupFilter.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Evaluates a not-null predicate for the referenced column.
 *
 * @param ref the bound reference to the column
 * @return {@code ROWS_CANNOT_MATCH} when the column has no value count or when its null count
 *     equals its value count; {@code ROWS_MIGHT_MATCH} otherwise, including for nested types
 */
@Override
public <T> Boolean notNull(BoundReference<T> ref) {
  // binding already handles required fields, so only the counts matter here: a column with no
  // non-null values cannot match
  Integer fieldId = ref.fieldId();

  // notNull() reaches this filter implicitly for nested types even though complex filters are
  // not pushed down in Parquet, so nested columns are left to be evaluated after the scan
  if (schema.findType(fieldId) instanceof Type.NestedType) {
    return ROWS_MIGHT_MATCH;
  }

  Long totalValues = valueCounts.get(fieldId);
  if (totalValues == null) {
    // the column is not present and is all nulls
    return ROWS_CANNOT_MATCH;
  }

  // (num nulls == value count) => all values are null => no non-null values
  Statistics<?> columnStats = stats.get(fieldId);
  boolean allNulls = columnStats != null && totalValues - columnStats.getNumNulls() == 0;
  return allNulls ? ROWS_CANNOT_MATCH : ROWS_MIGHT_MATCH;
}
 
Example #28
Source File: SchemaParser.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Parses a map type from its JSON object form.
 *
 * <p>Reads the key id and key type, the value id and value type, and the value-required flag,
 * which selects between a required and an optional map.
 *
 * @param json the JSON object describing the map
 * @return the parsed map type
 */
private static Types.MapType mapFromJson(JsonNode json) {
  int keyId = JsonUtil.getInt(KEY_ID, json);
  Type keyType = typeFromJson(json.get(KEY));

  int valueId = JsonUtil.getInt(VALUE_ID, json);
  Type valueType = typeFromJson(json.get(VALUE));

  boolean valueRequired = JsonUtil.getBool(VALUE_REQUIRED, json);
  return valueRequired
      ? Types.MapType.ofRequired(keyId, valueId, keyType, valueType)
      : Types.MapType.ofOptional(keyId, valueId, keyType, valueType);
}
 
Example #29
Source File: DataTestHelpers.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Asserts that two maps have the same size and that, for every key of {@code expected}, the
 * value found in {@code actual} matches the expected value under the map's value type.
 *
 * @param map the map type supplying the value type
 * @param expected the expected entries
 * @param actual the map under test
 */
public static void assertEquals(Types.MapType map, Map<?, ?> expected, Map<?, ?> actual) {
  Type valueType = map.valueType();

  Assert.assertEquals("Map size should match", expected.size(), actual.size());

  for (Map.Entry<?, ?> expectedEntry : expected.entrySet()) {
    Object expectedValue = expectedEntry.getValue();
    Object actualValue = actual.get(expectedEntry.getKey());

    assertEquals(valueType, expectedValue, actualValue);
  }
}
 
Example #30
Source File: IdentityPartitionConverters.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Conversions from internal representations to Iceberg generic values.
 *
 * <p>Strings are rendered with {@code toString()}; time, date and timestamp values are passed to
 * the matching {@code DateTimeUtil} method; an Avro {@code GenericData.Fixed} is unwrapped to its
 * bytes. Any other value, and any other type, is returned unchanged.
 *
 * @param type the type of the constant
 * @param value the internal value, may be {@code null}
 * @return the converted value, or {@code null} when {@code value} is {@code null}
 */
public static Object convertConstant(Type type, Object value) {
  if (value == null) {
    return null;
  }

  switch (type.typeId()) {
    case STRING:
      return value.toString();
    case TIME:
      return DateTimeUtil.timeFromMicros((Long) value);
    case DATE:
      return DateTimeUtil.dateFromDays((Integer) value);
    case TIMESTAMP: {
      boolean adjustToUtc = ((Types.TimestampType) type).shouldAdjustToUTC();
      Long micros = (Long) value;
      return adjustToUtc
          ? DateTimeUtil.timestamptzFromMicros(micros)
          : DateTimeUtil.timestampFromMicros(micros);
    }
    case FIXED:
      return value instanceof GenericData.Fixed ? ((GenericData.Fixed) value).bytes() : value;
    default:
      return value;
  }
}