Java Code Examples for org.apache.iceberg.types.Types#ListType

The following examples show how to use org.apache.iceberg.types.Types#ListType. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: TypeConverter.java    From presto with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the flat list of ORC types for an Iceberg list type: the LIST entry itself comes first,
 * followed by every type produced for the list element.
 *
 * @param nextFieldTypeIndex index preceding the one handed to the element conversion
 * @param listType Iceberg list type being converted
 * @param attributes attributes attached to the LIST entry itself
 * @return the LIST entry followed by the element's ORC types
 */
private static List<OrcType> toOrcListType(int nextFieldTypeIndex, Types.ListType listType, Map<String, String> attributes)
{
    // The incremented index is used both as the LIST entry's child column id and as the
    // starting index passed to the element conversion.
    int elementIndex = nextFieldTypeIndex + 1;

    Map<String, String> attributesForElement = ImmutableMap.<String, String>builder()
            .put(ORC_ICEBERG_ID_KEY, Integer.toString(listType.elementId()))
            .put(ORC_ICEBERG_REQUIRED_KEY, Boolean.toString(listType.isElementRequired()))
            .build();
    List<OrcType> elementOrcTypes = toOrcType(elementIndex, listType.elementType(), attributesForElement);

    OrcType listOrcType = new OrcType(
            OrcType.OrcTypeKind.LIST,
            ImmutableList.of(new OrcColumnId(elementIndex)),
            ImmutableList.of("item"),
            Optional.empty(),
            Optional.empty(),
            Optional.empty(),
            attributes);

    List<OrcType> result = new ArrayList<>();
    result.add(listOrcType);
    result.addAll(elementOrcTypes);
    return result;
}
 
Example 2
Source File: OrcSchemaWithTypeVisitor.java    From iceberg with Apache License 2.0 6 votes vote down vote up
/**
 * Walks an ORC schema together with the matching Iceberg type and dispatches to the visitor
 * according to the ORC category.
 *
 * <p>The Iceberg type may be {@code null}; in that case {@code null} is passed to the visitor
 * callbacks and to every nested visit.
 *
 * @param iType Iceberg type corresponding to {@code schema}, or {@code null}
 * @param schema ORC type description to visit
 * @param visitor visitor receiving the callbacks
 * @param <T> result type produced by the visitor
 * @return the value returned by the visitor callback for {@code schema}
 * @throws UnsupportedOperationException if {@code schema} is a UNION
 */
public static <T> T visit(Type iType, TypeDescription schema, OrcSchemaWithTypeVisitor<T> visitor) {
  switch (schema.getCategory()) {
    case STRUCT:
      return visitRecord(iType != null ? iType.asStructType() : null, schema, visitor);

    case UNION:
      throw new UnsupportedOperationException("Cannot handle " + schema);

    case LIST:
      Types.ListType list = iType != null ? iType.asListType() : null;
      // list is null when no Iceberg type was supplied; guard the dereference the same way the
      // MAP case does so a null iType does not cause a NullPointerException here.
      return visitor.list(
          list, schema,
          visit(list != null ? list.elementType() : null, schema.getChildren().get(0), visitor));

    case MAP:
      Types.MapType map = iType != null ? iType.asMapType() : null;
      return visitor.map(
          map, schema,
          visit(map != null ? map.keyType() : null, schema.getChildren().get(0), visitor),
          visit(map != null ? map.valueType() : null, schema.getChildren().get(1), visitor));

    default:
      return visitor.primitive(iType != null ? iType.asPrimitiveType() : null, schema);
  }
}
 
Example 3
Source File: AvroSchemaWithTypeVisitor.java    From iceberg with Apache License 2.0 6 votes vote down vote up
/**
 * Visits an Avro array schema, treating it as a map when it carries the logical map type or when
 * the expected Iceberg type is a map, and as a plain list otherwise.
 *
 * @param type expected Iceberg type, or {@code null}
 * @param array Avro array schema
 * @param visitor visitor receiving the callbacks
 * @param <T> result type produced by the visitor
 * @return the result of {@code visitor.map} or {@code visitor.array}
 */
private static <T> T visitArray(Type type, Schema array, AvroSchemaWithTypeVisitor<T> visitor) {
  boolean treatAsMap = array.getLogicalType() instanceof LogicalMap || (type != null && type.isMapType());

  if (!treatAsMap) {
    Types.ListType listType = type != null ? type.asListType() : null;
    Type expectedElement = listType != null ? listType.elementType() : null;
    return visitor.array(listType, array, visit(expectedElement, array.getElementType(), visitor));
  }

  // A map encoded as an array must have a key/value record as its element schema.
  Preconditions.checkState(
      AvroSchemaUtil.isKeyValueSchema(array.getElementType()),
      "Cannot visit invalid logical map type: %s", array);

  Types.MapType mapType = type != null ? type.asMapType() : null;
  List<Schema.Field> entryFields = array.getElementType().getFields();
  return visitor.map(mapType, array,
      visit(mapType != null ? mapType.keyType() : null, entryFields.get(0).schema(), visitor),
      visit(mapType != null ? mapType.valueType() : null, entryFields.get(1).schema(), visitor));
}
 
Example 4
Source File: ParquetAvroValueReaders.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a list reader for a Parquet list group. The element reader is wrapped with
 * {@code ParquetValueReaders.option} using the element's definition level.
 *
 * @param expectedList expected Iceberg list type (not read by this method)
 * @param array Parquet group for the list
 * @param elementReader reader for the list elements
 * @return a {@code ListReader} over the wrapped element reader
 */
@Override
public ParquetValueReader<?> list(Types.ListType expectedList, GroupType array,
                                  ParquetValueReader<?> elementReader) {
  GroupType repeatedGroup = array.getFields().get(0).asGroupType();
  String[] pathToRepeated = currentPath();

  // Both levels are the maximum level at the repeated path, minus one.
  int repeatedDefinitionLevel = type.getMaxDefinitionLevel(pathToRepeated) - 1;
  int repeatedRepetitionLevel = type.getMaxRepetitionLevel(pathToRepeated) - 1;

  Type element = repeatedGroup.getType(0);
  String[] pathToElement = path(element.getName());
  int elementDefinitionLevel = type.getMaxDefinitionLevel(pathToElement) - 1;

  return new ListReader<>(
      repeatedDefinitionLevel,
      repeatedRepetitionLevel,
      ParquetValueReaders.option(element, elementDefinitionLevel, elementReader));
}
 
Example 5
Source File: TestSchemaUpdate.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Adds an optional list-of-structs column named "locations" to a single-column schema and checks
 * that the resulting struct matches the expected schema, in which the new column, its element and
 * the struct fields carry IDs 2 through 5 rather than the IDs used when building the list.
 */
@Test
public void testAddNestedListOfStructs() {
  Schema schema = new Schema(required(1, "id", Types.IntegerType.get()));

  // The IDs used here (1, 9, 8) deliberately differ from the IDs in the expected schema below.
  Types.ListType list = Types.ListType.ofOptional(1,
      Types.StructType.of(
          required(9, "lat", Types.IntegerType.get()),
          optional(8, "long", Types.IntegerType.get())
      )
  );

  Schema expected = new Schema(
      required(1, "id", Types.IntegerType.get()),
      optional(2, "locations", Types.ListType.ofOptional(3,
          Types.StructType.of(
              required(4, "lat", Types.IntegerType.get()),
              optional(5, "long", Types.IntegerType.get())
          )
      ))
  );

  Schema result = new SchemaUpdate(schema, 1)
      .addColumn("locations", list)
      .apply();

  // The column under test is a list, so the failure message must say "list", not "map".
  Assert.assertEquals("Should add list and reassign column IDs",
      expected.asStruct(), result.asStruct());
}
 
Example 6
Source File: AvroTestHelpers.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Asserts that two lists have the same size and that each pair of elements at the same position
 * is equal according to the element-type overload of {@code assertEquals}.
 *
 * @param list Iceberg list type supplying the element type
 * @param expected expected elements
 * @param actual actual elements
 */
static void assertEquals(Types.ListType list, List<?> expected, List<?> actual) {
  Type elementType = list.elementType();

  Assert.assertEquals("List size should match", expected.size(), actual.size());

  for (int index = 0; index < expected.size(); index++) {
    assertEquals(elementType, expected.get(index), actual.get(index));
  }
}
 
Example 7
Source File: TestHelpers.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Compares every expected element, in iteration order, against the element at the same index of
 * {@code actual} using the element-type overload of {@code assertEqualsSafe}.
 *
 * @param list Iceberg list type supplying the element type
 * @param expected expected elements
 * @param actual actual elements, read by index
 */
private static void assertEqualsSafe(Types.ListType list, Collection<?> expected, List actual) {
  Type elementType = list.elementType();

  // Copy first so the comparison walks a stable, indexable snapshot of the expected values.
  int index = 0;
  for (Object expectedValue : Lists.newArrayList(expected)) {
    Object actualValue = actual.get(index);
    assertEqualsSafe(elementType, expectedValue, actualValue);
    index += 1;
  }
}
 
Example 8
Source File: GenericsHelpers.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Compares every expected element, in iteration order, against the value read from
 * {@code actual} at the same ordinal using the element-type overload of
 * {@code assertEqualsUnsafe}.
 *
 * @param list Iceberg list type supplying the element type
 * @param expected expected elements
 * @param actual array data read with {@code get(ordinal, convert(elementType))}
 */
private static void assertEqualsUnsafe(Types.ListType list, Collection<?> expected, ArrayData actual) {
  Type elementType = list.elementType();

  int ordinal = 0;
  for (Object expectedValue : Lists.newArrayList(expected)) {
    Object actualValue = actual.get(ordinal, convert(elementType));
    assertEqualsUnsafe(elementType, expectedValue, actualValue);
    ordinal += 1;
  }
}
 
Example 9
Source File: GenericsHelpers.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Compares every expected element, in iteration order, against the element at the same index of
 * {@code actual} using the element-type overload of {@code assertEqualsSafe}.
 *
 * @param list Iceberg list type supplying the element type
 * @param expected expected elements
 * @param actual actual elements, read by index
 */
private static void assertEqualsSafe(Types.ListType list, Collection<?> expected, List<?> actual) {
  Type elementType = list.elementType();

  int index = 0;
  for (Object expectedValue : Lists.newArrayList(expected)) {
    Object actualValue = actual.get(index);
    assertEqualsSafe(elementType, expectedValue, actualValue);
    index += 1;
  }
}
 
Example 10
Source File: SchemaParser.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Reads a list type from its JSON form: the element id, the element type and the
 * element-required flag.
 *
 * @param json JSON node describing the list
 * @return a list type whose element is required or optional according to the flag
 */
private static Types.ListType listFromJson(JsonNode json) {
  int id = JsonUtil.getInt(ELEMENT_ID, json);
  Type type = typeFromJson(json.get(ELEMENT));
  boolean elementRequired = JsonUtil.getBool(ELEMENT_REQUIRED, json);

  return elementRequired ?
      Types.ListType.ofRequired(id, type) :
      Types.ListType.ofOptional(id, type);
}
 
Example 11
Source File: DataTestHelpers.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Asserts that two lists have the same size and that each pair of elements at the same position
 * is equal according to the element-type overload of {@code assertEquals}.
 *
 * @param list Iceberg list type supplying the element type
 * @param expected expected elements
 * @param actual actual elements
 */
public static void assertEquals(Types.ListType list, List<?> expected, List<?> actual) {
  Type elementType = list.elementType();

  Assert.assertEquals("List size should match", expected.size(), actual.size());

  for (int position = 0; position < expected.size(); position++) {
    assertEquals(elementType, expected.get(position), actual.get(position));
  }
}
 
Example 12
Source File: GenericParquetReaders.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a list reader for a Parquet list group. The element reader is wrapped with
 * {@code ParquetValueReaders.option} using the element's definition level.
 *
 * @param expectedList expected Iceberg list type (not read by this method)
 * @param array Parquet group for the list
 * @param elementReader reader for the list elements
 * @return a {@code ListReader} over the wrapped element reader
 */
@Override
public ParquetValueReader<?> list(Types.ListType expectedList, GroupType array,
                                  ParquetValueReader<?> elementReader) {
  GroupType repeatedGroup = array.getFields().get(0).asGroupType();
  String[] pathToRepeated = currentPath();

  // Both levels are the maximum level at the repeated path, minus one.
  int repeatedDefinitionLevel = type.getMaxDefinitionLevel(pathToRepeated) - 1;
  int repeatedRepetitionLevel = type.getMaxRepetitionLevel(pathToRepeated) - 1;

  Type element = repeatedGroup.getType(0);
  String[] pathToElement = path(element.getName());
  int elementDefinitionLevel = type.getMaxDefinitionLevel(pathToElement) - 1;

  return new ListReader<>(
      repeatedDefinitionLevel,
      repeatedRepetitionLevel,
      ParquetValueReaders.option(element, elementDefinitionLevel, elementReader));
}
 
Example 13
Source File: SchemaParser.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Writes a list type as a JSON object containing, in order: the type marker, the element id, the
 * element type (written through the type overload of {@code toJson}) and the element-required
 * flag.
 *
 * @param list list type to serialize
 * @param generator JSON generator receiving the output
 * @throws IOException if the generator fails to write
 */
static void toJson(Types.ListType list, JsonGenerator generator) throws IOException {
  generator.writeStartObject();
  generator.writeStringField(TYPE, LIST);
  generator.writeNumberField(ELEMENT_ID, list.elementId());

  // The element type is a nested value, so the field name is written separately.
  generator.writeFieldName(ELEMENT);
  toJson(list.elementType(), generator);

  boolean elementRequired = !list.isElementOptional();
  generator.writeBooleanField(ELEMENT_REQUIRED, elementRequired);
  generator.writeEndObject();
}
 
Example 14
Source File: SparkOrcReader.java    From iceberg with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the reader produced by {@code SparkOrcValueReaders.array} for the given element reader.
 * The {@code iList} and {@code array} arguments are not used.
 *
 * @param iList Iceberg list type (unused)
 * @param array ORC type description of the list (unused)
 * @param elementReader reader for the list elements
 * @return the array reader wrapping {@code elementReader}
 */
@Override
public OrcValueReader<?> list(Types.ListType iList, TypeDescription array, OrcValueReader<?> elementReader) {
  return SparkOrcValueReaders.array(elementReader);
}
 
Example 15
Source File: HiveTypeConverter.java    From metacat with Apache License 2.0 4 votes vote down vote up
/**
 * Converts an Iceberg type to its Hive type string, recursing into struct fields, list elements
 * and map keys/values.
 *
 * @param type iceberg type.
 * @return hive type string.
 * @throws UnsupportedOperationException for TIME and for any type id not handled here.
 */
public static String fromIcebergToHiveType(final org.apache.iceberg.types.Type type) {
    switch (type.typeId()) {
        case BOOLEAN:
            return serdeConstants.BOOLEAN_TYPE_NAME;
        case INTEGER:
            return serdeConstants.INT_TYPE_NAME;
        case LONG:
            return serdeConstants.BIGINT_TYPE_NAME;
        case FLOAT:
            return serdeConstants.FLOAT_TYPE_NAME;
        case DOUBLE:
            return serdeConstants.DOUBLE_TYPE_NAME;
        case DATE:
            return serdeConstants.DATE_TYPE_NAME;
        case TIME:
            throw new UnsupportedOperationException("Hive does not support time fields");
        case TIMESTAMP:
            return serdeConstants.TIMESTAMP_TYPE_NAME;
        case UUID:
        case STRING:
            return serdeConstants.STRING_TYPE_NAME;
        case FIXED:
        case BINARY:
            // Both fixed-length and variable-length binary map to the same Hive type name.
            return serdeConstants.BINARY_TYPE_NAME;
        case DECIMAL:
            final Types.DecimalType decimal = (Types.DecimalType) type;
            return String.format("decimal(%s,%s)", decimal.precision(), decimal.scale());
        case STRUCT:
            final String fieldList = type.asStructType().fields().stream()
                .map(field -> String.format("%s:%s", field.name(), fromIcebergToHiveType(field.type())))
                .collect(Collectors.joining(","));
            return String.format("struct<%s>", fieldList);
        case LIST:
            final String elementHiveType = fromIcebergToHiveType(type.asListType().elementType());
            return String.format("array<%s>", elementHiveType);
        case MAP:
            final Types.MapType map = type.asMapType();
            final String keyHiveType = fromIcebergToHiveType(map.keyType());
            final String valueHiveType = fromIcebergToHiveType(map.valueType());
            return String.format("map<%s,%s>", keyHiveType, valueHiveType);
        default:
            throw new UnsupportedOperationException(type + " is not supported");
    }
}
 
Example 16
Source File: OrcMetrics.java    From iceberg with Apache License 2.0 4 votes vote down vote up
/**
 * Adds everything returned by {@code flatten(element)} to {@code columnsInContainers} and returns
 * the list's own type description unchanged. The {@code iList} argument is not used.
 *
 * @param iList Iceberg list type (unused)
 * @param array ORC type description of the list; returned as-is
 * @param element result produced for the list element
 * @return {@code array}
 */
@Override
public TypeDescription list(Types.ListType iList, TypeDescription array, TypeDescription element) {
  // Record the element's flattened entries as belonging to a container.
  columnsInContainers.addAll(flatten(element));
  return array;
}
 
Example 17
Source File: ORCSchemaUtil.java    From iceberg with Apache License 2.0 4 votes vote down vote up
/**
 * Converts an ORC type description into an Iceberg nested field with the given name, recursing
 * into struct fields, list elements and map keys/values.
 *
 * <p>The field ID is taken from {@code icebergID(orcType)} when present and otherwise drawn from
 * {@code nextID}. It is resolved before any child is converted, so a parent draws from
 * {@code nextID} before its children do.
 *
 * @param orcType ORC type to convert
 * @param name name given to the resulting field
 * @param nextID source of IDs for types that carry no Iceberg ID
 * @return the Iceberg field corresponding to {@code orcType}
 * @throws IllegalStateException if the long or binary sub-type switch reaches its default branch
 * @throws IllegalArgumentException if the ORC category is not handled (for example UNION)
 */
private static Types.NestedField convertOrcToIceberg(TypeDescription orcType, String name,
                                                     TypeUtil.NextID nextID) {

  final int icebergID = icebergID(orcType).orElseGet(nextID::get);
  final boolean isRequired = isRequired(orcType);

  switch (orcType.getCategory()) {
    case BOOLEAN:
      return getIcebergType(icebergID, name, Types.BooleanType.get(), isRequired);
    // All ORC integer widths up to INT map to the Iceberg integer type.
    case BYTE:
    case SHORT:
    case INT:
      return getIcebergType(icebergID, name, Types.IntegerType.get(), isRequired);
    case LONG:
      // The long-type attribute distinguishes TIME from a plain LONG; a missing attribute means LONG.
      String longAttributeValue = orcType.getAttributeValue(ICEBERG_LONG_TYPE_ATTRIBUTE);
      LongType longType = longAttributeValue == null ? LongType.LONG : LongType.valueOf(longAttributeValue);
      switch (longType) {
        case TIME:
          return getIcebergType(icebergID, name, Types.TimeType.get(), isRequired);
        case LONG:
          return getIcebergType(icebergID, name, Types.LongType.get(), isRequired);
        default:
          throw new IllegalStateException("Invalid Long type found in ORC type attribute");
      }
    case FLOAT:
      return getIcebergType(icebergID, name, Types.FloatType.get(), isRequired);
    case DOUBLE:
      return getIcebergType(icebergID, name, Types.DoubleType.get(), isRequired);
    // All ORC character types map to the Iceberg string type.
    case STRING:
    case CHAR:
    case VARCHAR:
      return getIcebergType(icebergID, name, Types.StringType.get(), isRequired);
    case BINARY:
      // The binary-type attribute selects UUID, FIXED or BINARY; a missing attribute means BINARY.
      String binaryAttributeValue = orcType.getAttributeValue(ICEBERG_BINARY_TYPE_ATTRIBUTE);
      BinaryType binaryType = binaryAttributeValue == null ? BinaryType.BINARY :
          BinaryType.valueOf(binaryAttributeValue);
      switch (binaryType) {
        case UUID:
          return getIcebergType(icebergID, name, Types.UUIDType.get(), isRequired);
        case FIXED:
          // The fixed length is read from the field-length attribute.
          int fixedLength = Integer.parseInt(orcType.getAttributeValue(ICEBERG_FIELD_LENGTH));
          return getIcebergType(icebergID, name, Types.FixedType.ofLength(fixedLength), isRequired);
        case BINARY:
          return getIcebergType(icebergID, name, Types.BinaryType.get(), isRequired);
        default:
          throw new IllegalStateException("Invalid Binary type found in ORC type attribute");
      }
    case DATE:
      return getIcebergType(icebergID, name, Types.DateType.get(), isRequired);
    case TIMESTAMP:
      return getIcebergType(icebergID, name, Types.TimestampType.withoutZone(), isRequired);
    case TIMESTAMP_INSTANT:
      return getIcebergType(icebergID, name, Types.TimestampType.withZone(), isRequired);
    case DECIMAL:
      return getIcebergType(icebergID, name,
          Types.DecimalType.of(orcType.getPrecision(), orcType.getScale()),
          isRequired);
    case STRUCT: {
      // Convert children in declaration order, pairing each field name with its type.
      List<String> fieldNames = orcType.getFieldNames();
      List<TypeDescription> fieldTypes = orcType.getChildren();
      List<Types.NestedField> fields = new ArrayList<>(fieldNames.size());
      for (int c = 0; c < fieldNames.size(); ++c) {
        String childName = fieldNames.get(c);
        TypeDescription type = fieldTypes.get(c);
        Types.NestedField field = convertOrcToIceberg(type, childName, nextID);
        fields.add(field);
      }

      return getIcebergType(icebergID, name, Types.StructType.of(fields), isRequired);
    }
    case LIST: {
      TypeDescription elementType = orcType.getChildren().get(0);
      Types.NestedField element = convertOrcToIceberg(elementType, "element", nextID);

      // Element requiredness is taken from the ORC element type.
      Types.ListType listTypeWithElem = isRequired(elementType) ?
          Types.ListType.ofRequired(element.fieldId(), element.type()) :
          Types.ListType.ofOptional(element.fieldId(), element.type());
      // NOTE(review): this case builds the field directly instead of calling getIcebergType as the
      // other cases do; presumably equivalent -- confirm against getIcebergType.
      return isRequired ?
          Types.NestedField.required(icebergID, name, listTypeWithElem) :
          Types.NestedField.optional(icebergID, name, listTypeWithElem);
    }
    case MAP: {
      // The key is converted before the value.
      TypeDescription keyType = orcType.getChildren().get(0);
      Types.NestedField key = convertOrcToIceberg(keyType, "key", nextID);
      TypeDescription valueType = orcType.getChildren().get(1);
      Types.NestedField value = convertOrcToIceberg(valueType, "value", nextID);

      // ofRequired/ofOptional is chosen from the requiredness of the ORC value type.
      Types.MapType mapTypeWithKV = isRequired(valueType) ?
          Types.MapType.ofRequired(key.fieldId(), value.fieldId(), key.type(), value.type()) :
          Types.MapType.ofOptional(key.fieldId(), value.fieldId(), key.type(), value.type());

      return getIcebergType(icebergID, name, mapTypeWithKV, isRequired);
    }
    default:
      // We don't have an answer for union types.
      throw new IllegalArgumentException("Can't handle " + orcType);
  }
}
 
Example 18
Source File: ORCSchemaUtil.java    From iceberg with Apache License 2.0 4 votes vote down vote up
/**
 * Converts an Iceberg type into an ORC type description, recursing into struct fields, list
 * elements and map keys/values. Attributes are set on the result to record details that the ORC
 * type alone does not carry (long/binary sub-type, fixed length), and every result is tagged with
 * the Iceberg field ID and required flag.
 *
 * @param fieldId Iceberg field ID stored in the ID attribute
 * @param type Iceberg type to convert
 * @param isRequired value stored in the required attribute
 * @return the ORC type description for {@code type}
 * @throws IllegalArgumentException if the type id is not handled
 */
private static TypeDescription convert(Integer fieldId, Type type, boolean isRequired) {
  final TypeDescription orcType;

  switch (type.typeId()) {
    case BOOLEAN:
      orcType = TypeDescription.createBoolean();
      break;
    case INTEGER:
      orcType = TypeDescription.createInt();
      break;
    case TIME:
      // TIME is written as an ORC long; the attribute records which of the two it is.
      orcType = TypeDescription.createLong();
      orcType.setAttribute(ICEBERG_LONG_TYPE_ATTRIBUTE, LongType.TIME.toString());
      break;
    case LONG:
      orcType = TypeDescription.createLong();
      orcType.setAttribute(ICEBERG_LONG_TYPE_ATTRIBUTE, LongType.LONG.toString());
      break;
    case FLOAT:
      orcType = TypeDescription.createFloat();
      break;
    case DOUBLE:
      orcType = TypeDescription.createDouble();
      break;
    case DATE:
      orcType = TypeDescription.createDate();
      break;
    case TIMESTAMP:
      orcType = ((Types.TimestampType) type).shouldAdjustToUTC() ?
          TypeDescription.createTimestampInstant() :
          TypeDescription.createTimestamp();
      break;
    case STRING:
      orcType = TypeDescription.createString();
      break;
    case UUID:
      // UUID, FIXED and BINARY are all ORC binaries; the attribute records which one it is.
      orcType = TypeDescription.createBinary();
      orcType.setAttribute(ICEBERG_BINARY_TYPE_ATTRIBUTE, BinaryType.UUID.toString());
      break;
    case FIXED:
      orcType = TypeDescription.createBinary();
      orcType.setAttribute(ICEBERG_BINARY_TYPE_ATTRIBUTE, BinaryType.FIXED.toString());
      orcType.setAttribute(ICEBERG_FIELD_LENGTH, Integer.toString(((Types.FixedType) type).length()));
      break;
    case BINARY:
      orcType = TypeDescription.createBinary();
      orcType.setAttribute(ICEBERG_BINARY_TYPE_ATTRIBUTE, BinaryType.BINARY.toString());
      break;
    case DECIMAL: {
      Types.DecimalType decimalType = (Types.DecimalType) type;
      orcType = TypeDescription.createDecimal()
          .withScale(decimalType.scale())
          .withPrecision(decimalType.precision());
      break;
    }
    case STRUCT: {
      orcType = TypeDescription.createStruct();
      for (Types.NestedField child : type.asStructType().fields()) {
        orcType.addField(child.name(), convert(child.fieldId(), child.type(), child.isRequired()));
      }
      break;
    }
    case LIST: {
      Types.ListType listType = (Types.ListType) type;
      TypeDescription orcElement =
          convert(listType.elementId(), listType.elementType(), listType.isElementRequired());
      orcType = TypeDescription.createList(orcElement);
      break;
    }
    case MAP: {
      Types.MapType mapType = (Types.MapType) type;
      // Keys are always converted as required.
      TypeDescription orcKey = convert(mapType.keyId(), mapType.keyType(), true);
      TypeDescription orcValue = convert(mapType.valueId(), mapType.valueType(), mapType.isValueRequired());
      orcType = TypeDescription.createMap(orcKey, orcValue);
      break;
    }
    default:
      throw new IllegalArgumentException("Unhandled type " + type.typeId());
  }

  // Tag the ORC type with the Iceberg ID and required flag so it can be mapped back.
  orcType.setAttribute(ICEBERG_ID_ATTRIBUTE, String.valueOf(fieldId));
  orcType.setAttribute(ICEBERG_REQUIRED_ATTRIBUTE, String.valueOf(isRequired));
  return orcType;
}
 
Example 19
Source File: TypeWithSchemaVisitor.java    From iceberg with Apache License 2.0 4 votes vote down vote up
/**
 * Returns {@code null} without reading any of its arguments.
 *
 * <p>NOTE(review): presumably a default no-op meant to be overridden by concrete visitors --
 * confirm against the enclosing class.
 *
 * @param iList Iceberg list type (unused)
 * @param array Parquet group for the list (unused)
 * @param element result produced for the list element (unused)
 * @return always {@code null}
 */
public T list(Types.ListType iList, GroupType array, T element) {
  return null;
}
 
Example 20
Source File: TypeConverter.java    From presto with Apache License 2.0 4 votes vote down vote up
/**
 * Maps an Iceberg type to the corresponding Presto type, recursing into list elements, map
 * keys/values and struct fields.
 *
 * @param type Iceberg type to convert
 * @param typeManager used to resolve the parameterized map type
 * @return the Presto type for {@code type}
 * @throws UnsupportedOperationException if the Iceberg type id is not handled
 */
public static Type toPrestoType(org.apache.iceberg.types.Type type, TypeManager typeManager)
{
    switch (type.typeId()) {
        case BOOLEAN:
            return BooleanType.BOOLEAN;
        case FIXED:
        case BINARY:
            return VarbinaryType.VARBINARY;
        case DATE:
            return DateType.DATE;
        case DECIMAL:
            Types.DecimalType icebergDecimal = (Types.DecimalType) type;
            return DecimalType.createDecimalType(icebergDecimal.precision(), icebergDecimal.scale());
        case DOUBLE:
            return DoubleType.DOUBLE;
        case LONG:
            return BigintType.BIGINT;
        case FLOAT:
            return RealType.REAL;
        case INTEGER:
            return IntegerType.INTEGER;
        case TIME:
            return TimeType.TIME;
        case TIMESTAMP:
            // Timestamps adjusted to UTC carry a zone in Presto; the others do not.
            return ((Types.TimestampType) type).shouldAdjustToUTC()
                    ? TimestampWithTimeZoneType.TIMESTAMP_WITH_TIME_ZONE
                    : TimestampType.TIMESTAMP;
        case STRING:
        case UUID:
            return VarcharType.createUnboundedVarcharType();
        case LIST:
            Type elementPrestoType = toPrestoType(((Types.ListType) type).elementType(), typeManager);
            return new ArrayType(elementPrestoType);
        case MAP:
            Types.MapType icebergMap = (Types.MapType) type;
            TypeSignature keySignature = toPrestoType(icebergMap.keyType(), typeManager).getTypeSignature();
            TypeSignature valueSignature = toPrestoType(icebergMap.valueType(), typeManager).getTypeSignature();
            return typeManager.getParameterizedType(
                    StandardTypes.MAP,
                    ImmutableList.of(
                            TypeSignatureParameter.typeParameter(keySignature),
                            TypeSignatureParameter.typeParameter(valueSignature)));
        case STRUCT:
            List<Types.NestedField> structFields = ((Types.StructType) type).fields();
            List<RowType.Field> rowFields = structFields.stream()
                    .map(nested -> new RowType.Field(Optional.of(nested.name()), toPrestoType(nested.type(), typeManager)))
                    .collect(toImmutableList());
            return RowType.from(rowFields);
        default:
            throw new UnsupportedOperationException(format("Cannot convert from Iceberg type '%s' (%s) to Presto type", type, type.typeId()));
    }
}