org.apache.iceberg.types.Types.NestedField Java Examples

The following examples show how to use org.apache.iceberg.types.Types.NestedField. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: IcebergMetadata.java    From presto with Apache License 2.0 6 votes vote down vote up
/**
 * Builds an Iceberg {@link Schema} from the given column metadata.
 * <p>
 * Hidden columns are skipped. Each remaining column becomes an optional field when it is
 * nullable and a required field otherwise, carrying over its name, converted type and comment.
 * The resulting schema is passed through {@code TypeUtil.assignFreshIds} with a counter
 * starting at 1.
 *
 * @param columns the column metadata to convert
 * @return the schema returned by {@code TypeUtil.assignFreshIds}
 */
private static Schema toIcebergSchema(List<ColumnMetadata> columns)
{
    List<NestedField> fields = new ArrayList<>();
    for (ColumnMetadata column : columns) {
        if (column.isHidden()) {
            continue;
        }

        // Provisional id: the position of the column among the visible columns collected so far.
        int position = fields.size();
        Type icebergType = toIcebergType(column.getType());

        NestedField field;
        if (column.isNullable()) {
            field = NestedField.optional(position, column.getName(), icebergType, column.getComment());
        }
        else {
            field = NestedField.required(position, column.getName(), icebergType, column.getComment());
        }
        fields.add(field);
    }

    Schema provisional = new Schema(fields);
    AtomicInteger idCounter = new AtomicInteger(1);
    return TypeUtil.assignFreshIds(provisional, idCounter::getAndIncrement);
}
 
Example #2
Source File: TestSchemaConverter.java    From dremio-oss with Apache License 2.0 6 votes vote down vote up
/**
 * Checks map handling in both conversion directions: converting an Iceberg schema with a single
 * map column yields a batch schema with no fields, while converting a batch schema with a map
 * column yields the equivalent Iceberg schema.
 */
@Test
public void map() {
  org.apache.iceberg.Schema icebergSchema = new org.apache.iceberg.Schema(
    NestedField.optional(1, "map",
      Types.MapType.ofOptional(2, 3, Types.IntegerType.get(), Types.FloatType.get()))
  );

  List<Field> children = Arrays.asList(
    CompleteType.INT.toField("key"),
    CompleteType.FLOAT.toField("value")
  );
  BatchSchema schema = BatchSchema.newBuilder()
    .addField(new CompleteType(new ArrowType.Map(false), children).toField("map"))
    .build();

  BatchSchema result = schemaConverter.fromIceberg(icebergSchema);
  // dremio silently drops map type columns
  // assertEquals takes (expected, actual); keeping that order makes failure messages accurate.
  assertEquals(0, result.getFieldCount());

  org.apache.iceberg.Schema icebergResult = schemaConverter.toIceberg(schema);
  assertEquals(icebergSchema.toString(), icebergResult.toString());
}
 
Example #3
Source File: TestIcebergPartitions.java    From dremio-oss with Apache License 2.0 5 votes vote down vote up
/**
 * Prepares the fixture: a two-column schema (optional integer {@code ID}, optional string
 * {@code NAME}) and a partition spec that identity-partitions on both columns.
 */
@Before
public void setUp() {
  NestedField idColumn = NestedField.optional(1, ID, Types.IntegerType.get());
  NestedField nameColumn = NestedField.optional(2, NAME, Types.StringType.get());
  schema = new Schema(idColumn, nameColumn);

  spec = PartitionSpec.builderFor(schema).identity(ID).identity(NAME).build();
}
 
Example #4
Source File: IcebergMetadata.java    From presto with Apache License 2.0 5 votes vote down vote up
/**
 * Starts an insert into an Iceberg table.
 * <p>
 * Every column type is converted to a Presto type first. When the table's file format is not
 * ORC, decimal and timestamp columns are rejected with {@code NOT_SUPPORTED}. On success a new
 * Iceberg transaction is stored in {@code transaction} and a writable table handle describing
 * the schema, partition spec, columns, data path and file format is returned.
 *
 * @param session the connector session
 * @param tableHandle the target table; must be an {@link IcebergTableHandle}
 * @return the handle used to write the inserted data
 */
@Override
public ConnectorInsertTableHandle beginInsert(ConnectorSession session, ConnectorTableHandle tableHandle)
{
    IcebergTableHandle table = (IcebergTableHandle) tableHandle;
    org.apache.iceberg.Table icebergTable = getIcebergTable(metastore, hdfsEnvironment, session, table.getSchemaTableName());
    boolean orcFormat = getFileFormat(icebergTable) == ORC;

    for (NestedField column : icebergTable.schema().columns()) {
        // The conversion runs for every column, regardless of the file format.
        io.prestosql.spi.type.Type prestoType = toPrestoType(column.type(), typeManager);
        if (orcFormat) {
            continue;
        }
        if (prestoType instanceof DecimalType) {
            throw new PrestoException(NOT_SUPPORTED, "Writing to columns of type decimal not yet supported");
        }
        if (prestoType instanceof TimestampType) {
            throw new PrestoException(NOT_SUPPORTED, "Writing to columns of type timestamp not yet supported for PARQUET format");
        }
    }

    transaction = icebergTable.newTransaction();

    return new IcebergWritableTableHandle(
            table.getSchemaName(),
            table.getTableName(),
            SchemaParser.toJson(icebergTable.schema()),
            PartitionSpecParser.toJson(icebergTable.spec()),
            getColumns(icebergTable.schema(), typeManager),
            getDataPath(icebergTable.location()),
            getFileFormat(icebergTable));
}
 
Example #5
Source File: TestRefresh.java    From dremio-oss with Apache License 2.0 5 votes vote down vote up
/**
 * Prepares the fixture schema with four optional columns: {@code n_nationkey} (int),
 * {@code n_name} (string), {@code n_regionkey} (int) and {@code n_comment} (string).
 */
@Before
public void setUp() {
  NestedField nationKey = NestedField.optional(1, "n_nationkey", Types.IntegerType.get());
  NestedField name = NestedField.optional(2, "n_name", Types.StringType.get());
  NestedField regionKey = NestedField.optional(3, "n_regionkey", Types.IntegerType.get());
  NestedField comment = NestedField.optional(4, "n_comment", Types.StringType.get());

  schema = new Schema(nationKey, name, regionKey, comment);
}
 
Example #6
Source File: TestSchemaConverter.java    From dremio-oss with Apache License 2.0 5 votes vote down vote up
/**
 * Expects converting an Iceberg schema that contains a timestamp-without-zone column to fail
 * with a {@code UserException} that names the offending field.
 */
@Test
public void unsupportedIcebergTypes() {
  NestedField timestampNoZone =
    NestedField.optional(1, "timestamp_nozone_field", Types.TimestampType.withoutZone());
  org.apache.iceberg.Schema unsupported = new org.apache.iceberg.Schema(timestampNoZone);

  expectedEx.expect(UserException.class);
  expectedEx.expectMessage("conversion from iceberg type to arrow type failed for field timestamp_nozone_field");

  SchemaConverter converter = new SchemaConverter();
  converter.fromIceberg(unsupported);
}
 
Example #7
Source File: TestSchemaConverter.java    From dremio-oss with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that a field named {@code WriterPrel.PARTITION_COMPARATOR_FIELD} in the batch schema
 * does not appear in the converted Iceberg schema, while the other fields are kept.
 */
@Test
public void testPartitionComparatorField() {
  BatchSchema inputschema = BatchSchema.newBuilder()
    .addField(CompleteType.BIT.toField("boolean"))
    .addField(CompleteType.INT.toField("int"))
    .addField(CompleteType.BIT.toField(WriterPrel.PARTITION_COMPARATOR_FIELD))
    .build();

  org.apache.iceberg.Schema expectedSchema = new org.apache.iceberg.Schema(
    NestedField.optional(1, "boolean", Types.BooleanType.get()),
    NestedField.optional(2, "int", Types.IntegerType.get()));

  SchemaConverter convert = new SchemaConverter();
  // assertEquals takes (expected, actual); keeping that order makes failure messages accurate.
  assertEquals(expectedSchema.toString(), convert.toIceberg(inputschema).toString());
}
 
Example #8
Source File: TestSchemaConverter.java    From dremio-oss with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that an Iceberg UUID column converts to a 16-byte fixed-size binary field.
 */
@Test
public void missingArrowTypes() {
  org.apache.iceberg.Schema icebergSchema = new org.apache.iceberg.Schema(
    NestedField.optional(1, "uuid", Types.UUIDType.get())
  );

  BatchSchema schema = BatchSchema.newBuilder()
    .addField(new CompleteType(new FixedSizeBinary(16)).toField("uuid"))
    .build();

  BatchSchema result = schemaConverter.fromIceberg(icebergSchema);
  // assertEquals takes (expected, actual); keeping that order makes failure messages accurate.
  assertEquals(schema, result);
}
 
Example #9
Source File: TestSchemaConverter.java    From dremio-oss with Apache License 2.0 5 votes vote down vote up
/**
 * Round-trips twelve primitive columns: the Iceberg schema must convert to the expected batch
 * schema, and the batch schema must convert back to an Iceberg schema with the same string form.
 */
@Test
public void primitiveBasic() {
  // Iceberg side of the mapping, one column per primitive type under test.
  org.apache.iceberg.Schema icebergSide = new org.apache.iceberg.Schema(
    NestedField.optional(1, "boolean", Types.BooleanType.get()),
    NestedField.optional(2, "int", Types.IntegerType.get()),
    NestedField.optional(3, "long", Types.LongType.get()),
    NestedField.optional(4, "float", Types.FloatType.get()),
    NestedField.optional(5, "double", Types.DoubleType.get()),
    NestedField.optional(6, "decimal_38_16", Types.DecimalType.of(38, 16)),
    NestedField.optional(7, "string", Types.StringType.get()),
    NestedField.optional(8, "binary", Types.BinaryType.get()),
    NestedField.optional(9, "date", Types.DateType.get()),
    NestedField.optional(10, "time", Types.TimeType.get()),
    NestedField.optional(11, "fixed_32", Types.FixedType.ofLength(32)),
    NestedField.optional(12, "timestamp", Types.TimestampType.withZone())
  );

  // Batch-schema side of the mapping, in the same column order.
  BatchSchema.SchemaBuilder batchBuilder = BatchSchema.newBuilder()
    .addField(CompleteType.BIT.toField("boolean"))
    .addField(CompleteType.INT.toField("int"))
    .addField(CompleteType.BIGINT.toField("long"))
    .addField(CompleteType.FLOAT.toField("float"))
    .addField(CompleteType.DOUBLE.toField("double"))
    .addField(new CompleteType(new Decimal(38, 16)).toField("decimal_38_16"))
    .addField(CompleteType.VARCHAR.toField("string"))
    .addField(CompleteType.VARBINARY.toField("binary"))
    .addField(CompleteType.DATE.toField("date"))
    .addField(CompleteType.TIME.toField("time"))
    .addField(new CompleteType(new ArrowType.FixedSizeBinary(32)).toField("fixed_32"))
    .addField(CompleteType.TIMESTAMP.toField("timestamp"));
  BatchSchema batchSide = batchBuilder.build();

  assertEquals(batchSide, schemaConverter.fromIceberg(icebergSide));
  assertEquals(icebergSide.toString(), schemaConverter.toIceberg(batchSide).toString());
}
 
Example #10
Source File: TestIcebergTableDrop.java    From dremio-oss with Apache License 2.0 5 votes vote down vote up
/**
 * Prepares the fixture schema with four optional columns: {@code n_nationkey} (int),
 * {@code n_name} (string), {@code n_regionkey} (int) and {@code n_comment} (string).
 */
@Before
public void setUp() {
  NestedField nationKey = NestedField.optional(1, "n_nationkey", Types.IntegerType.get());
  NestedField name = NestedField.optional(2, "n_name", Types.StringType.get());
  NestedField regionKey = NestedField.optional(3, "n_regionkey", Types.IntegerType.get());
  NestedField comment = NestedField.optional(4, "n_comment", Types.StringType.get());

  schema = new Schema(nationKey, name, regionKey, comment);
}
 
Example #11
Source File: SchemaConverter.java    From dremio-oss with Apache License 2.0 5 votes vote down vote up
/**
 * Returns a schema made of the element field of the named list column.
 *
 * @param schema the schema to search; may be null
 * @param listName the name of the list column
 * @return a schema containing the list's first (element) field, or null when {@code schema} is
 *         null, the column does not exist, or the column is not a list
 */
public static Schema getChildSchemaForList(Schema schema, String listName) {
  if (schema == null) {
    return null;
  }

  NestedField listField = schema.findField(listName);
  // findField yields null for an unknown name; treat that like "not a list" instead of
  // dereferencing it and failing with a NullPointerException.
  if (listField == null || !listField.type().isListType()) {
    return null;
  }

  return new Schema(listField.type().asListType().fields().get(0));
}
 
Example #12
Source File: SchemaConverter.java    From dremio-oss with Apache License 2.0 5 votes vote down vote up
/**
 * Returns a schema made of the member fields of the named struct column.
 *
 * @param schema the schema to search; may be null
 * @param structName the name of the struct column
 * @return a schema containing the struct's fields, or null when {@code schema} is null, the
 *         column does not exist, or the column is not a struct
 */
public static Schema getChildSchemaForStruct(Schema schema, String structName) {
  if (schema == null) {
    return null;
  }

  NestedField structField = schema.findField(structName);
  // findField yields null for an unknown name; treat that like "not a struct" instead of
  // dereferencing it and failing with a NullPointerException.
  if (structField == null || !structField.type().isStructType()) {
    return null;
  }

  return new Schema(structField.type().asStructType().fields());
}
 
Example #13
Source File: SchemaConverter.java    From dremio-oss with Apache License 2.0 5 votes vote down vote up
/**
 * Converts an Arrow field into an optional Iceberg column with id 0.
 *
 * @param field the Arrow field to convert
 * @return an optional {@link NestedField} with the field's name and converted type
 * @throws UserException (unsupported error) when the conversion fails for any reason
 */
public static NestedField toIcebergColumn(Field field) {
  try {
    CompleteType arrowType = CompleteType.fromField(field);
    return NestedField.optional(0, field.getName(), toIcebergType(arrowType));
  } catch (Exception cause) {
    String failure = "conversion from arrow type to iceberg type failed for field " + field.getName();
    throw UserException.unsupportedError(cause)
      .message(failure)
      .buildSilently();
  }
}
 
Example #14
Source File: SchemaConverter.java    From dremio-oss with Apache License 2.0 5 votes vote down vote up
/**
 * Converts an Iceberg type into a {@link CompleteType}.
 * <p>
 * Primitive types are delegated to {@code fromIcebergPrimitiveType}. Lists convert their element
 * type recursively; structs convert each member field. Any other nested type (for example a map)
 * is dropped by returning null, and a null from any nested conversion propagates upward.
 *
 * @param type the Iceberg type to convert
 * @return the converted type, or null when the type (or any type nested in it) is dropped
 */
public static CompleteType fromIcebergType(Type type) {
  if (type.isPrimitiveType()) {
    return fromIcebergPrimitiveType(type.asPrimitiveType());
  }

  NestedType nested = type.asNestedType();

  if (nested.isListType()) {
    NestedField element = ((ListType) nested).fields().get(0);
    CompleteType convertedElement = fromIcebergType(element.type());
    if (convertedElement == null) {
      return null;
    }
    return convertedElement.asList();
  }

  if (!nested.isStructType()) {
    // drop map type and all other unknown iceberg column types
    return null;
  }

  List<Field> children = Lists.newArrayList();
  for (Types.NestedField member : ((StructType) nested).fields()) {
    Field child = fromIcebergColumn(member);
    if (child == null) {
      // one unconvertible member drops the whole struct
      return null;
    }
    children.add(child);
  }
  return CompleteType.struct(children);
}
 
Example #15
Source File: SchemaConverter.java    From dremio-oss with Apache License 2.0 5 votes vote down vote up
/**
 * Converts an Iceberg column into an Arrow field of the same name.
 *
 * @param field the Iceberg column to convert
 * @return the converted field, or null when {@code fromIcebergType} returns null for its type
 * @throws UserException (unsupported error) when the conversion fails for any reason
 */
public static Field fromIcebergColumn(NestedField field) {
  try {
    CompleteType converted = fromIcebergType(field.type());
    if (converted == null) {
      return null;
    }
    return converted.toField(field.name());
  } catch (Exception cause) {
    String failure = "conversion from iceberg type to arrow type failed for field " + field.name();
    throw UserException.unsupportedError(cause)
      .message(failure)
      .buildSilently();
  }
}
 
Example #16
Source File: TypeToMessageType.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a {@link MessageType} from the top-level columns of the given schema.
 *
 * @param schema the schema whose columns are converted, in order, via {@code field(...)}
 * @param name the message name; passed through {@code AvroSchemaUtil.makeCompatibleName}
 * @return the built message type
 */
public MessageType convert(Schema schema, String name) {
  Types.MessageTypeBuilder message = Types.buildMessage();

  for (NestedField column : schema.columns()) {
    message.addField(field(column));
  }

  String compatibleName = AvroSchemaUtil.makeCompatibleName(name);
  return message.named(compatibleName);
}
 
Example #17
Source File: ArrowSchemaUtil.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Converts an Iceberg schema into an Arrow schema by converting each top-level column in order.
 *
 * @param schema the Iceberg schema to convert
 * @return an Arrow schema holding the converted fields
 */
public static Schema convert(final org.apache.iceberg.Schema schema) {
  ImmutableList.Builder<Field> arrowFields = ImmutableList.builder();
  schema.columns().forEach(column -> arrowFields.add(convert(column)));
  return new Schema(arrowFields.build());
}
 
Example #18
Source File: Schema.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Looks up a sub-field by name, ignoring case, and returns it as a {@link NestedField}.
 * <p>
 * The name is lower-cased with {@link Locale#ROOT} before the lookup. The result may be a
 * top-level or a nested field.
 *
 * @param name a non-empty String name
 * @return the sub-field, or null if it is not found
 * @throws IllegalArgumentException if {@code name} is empty
 */
public NestedField caseInsensitiveFindField(String name) {
  Preconditions.checkArgument(!name.isEmpty(), "Invalid column name: (empty)");
  String normalized = name.toLowerCase(Locale.ROOT);
  Integer fieldId = lazyLowerCaseNameToId().get(normalized);
  return fieldId != null ? lazyIdToField().get(fieldId) : null;
}
 
Example #19
Source File: Schema.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Looks up a sub-field by its exact name and returns it as a {@link NestedField}.
 * <p>
 * The result may be a top-level or a nested field.
 *
 * @param name a non-empty String name
 * @return the sub-field, or null if it is not found
 * @throws IllegalArgumentException if {@code name} is empty
 */
public NestedField findField(String name) {
  Preconditions.checkArgument(!name.isEmpty(), "Invalid column name: (empty)");
  Integer fieldId = lazyNameToId().get(name);
  return fieldId != null ? lazyIdToField().get(fieldId) : null;
}
 
Example #20
Source File: Schema.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Looks up the {@link Type} of the sub-field with the given field id.
 *
 * @param id a field id
 * @return the sub-field's type, or null if no field has that id
 */
public Type findType(int id) {
  NestedField match = lazyIdToField().get(id);
  return match != null ? match.type() : null;
}
 
Example #21
Source File: Schema.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a schema from the given columns and an optional alias mapping.
 *
 * @param columns the top-level columns, wrapped into a struct type
 * @param aliases alias-to-id mapping; copied into an immutable bi-map, or kept as null when null
 */
public Schema(List<NestedField> columns, Map<String, Integer> aliases) {
  this.struct = StructType.of(columns);
  if (aliases != null) {
    this.aliasToId = ImmutableBiMap.copyOf(aliases);
  } else {
    this.aliasToId = null;
  }

  // building the name index up front validates the schema through the IndexByName visitor
  lazyNameToId();
}
 
Example #22
Source File: TestPartitionFields.java    From presto with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a {@link PartitionSpec} over a fixed five-column test schema, letting the caller
 * configure the builder before it is built.
 *
 * @param consumer callback that receives the spec builder and configures it
 * @return the partition spec built after the callback has run
 */
private static PartitionSpec partitionSpec(Consumer<PartitionSpec.Builder> consumer)
{
    NestedField orderKey = NestedField.required(1, "order_key", LongType.get());
    NestedField timestamp = NestedField.required(2, "ts", TimestampType.withoutZone());
    NestedField price = NestedField.required(3, "price", DoubleType.get());
    NestedField comment = NestedField.optional(4, "comment", StringType.get());
    NestedField notes = NestedField.optional(5, "notes", ListType.ofRequired(6, StringType.get()));

    PartitionSpec.Builder specBuilder = PartitionSpec.builderFor(
            new Schema(orderKey, timestamp, price, comment, notes));
    consumer.accept(specBuilder);
    return specBuilder.build();
}
 
Example #23
Source File: HiveTableOperations.java    From presto with Apache License 2.0 5 votes vote down vote up
/**
 * Maps Iceberg columns to Hive columns, keeping each name, converting each type through
 * {@code HiveTypeConverter}, and leaving the third {@code Column} argument empty.
 *
 * @param columns the Iceberg columns to convert
 * @return an immutable list of Hive columns in the same order
 */
private static List<Column> toHiveColumns(List<NestedField> columns)
{
    return columns.stream()
            .map(column -> {
                HiveType hiveType = HiveType.valueOf(HiveTypeConverter.convert(column.type()));
                return new Column(column.name(), hiveType, Optional.empty());
            })
            .collect(toImmutableList());
}
 
Example #24
Source File: Schema.java    From iceberg with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the columns of this schema.
 *
 * @return the {@link NestedField fields} of the underlying struct, as a List
 */
public List<NestedField> columns() {
  return this.struct.fields();
}
 
Example #25
Source File: Schema.java    From iceberg with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the id-to-field index, building it from the struct on first use and caching it.
 *
 * @return the cached map from field id to {@link NestedField}
 */
private Map<Integer, NestedField> lazyIdToField() {
  if (idToField != null) {
    return idToField;
  }
  this.idToField = TypeUtil.indexById(struct);
  return idToField;
}
 
Example #26
Source File: Schema.java    From iceberg with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a schema from the given columns.
 * <p>
 * Convenience overload that wraps the varargs array with {@code Arrays.asList} and delegates to
 * the list-based constructor.
 *
 * @param columns the top-level columns of the schema
 */
public Schema(NestedField... columns) {
  this(Arrays.asList(columns));
}
 
Example #27
Source File: TestSchemaConverter.java    From dremio-oss with Apache License 2.0 4 votes vote down vote up
/**
 * Converts a batch schema mixing primitive, list and struct columns to Iceberg and checks the
 * result, then creates a table from the same batch schema in a temporary directory and checks
 * that the loaded table reports the same schema.
 *
 * @throws Exception if the temporary folder cannot be created or table creation fails
 */
@Test
public void mixed() throws Exception {
  BatchSchema schema = BatchSchema.newBuilder()
    .addField(CompleteType.INT.toField("rownum"))
    .addField(CompleteType.VARCHAR.toField("name"))
    .addField(CompleteType.INT.toField("age"))
    .addField(CompleteType.FLOAT.toField("gpa"))
    .addField(CompleteType.BIGINT.toField("studentnum"))
    .addField(CompleteType.TIMESTAMP.toField("create_time"))
    .addField(CompleteType.VARCHAR.asList().toField("interests"))
    .addField(CompleteType.struct(
      CompleteType.VARCHAR.toField("color"),
      CompleteType.VARCHAR.toField("sport"),
      CompleteType.VARCHAR.toField("food")
    ).toField("favorites"))
    .build();

  org.apache.iceberg.Schema expectedSchema = new org.apache.iceberg.Schema(
    NestedField.optional(1, "rownum", Types.IntegerType.get()),
    NestedField.optional(2, "name", Types.StringType.get()),
    NestedField.optional(3, "age", Types.IntegerType.get()),
    NestedField.optional(4, "gpa", Types.FloatType.get()),
    NestedField.optional(5, "studentnum", Types.LongType.get()),
    NestedField.optional(6, "create_time", Types.TimestampType.withZone()),
    NestedField.optional(7, "interests",
      Types.ListType.ofOptional(9, Types.StringType.get())),
    NestedField.optional(8, "favorites",
      Types.StructType.of(
        NestedField.optional(10, "color", Types.StringType.get()),
        NestedField.optional(11, "sport", Types.StringType.get()),
        NestedField.optional(12, "food", Types.StringType.get())
      ))
  );

  org.apache.iceberg.Schema icebergResult = schemaConverter.toIceberg(schema);
  assertEquals(expectedSchema.toString(), icebergResult.toString());

  // The folder is not managed as a JUnit rule here, so it has to be cleaned up explicitly;
  // otherwise every run leaves a table directory behind in the system temp location.
  TemporaryFolder folder = new TemporaryFolder();
  folder.create();
  try {
    String rootPath = folder.getRoot().toString();
    Configuration conf = new Configuration();
    IcebergCatalog catalog = new IcebergCatalog(rootPath, conf);
    catalog.beginCreateTable(schema, Collections.emptyList());
    catalog.endCreateTable();

    Table table = new HadoopTables(conf).load(rootPath);
    assertEquals(expectedSchema.toString(), table.schema().toString());
  } finally {
    folder.delete();
  }
}
 
Example #28
Source File: Schema.java    From iceberg with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a schema from the given list of columns.
 *
 * @param columns the top-level columns, wrapped into a struct type
 */
public Schema(List<NestedField> columns) {
  this.struct = StructType.of(columns);
  // The name index is built eagerly and its result discarded; presumably this validates the
  // columns at construction time -- TODO confirm against lazyNameToId().
  lazyNameToId();
}
 
Example #29
Source File: Schema.java    From iceberg with Apache License 2.0 2 votes vote down vote up
/**
 * Looks up the sub-field with the given field id.
 *
 * @param id a field id
 * @return the matching {@link NestedField}, or null if no field has that id
 */
public NestedField findField(int id) {
  NestedField match = lazyIdToField().get(id);
  return match;
}