Java Code Examples for org.apache.iceberg.types.Types#NestedField

The following examples show how to use org.apache.iceberg.types.Types#NestedField. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.

Example 1

Source File: StrictMetricsEvaluator.java From iceberg with Apache License 2.0

6 votes

/**
 * Strict evaluation of {@code ref < lit}.
 *
 * <p>Returns {@code ROWS_MUST_MATCH} only when the column cannot contain nulls, an upper
 * bound is recorded for it, and that upper bound compares below the literal. In every other
 * case the result is {@code ROWS_MIGHT_NOT_MATCH}.
 *
 * @param ref bound reference identifying the column by field id
 * @param lit literal the column values are compared against
 * @return {@code ROWS_MUST_MATCH} or {@code ROWS_MIGHT_NOT_MATCH}
 */
@Override
public <T> Boolean lt(BoundReference<T> ref, Literal<T> lit) {
  // Rows must match when: <----------Min----Max---X------->
  Integer fieldId = ref.fieldId();
  Types.NestedField column = struct.field(fieldId);
  Preconditions.checkNotNull(column, "Cannot filter by nested column: %s", schema.findField(fieldId));

  // A column that may hold nulls can never be proven to match in full.
  if (canContainNulls(fieldId)) {
    return ROWS_MIGHT_NOT_MATCH;
  }

  // Without a recorded upper bound nothing can be proven either.
  if (upperBounds == null || !upperBounds.containsKey(fieldId)) {
    return ROWS_MIGHT_NOT_MATCH;
  }

  T upperBound = Conversions.fromByteBuffer(column.type(), upperBounds.get(fieldId));
  if (lit.comparator().compare(upperBound, lit.value()) < 0) {
    return ROWS_MUST_MATCH;
  }

  return ROWS_MIGHT_NOT_MATCH;
}

Example 2

Source File: HiveTypeConverter.java From metacat with Apache License 2.0

6 votes

/**
 * Builds one field info for each column returned by {@code schema.columns()}.
 *
 * <p>A column is marked as a partition key when its name equals the name of the source column
 * of one of the given partition fields.
 *
 * @param schema          iceberg schema whose columns are converted
 * @param partitionFields partition fields used to decide which columns are partition keys
 * @return list of field infos, in the order of {@code schema.columns()}
 */
public List<FieldInfo> icebergeSchemaTofieldDtos(final Schema schema,
                                                 final List<PartitionField> partitionFields) {
    // Names of the source columns referenced by the partition fields.
    final List<String> partitionColumnNames = partitionFields.stream()
        .map(partitionField -> schema.findField(partitionField.sourceId()).name())
        .collect(Collectors.toList());

    final List<FieldInfo> fieldInfos = Lists.newArrayList();
    for (Types.NestedField column : schema.columns()) {
        final String columnName = column.name();
        final org.apache.iceberg.types.Type columnType = column.type();

        final FieldInfo info = new FieldInfo();
        info.setName(columnName);
        info.setSourceType(columnType.toString());
        info.setType(toMetacatType(fromIcebergToHiveType(columnType)));
        info.setIsNullable(column.isOptional());
        info.setComment(column.doc());
        info.setPartitionKey(partitionColumnNames.contains(columnName));
        fieldInfos.add(info);
    }

    return fieldInfos;
}

Example 3

Source File: GenericPartitionFieldSummary.java From iceberg with Apache License 2.0

6 votes

/**
 * Used by Avro reflection to instantiate this class when reading manifest files.
 *
 * <p>For every field of the projected Avro schema, records the position of the field with the
 * same id in {@code PartitionFieldSummary.getType()}.
 *
 * @param avroSchema the (possibly projected) Avro schema being read
 * @throws IllegalArgumentException if a projected field id is not present in the full type
 */
public GenericPartitionFieldSummary(Schema avroSchema) {
  this.avroSchema = avroSchema;

  List<Types.NestedField> projectedFields =
      AvroSchemaUtil.convert(avroSchema).asNestedType().asStructType().fields();
  List<Types.NestedField> fullFields = PartitionFieldSummary.getType().fields();

  this.fromProjectionPos = new int[projectedFields.size()];
  for (int projectedPos = 0; projectedPos < fromProjectionPos.length; projectedPos++) {
    Types.NestedField projectedField = projectedFields.get(projectedPos);

    // Scan the whole list: when several positions share the id, the last one is kept.
    int matchedPos = -1;
    for (int fullPos = 0; fullPos < fullFields.size(); fullPos++) {
      if (projectedField.fieldId() == fullFields.get(fullPos).fieldId()) {
        matchedPos = fullPos;
      }
    }

    if (matchedPos < 0) {
      throw new IllegalArgumentException("Cannot find projected field: " + projectedField);
    }

    fromProjectionPos[projectedPos] = matchedPos;
  }
}

Example 4

Source File: SchemaUpdate.java From iceberg with Apache License 2.0

6 votes

/**
 * Registers a move under the id of the struct that contains the moved field.
 *
 * <p>Fields without an entry in {@code idToParent} are registered under {@code TABLE_ROOT_ID}.
 * For BEFORE/AFTER moves the reference field must have the same parent as the moved field.
 *
 * @param name name of the field being moved, used only in error messages
 * @param move the move to register
 */
private void internalMove(String name, Move move) {
  Integer parentId = idToParent.get(move.fieldId());

  if (parentId == null) {
    // Field has no recorded parent: a relative move must reference another parentless field.
    if (move.type() == Move.MoveType.AFTER || move.type() == Move.MoveType.BEFORE) {
      Preconditions.checkArgument(
          idToParent.get(move.referenceFieldId()) == null,
          "Cannot move field %s to a different struct", name);
    }

    moves.put(TABLE_ROOT_ID, move);
    return;
  }

  // Nested field: the parent has to be a struct for reordering to be valid.
  Types.NestedField parentField = schema.findField(parentId);
  Preconditions.checkArgument(parentField.type().isStructType(),
      "Cannot move fields in non-struct type: %s", parentField.type());

  if (move.type() == Move.MoveType.AFTER || move.type() == Move.MoveType.BEFORE) {
    Preconditions.checkArgument(
        parentId.equals(idToParent.get(move.referenceFieldId())),
        "Cannot move field %s to a different struct", name);
  }

  moves.put(parentId, move);
}

Example 5

Source File: TestHelpers.java From iceberg with Apache License 2.0

5 votes

/**
 * Compares a record with a row, field by field, following the field order of the struct.
 *
 * @param struct struct type whose fields drive the comparison
 * @param rec    record supplying the expected value at each position
 * @param row    row supplying the actual value at each position
 */
public static void assertEqualsUnsafe(Types.StructType struct, Record rec, InternalRow row) {
  int position = 0;
  for (Types.NestedField structField : struct.fields()) {
    Type type = structField.type();

    Object expected = rec.get(position);
    Object actual;
    if (row.isNullAt(position)) {
      actual = null;
    } else {
      actual = row.get(position, convert(type));
    }

    assertEqualsUnsafe(type, expected, actual);
    position += 1;
  }
}

Example 6

Source File: SchemaParser.java From iceberg with Apache License 2.0

5 votes

/**
 * Parses a struct type from its JSON representation.
 *
 * <p>The node must carry a {@code FIELDS} property holding an array of objects. Each object is
 * read for its id, name, type, doc string and required flag and becomes one nested field.
 *
 * @param json JSON node describing the struct
 * @return the struct type built from the parsed fields, in array order
 * @throws IllegalArgumentException if the {@code FIELDS} property is missing or not an array,
 *     or if an array element is not an object
 */
private static Types.StructType structFromJson(JsonNode json) {
  JsonNode fieldArray = json.get(FIELDS);
  // JsonNode.get returns null for an absent property; report that through the same
  // precondition instead of failing with a bare NullPointerException on isArray().
  Preconditions.checkArgument(fieldArray != null && fieldArray.isArray(),
      "Cannot parse struct fields from non-array: %s", fieldArray);

  List<Types.NestedField> fields = Lists.newArrayListWithExpectedSize(fieldArray.size());
  // Iterating a JsonNode walks its elements.
  for (JsonNode field : fieldArray) {
    Preconditions.checkArgument(field.isObject(),
        "Cannot parse struct field from non-object: %s", field);

    int id = JsonUtil.getInt(ID, field);
    String name = JsonUtil.getString(NAME, field);
    Type type = typeFromJson(field.get(TYPE));

    String doc = JsonUtil.getStringOrNull(DOC, field);
    boolean isRequired = JsonUtil.getBool(REQUIRED, field);
    if (isRequired) {
      fields.add(Types.NestedField.required(id, name, type, doc));
    } else {
      fields.add(Types.NestedField.optional(id, name, type, doc));
    }
  }

  return Types.StructType.of(fields);
}

Example 7

Source File: SchemaConverter.java From dremio-oss with Apache License 2.0

5 votes

/**
 * Converts an Iceberg type to a {@code CompleteType}.
 *
 * <p>Primitive types are delegated to {@code fromIcebergPrimitiveType}. Lists and structs are
 * converted recursively. Any other nested type yields {@code null}, as does a list or struct
 * containing an element or field that cannot be converted.
 *
 * @param type Iceberg type to convert
 * @return the converted type, or {@code null} when the type is not supported
 */
public static CompleteType fromIcebergType(Type type) {
  if (type.isPrimitiveType()) {
    return fromIcebergPrimitiveType(type.asPrimitiveType());
  }

  NestedType nested = type.asNestedType();

  if (nested.isListType()) {
    ListType list = (ListType)nested;
    NestedField element = list.fields().get(0);
    CompleteType convertedElement = fromIcebergType(element.type());
    if (convertedElement == null) {
      return null;
    }
    return convertedElement.asList();
  }

  if (!nested.isStructType()) {
    // drop map type and all other unknown iceberg column types
    return null;
  }

  StructType struct = (StructType)nested;
  List<Field> children = Lists.newArrayList();
  for (Types.NestedField structField : struct.fields()) {
    Field child = fromIcebergColumn(structField);
    if (child == null) {
      // one unsupported field makes the whole struct unsupported
      return null;
    }
    children.add(child);
  }
  return CompleteType.struct(children);
}

Example 8

Source File: NamedReference.java From iceberg with Apache License 2.0

5 votes

/**
 * Binds this named reference to the matching field of a struct.
 *
 * @param struct        struct type to resolve the name against
 * @param caseSensitive whether the name lookup is case sensitive
 * @return a bound reference for the resolved field and its accessor
 */
@Override
public BoundReference<T> bind(Types.StructType struct, boolean caseSensitive) {
  Schema schema = new Schema(struct.fields());

  Types.NestedField field;
  if (caseSensitive) {
    field = schema.findField(name);
  } else {
    field = schema.caseInsensitiveFindField(name);
  }

  // The check fails when no field with this name exists in the struct.
  ValidationException.check(field != null,
      "Cannot find field '%s' in struct: %s", name, schema.asStruct());

  return new BoundReference<>(field, schema.accessorForField(field.fieldId()));
}

Example 9

Source File: PartitionSpec.java From iceberg with Apache License 2.0

5 votes

/**
 * Adds a partition field that applies the year transform to a source column.
 *
 * @param sourceName name of the source column
 * @param targetName name of the resulting partition field
 * @return this builder
 */
public Builder year(String sourceName, String targetName) {
  checkAndAddPartitionName(targetName);
  Types.NestedField sourceColumn = findSourceColumn(sourceName);

  // Take the ids before building the transform, in the same order as the constructor
  // arguments, so the next field id is consumed first.
  int sourceId = sourceColumn.fieldId();
  int partitionFieldId = nextFieldId();
  PartitionField yearField = new PartitionField(
      sourceId, partitionFieldId, targetName, Transforms.year(sourceColumn.type()));

  checkForRedundantPartitions(yearField);
  fields.add(yearField);
  return this;
}

Example 10

Source File: PartitionSpec.java From iceberg with Apache License 2.0

5 votes

/**
 * Adds a partition field that applies the identity transform to a source column.
 *
 * @param sourceName name of the source column
 * @param targetName name of the resulting partition field
 * @return this builder
 */
Builder identity(String sourceName, String targetName) {
  Types.NestedField sourceColumn = findSourceColumn(sourceName);
  checkAndAddPartitionName(targetName, sourceColumn.fieldId());

  PartitionField identityField = new PartitionField(
      sourceColumn.fieldId(),
      nextFieldId(),
      targetName,
      Transforms.identity(sourceColumn.type()));
  fields.add(identityField);
  return this;
}

Example 11

Source File: TestSparkTableUtilWithInMemoryCatalog.java From iceberg with Apache License 2.0

5 votes

/**
 * Asserts that the lower and upper bound entries for a field are null (or non-null) in every
 * row of the given files dataset.
 *
 * @param fileDF dataset exposing {@code lower_bounds} and {@code upper_bounds} map columns
 * @param field  field whose id is used as the key into both maps
 * @param isNull expected result of the null check for both bounds
 */
private void checkFieldMetrics(Dataset<Row> fileDF, Types.NestedField field, boolean isNull) {
  String lowerBoundExpr = String.format("lower_bounds['%d']", field.fieldId());
  String upperBoundExpr = String.format("upper_bounds['%d']", field.fieldId());
  List<Row> boundsRows = fileDF.selectExpr(lowerBoundExpr, upperBoundExpr).collectAsList();

  for (Row boundsRow : boundsRows) {
    String message = "Invalid metrics for column: " + field.name();
    Assert.assertEquals(message, isNull, boundsRow.isNullAt(0));
    Assert.assertEquals(message, isNull, boundsRow.isNullAt(1));
  }
}

Example 12

Source File: IcebergFilterGenerator.java From metacat with Apache License 2.0

5 votes

/**
 * Constructor. Indexes the given fields by name; a later field replaces an earlier one with
 * the same name.
 *
 * @param fields partition fields
 */
public IcebergFilterGenerator(final List<Types.NestedField> fields) {
    fieldMap = Maps.newHashMap();
    fields.forEach(field -> fieldMap.put(field.name(), field));
}

Example 13

Source File: IcebergPartitionData.java From dremio-oss with Apache License 2.0

5 votes

/**
 * Creates partition data with one slot per field of the partition type.
 *
 * @param partitionType struct describing the partition; every field must be a primitive type
 */
public IcebergPartitionData(Types.StructType partitionType) {
  // Reject nested field types before any state is initialized.
  partitionType.fields().forEach(field ->
      Preconditions.checkArgument(field.type().isPrimitiveType(),
        "Partitions cannot contain nested types: %s", field.type()));

  this.partitionType = partitionType;
  this.size = partitionType.fields().size();
  this.data = new Object[this.size];
}

Example 14

Source File: IcebergCatalog.java From dremio-oss with Apache License 2.0

5 votes

/**
 * Adds the given columns to the table at {@code fsPath} and commits the schema update.
 *
 * <p>Only the name and type of each column are passed to the schema update.
 *
 * @param columnsToAdd columns to append to the table schema
 */
void addColumns(List<Types.NestedField> columnsToAdd) {
  IcebergTableOperations tableOperations = new IcebergTableOperations(fsPath, configuration);
  table = new BaseTable(tableOperations, fsPath.getName());

  UpdateSchema pendingUpdate = table.updateSchema();
  for (Types.NestedField column : columnsToAdd) {
    pendingUpdate.addColumn(column.name(), column.type());
  }
  pendingUpdate.commit();
}

Example 15

Source File: SchemaUtil.java From iceberg with Apache License 2.0

5 votes

/**
 * Builds a schema from the fields found for the requested names, in the requested order.
 *
 * @param schema         schema to look the fields up in
 * @param requiredFields names of the fields to keep
 * @return a new schema made of the looked-up fields
 */
public static Schema project(Schema schema, List<String> requiredFields) {
  List<Types.NestedField> projectedColumns = Lists.newArrayList();

  for (String fieldName : requiredFields) {
    // The lookup result is added as-is, whatever findField returns.
    Types.NestedField match = schema.findField(fieldName);
    projectedColumns.add(match);
  }

  return new Schema(projectedColumns);
}

Example 16

Source File: TypeConverter.java From presto with Apache License 2.0

5 votes

/**
 * Converts an Iceberg struct into a flat list of ORC types: the struct type first, followed by
 * the types produced for each of its fields in order.
 *
 * @param nextFieldTypeIndex index preceding the first child; children are numbered from
 *     {@code nextFieldTypeIndex + 1}
 * @param structType Iceberg struct to convert
 * @param attributes attributes attached to the struct's own ORC type
 * @return the struct's ORC type followed by all ORC types of its fields
 */
private static List<OrcType> toOrcStructType(int nextFieldTypeIndex, Types.StructType structType, Map<String, String> attributes)
{
    int childTypeIndex = nextFieldTypeIndex + 1;
    List<OrcColumnId> childColumnIds = new ArrayList<>();
    List<String> childNames = new ArrayList<>();
    List<List<OrcType>> childTypeLists = new ArrayList<>();

    for (Types.NestedField child : structType.fields()) {
        childColumnIds.add(new OrcColumnId(childTypeIndex));
        childNames.add(child.name());

        Map<String, String> childAttributes = ImmutableMap.<String, String>builder()
                .put(ORC_ICEBERG_ID_KEY, Integer.toString(child.fieldId()))
                .put(ORC_ICEBERG_REQUIRED_KEY, Boolean.toString(child.isRequired()))
                .build();

        List<OrcType> childOrcTypes = toOrcType(childTypeIndex, child.type(), childAttributes);
        childTypeLists.add(childOrcTypes);
        // Skip past every type the child produced before numbering the next sibling.
        childTypeIndex += childOrcTypes.size();
    }

    OrcType structOrcType = new OrcType(
            OrcType.OrcTypeKind.STRUCT,
            childColumnIds,
            childNames,
            Optional.empty(),
            Optional.empty(),
            Optional.empty(),
            attributes);

    ImmutableList.Builder<OrcType> result = ImmutableList.builder();
    result.add(structOrcType);
    for (List<OrcType> childOrcTypes : childTypeLists) {
        result.addAll(childOrcTypes);
    }
    return result.build();
}

Example 17

Source File: TypeConverter.java From presto with Apache License 2.0

5 votes

/**
 * Converts a row type into an Iceberg struct whose fields are all required and are numbered
 * 1, 2, 3, ... in declaration order.
 *
 * @param type row type to convert; every field must have a name
 * @return the resulting Iceberg struct type
 */
private static org.apache.iceberg.types.Type fromRow(RowType type)
{
    List<Types.NestedField> structFields = new ArrayList<>();
    int nextId = 1;
    for (RowType.Field rowField : type.getFields()) {
        String fieldName = rowField.getName().orElseThrow(() ->
                new PrestoException(NOT_SUPPORTED, "Row type field does not have a name: " + type.getDisplayName()));
        int fieldId = nextId;
        nextId++;
        structFields.add(Types.NestedField.required(fieldId, fieldName, toIcebergType(rowField.getType())));
    }
    return Types.StructType.of(structFields);
}

Example 18

Source File: TypeConverter.java From presto with Apache License 2.0

4 votes

/**
 * Maps an Iceberg type to the corresponding Presto type, recursing into list, map and struct
 * types.
 *
 * @param type Iceberg type to convert
 * @param typeManager type manager used to create the parameterized map type
 * @return the Presto type for the given Iceberg type
 * @throws UnsupportedOperationException if the Iceberg type id has no mapping
 */
public static Type toPrestoType(org.apache.iceberg.types.Type type, TypeManager typeManager)
{
    switch (type.typeId()) {
        case BOOLEAN:
            return BooleanType.BOOLEAN;
        case INTEGER:
            return IntegerType.INTEGER;
        case LONG:
            return BigintType.BIGINT;
        case FLOAT:
            return RealType.REAL;
        case DOUBLE:
            return DoubleType.DOUBLE;
        case DECIMAL: {
            Types.DecimalType decimal = (Types.DecimalType) type;
            return DecimalType.createDecimalType(decimal.precision(), decimal.scale());
        }
        case DATE:
            return DateType.DATE;
        case TIME:
            return TimeType.TIME;
        case TIMESTAMP: {
            Types.TimestampType timestamp = (Types.TimestampType) type;
            return timestamp.shouldAdjustToUTC()
                    ? TimestampWithTimeZoneType.TIMESTAMP_WITH_TIME_ZONE
                    : TimestampType.TIMESTAMP;
        }
        case UUID:
        case STRING:
            return VarcharType.createUnboundedVarcharType();
        case BINARY:
        case FIXED:
            return VarbinaryType.VARBINARY;
        case LIST: {
            Types.ListType list = (Types.ListType) type;
            Type elementType = toPrestoType(list.elementType(), typeManager);
            return new ArrayType(elementType);
        }
        case MAP: {
            Types.MapType map = (Types.MapType) type;
            TypeSignature keySignature = toPrestoType(map.keyType(), typeManager).getTypeSignature();
            TypeSignature valueSignature = toPrestoType(map.valueType(), typeManager).getTypeSignature();
            return typeManager.getParameterizedType(
                    StandardTypes.MAP,
                    ImmutableList.of(
                            TypeSignatureParameter.typeParameter(keySignature),
                            TypeSignatureParameter.typeParameter(valueSignature)));
        }
        case STRUCT: {
            Types.StructType struct = (Types.StructType) type;
            // Each struct field becomes a named row field of the converted type.
            return RowType.from(struct.fields().stream()
                    .map(nested -> new RowType.Field(Optional.of(nested.name()), toPrestoType(nested.type(), typeManager)))
                    .collect(toImmutableList()));
        }
        default:
            throw new UnsupportedOperationException(format("Cannot convert from Iceberg type '%s' (%s) to Presto type", type, type.typeId()));
    }
}

Example 19

Source File: OrcMetrics.java From iceberg with Apache License 2.0

4 votes

/**
 * Builds Iceberg metrics from ORC column statistics.
 *
 * <p>Column sizes are recorded for every ORC column that maps to an Iceberg field. Value
 * counts, null counts and lower/upper bounds are recorded only for columns that are not
 * inside a container.
 *
 * @param numOfRows number of rows in the file
 * @param orcSchema ORC schema of the file
 * @param colStats  ORC column statistics, indexed by ORC column id
 * @return the collected metrics
 */
private static Metrics buildOrcMetrics(final long numOfRows, final TypeDescription orcSchema,
                                       final ColumnStatistics[] colStats) {
  final Schema schema = ORCSchemaUtil.convert(orcSchema);
  final Set<TypeDescription> columnsInContainers = findColumnsInContainers(schema, orcSchema);
  Map<Integer, Long> columnSizes = Maps.newHashMapWithExpectedSize(colStats.length);
  Map<Integer, Long> valueCounts = Maps.newHashMapWithExpectedSize(colStats.length);
  Map<Integer, Long> nullCounts = Maps.newHashMapWithExpectedSize(colStats.length);
  Map<Integer, ByteBuffer> lowerBounds = Maps.newHashMap();
  Map<Integer, ByteBuffer> upperBounds = Maps.newHashMap();

  for (int columnIndex = 0; columnIndex < colStats.length; columnIndex++) {
    final ColumnStatistics stats = colStats[columnIndex];
    final TypeDescription orcColumn = orcSchema.findSubtype(columnIndex);
    final Optional<Types.NestedField> icebergField = ORCSchemaUtil.icebergID(orcColumn)
        .map(schema::findField);

    // ORC columns without a matching Iceberg field contribute nothing.
    if (!icebergField.isPresent()) {
      continue;
    }

    final Types.NestedField field = icebergField.get();
    final int fieldId = field.fieldId();

    columnSizes.put(fieldId, stats.getBytesOnDisk());

    if (columnsInContainers.contains(orcColumn)) {
      continue;
    }

    // Since ORC does not track null values nor repeated ones, the value count for columns in
    // containers (maps, lists) may be larger than what it actually is; however, these are not
    // used in expressions right now. For columns outside containers, we use the number of
    // values directly stored in ORC.
    final long storedValues = stats.getNumberOfValues();
    final long nullCount = stats.hasNull() ? numOfRows - storedValues : 0L;
    nullCounts.put(fieldId, nullCount);
    valueCounts.put(fieldId, storedValues + nullCount);

    // Bounds are only looked up when the column stored at least one value.
    if (storedValues > 0) {
      fromOrcMin(field, stats).ifPresent(min -> lowerBounds.put(fieldId, min));
      fromOrcMax(field, stats).ifPresent(max -> upperBounds.put(fieldId, max));
    }
  }

  return new Metrics(numOfRows, columnSizes, valueCounts, nullCounts, lowerBounds, upperBounds);
}

Example 20

Source File: IcebergOrcFileWriter.java From presto with Apache License 2.0

4 votes

/**
 * Computes Iceberg metrics for a written ORC file from its column statistics.
 *
 * <p>When no statistics are available only the row count is reported. Otherwise value counts,
 * null counts and min/max bounds are collected for every non-excluded column. Each collection
 * that ends up empty is reported as {@code null}.
 *
 * @param icebergSchema Iceberg schema used to resolve fields by id
 * @param orcColumns ORC column types of the file
 * @param fileRowCount number of rows in the file
 * @param columnStatistics optional per-column ORC statistics
 * @return the metrics for the file
 */
private static Metrics computeMetrics(Schema icebergSchema, ColumnMetadata<OrcType> orcColumns, long fileRowCount, Optional<ColumnMetadata<ColumnStatistics>> columnStatistics)
{
    if (columnStatistics.isEmpty()) {
        return new Metrics(fileRowCount, null, null, null, null, null);
    }
    ColumnMetadata<ColumnStatistics> statistics = columnStatistics.get();

    // Columns that are descendants of LIST or MAP types are excluded because:
    // 1. Their stats are not used by Apache Iceberg to filter out data files
    // 2. Their record count can be larger than table-level row count. There's no good way to calculate nullCounts for them.
    // See https://github.com/apache/iceberg/pull/199#discussion_r429443627
    Set<OrcColumnId> skippedColumns = getExcludedColumns(orcColumns);

    ImmutableMap.Builder<Integer, Long> valueCounts = ImmutableMap.builder();
    ImmutableMap.Builder<Integer, Long> nullCounts = ImmutableMap.builder();
    ImmutableMap.Builder<Integer, ByteBuffer> lowerBounds = ImmutableMap.builder();
    ImmutableMap.Builder<Integer, ByteBuffer> upperBounds = ImmutableMap.builder();

    // OrcColumnId(0) is the root column that represents file-level schema
    for (int columnIndex = 1; columnIndex < orcColumns.size(); columnIndex++) {
        OrcColumnId columnId = new OrcColumnId(columnIndex);
        if (skippedColumns.contains(columnId)) {
            continue;
        }

        OrcType orcType = orcColumns.get(columnId);
        ColumnStatistics columnStats = statistics.get(columnId);
        int icebergId = getIcebergId(orcType);
        Types.NestedField icebergField = icebergSchema.findField(icebergId);
        verify(icebergField != null, "Cannot find Iceberg column with ID %s in schema %s", icebergId, icebergSchema);

        valueCounts.put(icebergId, fileRowCount);
        if (columnStats.hasNumberOfValues()) {
            nullCounts.put(icebergId, fileRowCount - columnStats.getNumberOfValues());
        }
        toIcebergMinMax(columnStats, icebergField.type()).ifPresent(bounds -> {
            lowerBounds.put(icebergId, bounds.getMin());
            upperBounds.put(icebergId, bounds.getMax());
        });
    }

    Map<Integer, Long> builtValueCounts = valueCounts.build();
    Map<Integer, Long> builtNullCounts = nullCounts.build();
    Map<Integer, ByteBuffer> builtLowerBounds = lowerBounds.build();
    Map<Integer, ByteBuffer> builtUpperBounds = upperBounds.build();

    return new Metrics(
            fileRowCount,
            null, // TODO: Add column size accounting to ORC column writers
            builtValueCounts.isEmpty() ? null : builtValueCounts,
            builtNullCounts.isEmpty() ? null : builtNullCounts,
            builtLowerBounds.isEmpty() ? null : builtLowerBounds,
            builtUpperBounds.isEmpty() ? null : builtUpperBounds);
}