Java Code Examples for org.apache.iceberg.types.Type#NestedType

The following examples show how to use org.apache.iceberg.types.Type#NestedType. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: SchemaParser.java    From iceberg with Apache License 2.0 6 votes vote down vote up
/**
 * Writes {@code type} to {@code generator} by dispatching to the {@code toJson} overload
 * that matches the kind of type: primitive, struct, list or map.
 *
 * @param type the type to serialize
 * @param generator the JSON generator that receives the output
 * @throws IOException if one of the delegated writers fails
 * @throws IllegalArgumentException if a non-primitive type has a type id other than
 *     STRUCT, LIST or MAP
 */
static void toJson(Type type, JsonGenerator generator) throws IOException {
  // primitives have their own overload; nothing else to do for them
  if (type.isPrimitiveType()) {
    toJson(type.asPrimitiveType(), generator);
    return;
  }

  // everything that is not primitive is handled through its nested view
  Type.NestedType nestedType = type.asNestedType();
  switch (type.typeId()) {
    case STRUCT:
      toJson(nestedType.asStructType(), generator);
      return;
    case LIST:
      toJson(nestedType.asListType(), generator);
      return;
    case MAP:
      toJson(nestedType.asMapType(), generator);
      return;
    default:
      throw new IllegalArgumentException("Cannot write unknown type: " + type);
  }
}
 
Example 2
Source File: FilesTable.java    From presto with Apache License 2.0 5 votes vote down vote up
/**
 * Records the field id to type mapping for {@code field} and, when the field's type is a
 * nested type, recursively for every child field below it.
 *
 * @param field the field to register
 * @param icebergIdToTypeMapping builder that collects the id to type entries
 */
private static void populateIcebergIdToTypeMapping(Types.NestedField field, ImmutableMap.Builder<Integer, Type> icebergIdToTypeMapping)
{
    Type fieldType = field.type();
    icebergIdToTypeMapping.put(field.fieldId(), fieldType);

    // only nested types have children to descend into
    if (!(fieldType instanceof Type.NestedType)) {
        return;
    }

    for (Types.NestedField child : fieldType.asNestedType().fields()) {
        populateIcebergIdToTypeMapping(child, icebergIdToTypeMapping);
    }
}
 
Example 3
Source File: ParquetMetricsRowGroupFilter.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Decides whether rows may satisfy a not-null predicate on the referenced column, using
 * the value counts and column statistics held by this evaluator.
 *
 * @param ref the bound column reference being tested
 * @return {@code ROWS_CANNOT_MATCH} when the column has no value count or when its null
 *     count equals its value count; {@code ROWS_MIGHT_MATCH} otherwise
 */
@Override
public <T> Boolean notNull(BoundReference<T> ref) {
  // Required-ness is not checked here; binding already evaluates that case.
  Integer fieldId = ref.fieldId();

  // Filters on nested (complex) columns are not evaluated from these statistics;
  // answer "might match" so they are evaluated after the scan.
  boolean isNestedColumn = schema.findType(fieldId) instanceof Type.NestedType;
  if (isNestedColumn) {
    return ROWS_MIGHT_MATCH;
  }

  Long totalValues = valueCounts.get(fieldId);
  if (totalValues == null) {
    // no value count: the column is not present, so it is all nulls
    return ROWS_CANNOT_MATCH;
  }

  Statistics<?> columnStats = stats.get(fieldId);
  if (columnStats == null) {
    // nothing to prune with
    return ROWS_MIGHT_MATCH;
  }

  // null count == value count means every value is null, so no non-null value exists
  if (columnStats.getNumNulls() == totalValues.longValue()) {
    return ROWS_CANNOT_MATCH;
  }

  return ROWS_MIGHT_MATCH;
}
 
Example 4
Source File: ParquetMetricsRowGroupFilter.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Decides whether rows may satisfy {@code ref == lit}, using the value counts and the
 * min/max column statistics held by this evaluator.
 *
 * @param ref the bound column reference being tested
 * @param lit the literal the column is compared with
 * @return {@code ROWS_CANNOT_MATCH} when the statistics rule out a match (column absent,
 *     every value null, or the literal outside the [min, max] range);
 *     {@code ROWS_MIGHT_MATCH} otherwise
 */
@Override
public <T> Boolean eq(BoundReference<T> ref, Literal<T> lit) {
  Integer id = ref.fieldId();

  // Filters on nested (complex) columns are not evaluated from these statistics;
  // answer "might match" so they are evaluated after the scan.
  if (schema.findType(id) instanceof Type.NestedType) {
    return ROWS_MIGHT_MATCH;
  }

  Long valueCount = valueCounts.get(id);
  if (valueCount == null) {
    // the column is not present and is all nulls
    return ROWS_CANNOT_MATCH;
  }

  Statistics<?> colStats = stats.get(id);
  if (colStats == null || colStats.isEmpty()) {
    // no usable statistics: nothing can be ruled out
    return ROWS_MIGHT_MATCH;
  }

  if (!colStats.hasNonNullValue()) {
    // No min/max is available. That alone does not show that every value is null
    // (NOTE(review): min/max may simply be missing from the stats; confirm against the
    // Parquet Statistics documentation), so only prune when the null count accounts for
    // every value, the same test notNull() applies.
    if (valueCount - colStats.getNumNulls() != 0) {
      return ROWS_MIGHT_MATCH;
    }
    return ROWS_CANNOT_MATCH;
  }

  // the literal is below the smallest value in the row group
  T lower = min(colStats, id);
  if (lit.comparator().compare(lower, lit.value()) > 0) {
    return ROWS_CANNOT_MATCH;
  }

  // the literal is above the largest value in the row group
  T upper = max(colStats, id);
  if (lit.comparator().compare(upper, lit.value()) < 0) {
    return ROWS_CANNOT_MATCH;
  }

  return ROWS_MIGHT_MATCH;
}
 
Example 5
Source File: ParquetMetricsRowGroupFilter.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Decides whether rows may satisfy {@code ref IN literalSet}, using the value counts and
 * the min/max column statistics held by this evaluator.
 *
 * @param ref the bound column reference being tested
 * @param literalSet the candidate values of the IN predicate
 * @return {@code ROWS_CANNOT_MATCH} when the statistics rule out a match (column absent,
 *     every value null, or no candidate inside the [min, max] range);
 *     {@code ROWS_MIGHT_MATCH} otherwise
 */
@Override
public <T> Boolean in(BoundReference<T> ref, Set<T> literalSet) {
  Integer id = ref.fieldId();

  // Filters on nested (complex) columns are not evaluated from these statistics;
  // answer "might match" so they are evaluated after the scan.
  if (schema.findType(id) instanceof Type.NestedType) {
    return ROWS_MIGHT_MATCH;
  }

  Long valueCount = valueCounts.get(id);
  if (valueCount == null) {
    // the column is not present and is all nulls
    return ROWS_CANNOT_MATCH;
  }

  Statistics<?> colStats = stats.get(id);
  if (colStats == null || colStats.isEmpty()) {
    // no usable statistics: nothing can be ruled out
    return ROWS_MIGHT_MATCH;
  }

  if (!colStats.hasNonNullValue()) {
    // No min/max is available. That alone does not show that every value is null
    // (NOTE(review): min/max may simply be missing from the stats; confirm against the
    // Parquet Statistics documentation), so only prune when the null count accounts for
    // every value, the same test notNull() applies.
    if (valueCount - colStats.getNumNulls() != 0) {
      return ROWS_MIGHT_MATCH;
    }
    return ROWS_CANNOT_MATCH;
  }

  // keep only the candidates that are not below the lower bound
  T lower = min(colStats, id);
  Collection<T> candidates = literalSet.stream()
      .filter(v -> ref.comparator().compare(lower, v) <= 0)
      .collect(Collectors.toList());
  if (candidates.isEmpty()) {
    // every candidate is less than the lower bound
    return ROWS_CANNOT_MATCH;
  }

  // of those, keep only the candidates that are not above the upper bound
  T upper = max(colStats, id);
  candidates = candidates.stream()
      .filter(v -> ref.comparator().compare(upper, v) >= 0)
      .collect(Collectors.toList());
  if (candidates.isEmpty()) {
    // every remaining candidate is greater than the upper bound
    return ROWS_CANNOT_MATCH;
  }

  return ROWS_MIGHT_MATCH;
}
 
Example 6
Source File: SchemaUpdate.java    From iceberg with Apache License 2.0 4 votes vote down vote up
/**
 * Validates and records a pending column addition.
 *
 * <p>When {@code parent} is null the column is added at the table root. Otherwise the
 * parent field is looked up in the schema; for a map parent the new column goes into the
 * map value type, for a list parent into the element type, and the resulting target must
 * be a struct.
 *
 * @param parent name of the parent column, or null to add at the table root
 * @param name name of the new column
 * @param isOptional whether the new column is optional
 * @param type type of the new column; it is re-assigned fresh ids before being stored
 * @param doc documentation string of the new column
 * @throws IllegalArgumentException if the parent cannot be found, the target is not a
 *     struct, the target is scheduled for deletion, or the name already exists
 */
private void internalAddColumn(String parent, String name, boolean isOptional, Type type, String doc) {
  final int parentId;
  final String fullName;

  if (parent == null) {
    // top-level column: only the bare name has to be free
    Preconditions.checkArgument(schema.findField(name) == null,
        "Cannot add column, name already exists: %s", name);
    parentId = TABLE_ROOT_ID;
    fullName = name;
  } else {
    Types.NestedField target = schema.findField(parent);
    Preconditions.checkArgument(target != null, "Cannot find parent struct: %s", parent);

    Type declaredType = target.type();
    if (declaredType.isNestedType()) {
      Type.NestedType container = declaredType.asNestedType();
      if (container.isMapType()) {
        // fields are added to the map value type
        target = container.asMapType().fields().get(1);
      } else if (container.isListType()) {
        // fields are added to the element type
        target = container.asListType().fields().get(0);
      }
    }

    Type targetType = target.type();
    boolean targetIsStruct = targetType.isNestedType() && targetType.asNestedType().isStructType();
    Preconditions.checkArgument(targetIsStruct,
        "Cannot add to non-struct column: %s: %s", parent, targetType);

    parentId = target.fieldId();
    Preconditions.checkArgument(!deletes.contains(parentId),
        "Cannot add to a column that will be deleted: %s", parent);
    Preconditions.checkArgument(schema.findField(parent + "." + name) == null,
        "Cannot add column, name already exists: %s.%s", parent, name);
    fullName = schema.findColumnName(parentId) + "." + name;
  }

  // the new column takes its id first; ids for its nested fields are assigned afterwards
  int newId = assignNewColumnId();

  // update tracking for moves
  addedNameToId.put(fullName, newId);
  if (parentId != TABLE_ROOT_ID) {
    idToParent.put(newId, parentId);
  }

  Type typeWithFreshIds = TypeUtil.assignFreshIds(type, this::assignNewColumnId);
  adds.put(parentId, Types.NestedField.of(newId, isOptional, name, typeWithFreshIds, doc));
}