/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.iceberg.parquet;

import java.util.LinkedList;
import java.util.List;
import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
import org.apache.iceberg.relocated.com.google.common.collect.Lists;
import org.apache.iceberg.types.Types;
import org.apache.parquet.schema.GroupType;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.OriginalType;
import org.apache.parquet.schema.PrimitiveType;
import org.apache.parquet.schema.Type;

/**
 * Visitor for traversing a Parquet type with a companion Iceberg type.
 *
 * <p>The static {@link #visit} entry point walks the Parquet type depth-first and invokes the
 * matching callback ({@link #message}, {@link #struct}, {@link #list}, {@link #map} or
 * {@link #primitive}) once the children of a node have been visited. The Iceberg type handed to
 * each callback may be {@code null}: either no Iceberg type was supplied at all, or a Parquet
 * field carried no usable field ID and therefore could not be matched to an Iceberg field.
 *
 * <p>All callbacks return {@code null} unless overridden.
 *
 * @param <T> the Java class returned by the visitor
 */
public class TypeWithSchemaVisitor<T> {
  /**
   * Stack of the Parquet field names on the path to the node currently being visited; the most
   * recently entered name is at the head.
   */
  @SuppressWarnings({"checkstyle:VisibilityModifier", "checkstyle:IllegalType"})
  protected LinkedList<String> fieldNames = Lists.newLinkedList();

  /**
   * Visits a Parquet type together with its Iceberg counterpart.
   *
   * @param iType the Iceberg type paired with {@code type}, or null if there is none
   * @param type the Parquet type to traverse
   * @param visitor the visitor whose callbacks are invoked
   * @param <T> the Java class returned by the visitor
   * @return whatever the visitor callback for {@code type} returns
   */
  @SuppressWarnings("checkstyle:CyclomaticComplexity")
  public static <T> T visit(org.apache.iceberg.types.Type iType, Type type, TypeWithSchemaVisitor<T> visitor) {
    if (type instanceof MessageType) {
      Types.StructType messageStruct = iType == null ? null : iType.asStructType();
      List<T> messageFields = visitFields(messageStruct, type.asGroupType(), visitor);
      return visitor.message(messageStruct, (MessageType) type, messageFields);
    }

    if (type.isPrimitive()) {
      org.apache.iceberg.types.Type.PrimitiveType iPrimitive = iType == null ? null : iType.asPrimitiveType();
      return visitor.primitive(iPrimitive, type.asPrimitiveType());
    }

    // neither a message nor a primitive, so the type must be a group
    GroupType group = type.asGroupType();
    OriginalType annotation = group.getOriginalType();
    if (annotation == OriginalType.LIST) {
      return visitList(iType, group, visitor);
    }

    if (annotation == OriginalType.MAP) {
      return visitMap(iType, group, visitor);
    }

    // groups that are not annotated as a list or map are handled as structs
    Types.StructType struct = iType == null ? null : iType.asStructType();
    List<T> structFields = visitFields(struct, group, visitor);
    return visitor.struct(struct, group, structFields);
  }

  /**
   * Validates and visits a group annotated as a list.
   *
   * <p>The name of the repeated inner group is kept on the name stack while the element is visited
   * and while {@link #list} runs.
   */
  private static <T> T visitList(org.apache.iceberg.types.Type iType, GroupType group,
                                 TypeWithSchemaVisitor<T> visitor) {
    Preconditions.checkArgument(!group.isRepetition(Type.Repetition.REPEATED),
        "Invalid list: top-level group is repeated: %s", group);
    Preconditions.checkArgument(group.getFieldCount() == 1,
        "Invalid list: does not contain single repeated field: %s", group);

    GroupType repeated = group.getFields().get(0).asGroupType();
    Preconditions.checkArgument(repeated.isRepetition(Type.Repetition.REPEATED),
        "Invalid list: inner group is not repeated");
    Preconditions.checkArgument(repeated.getFieldCount() <= 1,
        "Invalid list: repeated group is not a single field: %s", group);

    Types.ListType iList = null;
    Types.NestedField iElement = null;
    if (iType != null) {
      iList = iType.asListType();
      iElement = iList.fields().get(0);
    }

    visitor.fieldNames.push(repeated.getName());
    try {
      // an empty repeated group has no element to visit, so its result stays null
      T elementResult = repeated.getFieldCount() > 0 ?
          visitField(iElement, repeated.getType(0), visitor) : null;
      return visitor.list(iList, group, elementResult);
    } finally {
      visitor.fieldNames.pop();
    }
  }

  /**
   * Validates and visits a group annotated as a map.
   *
   * <p>The name of the repeated key-value group is kept on the name stack while the key and value
   * are visited and while {@link #map} runs.
   */
  private static <T> T visitMap(org.apache.iceberg.types.Type iType, GroupType group,
                                TypeWithSchemaVisitor<T> visitor) {
    Preconditions.checkArgument(!group.isRepetition(Type.Repetition.REPEATED),
        "Invalid map: top-level group is repeated: %s", group);
    Preconditions.checkArgument(group.getFieldCount() == 1,
        "Invalid map: does not contain single repeated field: %s", group);

    GroupType keyValue = group.getType(0).asGroupType();
    Preconditions.checkArgument(keyValue.isRepetition(Type.Repetition.REPEATED),
        "Invalid map: inner group is not repeated");
    Preconditions.checkArgument(keyValue.getFieldCount() <= 2,
        "Invalid map: repeated group does not have 2 fields");

    Types.MapType iMap = null;
    Types.NestedField iKey = null;
    Types.NestedField iValue = null;
    if (iType != null) {
      iMap = iType.asMapType();
      iKey = iMap.fields().get(0);
      iValue = iMap.fields().get(1);
    }

    visitor.fieldNames.push(keyValue.getName());
    try {
      T keyResult = null;
      T valueResult = null;

      int projected = keyValue.getFieldCount();
      if (projected == 2) {
        // both key and value are projected
        keyResult = visitField(iKey, keyValue.getType(0), visitor);
        valueResult = visitField(iValue, keyValue.getType(1), visitor);
      } else if (projected == 1) {
        // only one of the two is projected; its name decides which one it is
        Type only = keyValue.getType(0);
        if (only.getName().equalsIgnoreCase("key")) {
          keyResult = visitField(iKey, only, visitor);
        } else {
          valueResult = visitField(iValue, only, visitor);
        }
      }
      // with no fields at all, both results remain null

      return visitor.map(iMap, group, keyResult, valueResult);
    } finally {
      visitor.fieldNames.pop();
    }
  }

  /**
   * Visits one Parquet field, keeping its name on the name stack for the duration of the visit.
   *
   * @param iField the matching Iceberg field, or null if there is none
   * @param field the Parquet field to visit
   * @param visitor the visitor whose callbacks are invoked
   * @return the result of visiting {@code field}
   */
  private static <T> T visitField(Types.NestedField iField, Type field, TypeWithSchemaVisitor<T> visitor) {
    visitor.fieldNames.push(field.getName());
    try {
      org.apache.iceberg.types.Type iFieldType = iField == null ? null : iField.type();
      return visit(iFieldType, field, visitor);
    } finally {
      visitor.fieldNames.pop();
    }
  }

  /**
   * Visits every field of a Parquet group, in order, pairing each one with the Iceberg field that
   * has the same ID.
   *
   * <p>A Parquet field without an ID, or with a negative ID, is visited with a null Iceberg field,
   * as is every field when {@code struct} is null.
   *
   * @param struct the Iceberg struct used to look fields up by ID, or null
   * @param group the Parquet group whose fields are visited
   * @param visitor the visitor whose callbacks are invoked
   * @return one result per Parquet field, in field order
   */
  private static <T> List<T> visitFields(Types.StructType struct, GroupType group,
                                         TypeWithSchemaVisitor<T> visitor) {
    List<T> visited = Lists.newArrayListWithExpectedSize(group.getFieldCount());
    for (Type field : group.getFields()) {
      int id = field.getId() != null ? field.getId().intValue() : -1;

      Types.NestedField iField = null;
      if (struct != null && id >= 0) {
        iField = struct.field(id);
      }

      visited.add(visitField(iField, field, visitor));
    }

    return visited;
  }

  /**
   * Called for the root message type after all of its fields have been visited.
   *
   * @param iStruct the Iceberg struct paired with the message, or null
   * @param message the Parquet message type
   * @param fields the results of visiting the message's fields, in field order
   * @return null unless overridden
   */
  public T message(Types.StructType iStruct, MessageType message, List<T> fields) {
    return null;
  }

  /**
   * Called for a group that is not annotated as a list or map, after its fields have been visited.
   *
   * @param iStruct the Iceberg struct paired with the group, or null
   * @param struct the Parquet group
   * @param fields the results of visiting the group's fields, in field order
   * @return null unless overridden
   */
  public T struct(Types.StructType iStruct, GroupType struct, List<T> fields) {
    return null;
  }

  /**
   * Called for a group annotated as a list, after its element has been visited.
   *
   * @param iList the Iceberg list type paired with the group, or null
   * @param array the Parquet list group
   * @param element the result of visiting the element, or null if the repeated group has no fields
   * @return null unless overridden
   */
  public T list(Types.ListType iList, GroupType array, T element) {
    return null;
  }

  /**
   * Called for a group annotated as a map, after its key and value have been visited.
   *
   * @param iMap the Iceberg map type paired with the group, or null
   * @param map the Parquet map group
   * @param key the result of visiting the key, or null if the key was not present
   * @param value the result of visiting the value, or null if the value was not present
   * @return null unless overridden
   */
  public T map(Types.MapType iMap, GroupType map, T key, T value) {
    return null;
  }

  /**
   * Called for each primitive Parquet type.
   *
   * @param iPrimitive the Iceberg primitive type paired with {@code primitive}, or null
   * @param primitive the Parquet primitive type
   * @return null unless overridden
   */
  public T primitive(org.apache.iceberg.types.Type.PrimitiveType iPrimitive, PrimitiveType primitive) {
    return null;
  }

  /**
   * Returns the names on the name stack, ordered from the first name entered to the most recent.
   *
   * @return the path to the node currently being visited
   */
  protected String[] currentPath() {
    List<String> names = Lists.newArrayList(fieldNames.descendingIterator());
    return names.toArray(new String[0]);
  }

  /**
   * Returns the current path with {@code name} appended as the last component.
   *
   * @param name the name to append
   * @return the names on the name stack, first entered first, followed by {@code name}
   */
  protected String[] path(String name) {
    List<String> names = Lists.newArrayList(fieldNames.descendingIterator());
    names.add(name);
    return names.toArray(new String[0]);
  }
}