Java Code Examples for org.apache.parquet.schema.GroupType#getOriginalType()

The following examples show how to use org.apache.parquet.schema.GroupType#getOriginalType(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: ParquetTypeVisitor.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Dispatches a Parquet type to the matching callback of the given visitor.
 *
 * <p>A {@link MessageType} goes to {@code visitor.message}, a primitive goes to
 * {@code visitor.primitive}, a group annotated LIST or MAP is delegated to
 * {@code visitList} / {@code visitMap}, and every other group is treated as a struct.
 *
 * @param type the Parquet type to visit
 * @param visitor the visitor whose callbacks produce the result
 * @param <T> the result type produced by the visitor
 * @return the value returned by the selected callback or delegate
 */
public static <T> T visit(Type type, ParquetTypeVisitor<T> visitor) {
  if (type instanceof MessageType) {
    return visitor.message((MessageType) type, visitFields(type.asGroupType(), visitor));
  }

  if (type.isPrimitive()) {
    return visitor.primitive(type.asPrimitiveType());
  }

  // neither a message nor a primitive: the type has to be a group
  GroupType struct = type.asGroupType();
  OriginalType logicalKind = struct.getOriginalType();

  if (logicalKind == OriginalType.LIST) {
    return visitList(struct, visitor);
  }

  if (logicalKind == OriginalType.MAP) {
    return visitMap(struct, visitor);
  }

  // no annotation, or an annotation other than LIST/MAP: handle as a struct
  return visitor.struct(struct, visitFields(struct, visitor));
}
 
Example 2
Source File: ParquetGroupConverter.java    From dremio-oss with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the converter for a group field, narrowing the projection to the next named
 * child segment of the requested column path when there is one.
 *
 * @param fieldName name of the field being converted
 * @param mutator output mutator forwarded to the default converter
 * @param arrowSchema when non-null, the arrow-schema based converter is used
 * @param groupType Parquet group type of the field
 * @param colNextChild next segment of the projected column path; may be null
 * @return the converter for the group
 */
private Converter groupConverter(String fieldName, OutputMutator mutator,
    List<Field> arrowSchema, GroupType groupType, PathSegment colNextChild) {
  PathSegment cursor = colNextChild;

  // For a LIST-annotated group, step over a leading named segment called "list".
  final boolean listAnnotated = groupType.getOriginalType() == OriginalType.LIST;
  if (listAnnotated
      && cursor != null
      && cursor.isNamed()
      && cursor.getNameSegment().getPath().equals("list")) {
    cursor = cursor.getChild();
  }

  // Skip unnamed segments until a named one is reached or the path ends.
  while (cursor != null && !cursor.isNamed()) {
    cursor = cursor.getChild();
  }

  final Collection<SchemaPath> c = new ArrayList<>();
  if (cursor != null) {
    c.add(new SchemaPath(cursor.getNameSegment()));
  }

  if (arrowSchema == null) {
    return defaultGroupConverter(fieldName, mutator, groupType, c, null);
  }
  return groupConverterFromArrowSchema(fieldName, groupType.getName(), groupType, c);
}
 
Example 3
Source File: ParquetGroupConverter.java    From dremio-oss with Apache License 2.0 5 votes vote down vote up
/**
 * Creates the converter for a group when no arrow-schema specific converter applies.
 *
 * <p>A LIST-annotated group whose schema {@code LogicalListL1Converter} supports gets a
 * {@code LogicalListL1Converter}; every other group gets a {@code StructGroupConverter}
 * writing either into a list of structs (repeated group) or into a plain struct.
 *
 * @param fieldName name of the field being converted
 * @param mutator output mutator handed to the created converter
 * @param groupType Parquet group type of the field
 * @param c projected child columns
 * @param arrowSchema arrow schema fields; may be null
 * @return the converter for the group
 */
Converter defaultGroupConverter(String fieldName, OutputMutator mutator, GroupType groupType,
                                Collection<SchemaPath> c, List<Field> arrowSchema) {

  final boolean supportedLogicalList = groupType.getOriginalType() == OriginalType.LIST
    && LogicalListL1Converter.isSupportedSchema(groupType);
  if (supportedLogicalList) {
    return new LogicalListL1Converter(columnResolver, fieldName, mutator, getWriterProvider(),
      groupType, c, options, arrowSchema, schemaHelper);
  }

  final String childName = getNameForChild(columnResolver.getBatchSchemaColumnName(fieldName));
  List<Field> effectiveSchema = arrowSchema;
  final StructWriter structWriter;
  if (!groupType.isRepetition(REPEATED)) {
    structWriter = getWriterProvider().struct(childName);
  } else {
    if (effectiveSchema != null) {
      //TODO assert this should never occur at this level
      // The only parquet writer that writes an arrowSchema emits repeated fields solely as
      // part of a LOGICAL LIST, so "repeated + arrow schema present" is expected to show up
      // only inside the LogicalList converter.
      effectiveSchema = handleRepeatedField(effectiveSchema, groupType);
    }
    structWriter = list(childName).struct();
  }

  return new StructGroupConverter(columnResolver, fieldName, mutator, structWriter, groupType, c,
    options, effectiveSchema, schemaHelper);
}
 
Example 4
Source File: ParquetTypeHelper.java    From dremio-oss with Apache License 2.0 5 votes vote down vote up
/**
 * Converts a Parquet field into an arrow {@code Field}.
 *
 * <ul>
 *   <li>Primitive fields are converted through {@code createField}.</li>
 *   <li>LIST-annotated groups must hold a repeated group with exactly one child; that child
 *       is converted recursively and wrapped in an {@code ArrowType.List} field.</li>
 *   <li>Groups without an original type are converted as structs.</li>
 *   <li>Any other annotated group is unsupported and yields an empty result.</li>
 * </ul>
 *
 * @param parquetField the Parquet field to convert
 * @param schemaHelper helper forwarded to the field-creation routines
 * @return the converted field, or {@code Optional.empty()} when the type is unsupported
 * @throws UserException (unsupported error) if a LIST group has no child, or its first child
 *         is primitive, is not repeated, or does not contain exactly one field
 */
public static Optional<Field> toField(final Type parquetField, final SchemaDerivationHelper schemaHelper) {
  if (parquetField.isPrimitive()) {
    SchemaPath columnSchemaPath = SchemaPath.getCompoundPath(parquetField.getName());
    return Optional.of(createField(columnSchemaPath, parquetField.asPrimitiveType(), parquetField.getOriginalType(), schemaHelper));
  }

  // Handle non-primitive cases
  final GroupType complexField = (GroupType) parquetField;
  if (OriginalType.LIST == complexField.getOriginalType()) {
    // Keep the child typed as Type until it has been validated: casting it to GroupType up
    // front would raise ClassCastException for a primitive child and bypass the
    // unsupported-type error below. An empty group is reported the same way.
    final Type repeatedField = complexField.getFields().isEmpty() ? null : complexField.getFields().get(0);

    // should have only one child field type
    if (repeatedField == null
        || repeatedField.isPrimitive()
        || !repeatedField.isRepetition(REPEATED)
        || repeatedField.asGroupType().getFields().size() != 1) {
      throw UserException.unsupportedError()
        .message("Parquet List Type is expected to contain only one sub type. Column '%s' contains %d", parquetField.getName(), complexField.getFieldCount())
        .build();
    }

    Optional<Field> subField = toField(repeatedField.asGroupType().getFields().get(0), schemaHelper);
    return subField.map(sf -> new Field(complexField.getName(), true, new ArrowType.List(), Arrays.asList(sf)));
  }

  final boolean isStructType = complexField.getOriginalType() == null;
  if (isStructType) { // it is struct
    return toComplexField(complexField, new ArrowType.Struct(), schemaHelper);
  }

  // Unsupported complex type
  return Optional.empty();
}
 
Example 5
Source File: TestCTAS.java    From dremio-oss with Apache License 2.0 5 votes vote down vote up
/**
 * Recursively asserts that the given field, and every field nested below it, has a column id.
 *
 * <p>For a LIST-annotated group the recursion continues with the children of the group's
 * first field rather than with the group's own children.
 *
 * @param field the Parquet field to verify
 */
private void verifyFieldHasColumnId(Type field) {
  System.out.println("Verifying column " + field.getName());
  assertTrue("Field " + field.getName() + " does not have column id", field.getId() != null);

  if (!(field instanceof GroupType)) {
    return;
  }

  final GroupType group = (GroupType) field;
  final GroupType childHolder = group.getOriginalType() == OriginalType.LIST
    ? group.getFields().get(0).asGroupType()
    : group;

  for (Type nested : childHolder.getFields()) {
    verifyFieldHasColumnId(nested);
  }
}
 
Example 6
Source File: List3Levels.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Wraps a Parquet list after checking that it has the expected three-level shape:
 * a LIST-annotated group with a single field, which is a repeated group that itself
 * has a single field (the element).
 *
 * @param list the Parquet List
 * @throws IllegalArgumentException if {@code list} does not have that shape
 */
public List3Levels(GroupType list) {
  final boolean singleFieldList =
      list.getOriginalType() == OriginalType.LIST && list.getFields().size() == 1;
  if (!singleFieldList) {
    throw new IllegalArgumentException("invalid list type: " + list);
  }
  this.list = list;

  final Type middleLevel = list.getFields().get(0);
  final boolean singleFieldRepeatedGroup = !middleLevel.isPrimitive()
      && middleLevel.isRepetition(REPEATED)
      && middleLevel.asGroupType().getFields().size() == 1;
  if (!singleFieldRepeatedGroup) {
    throw new IllegalArgumentException("invalid list type: " + list);
  }

  final GroupType repeatedGroup = middleLevel.asGroupType();
  this.repeated = repeatedGroup;
  this.element = repeatedGroup.getFields().get(0);
}
 
Example 7
Source File: ParquetTypeVisitor.java    From presto with Apache License 2.0 4 votes vote down vote up
/**
 * Dispatches a Parquet type to the matching callback of the given visitor.
 *
 * <p>Messages and primitives are handed straight to the visitor. LIST and MAP annotated
 * groups are validated, their repeated inner group name is pushed onto
 * {@code visitor.fieldNames} while the element / key / value fields are visited, and the
 * name is popped again afterwards. Every other group is visited as a struct.
 *
 * @param type the Parquet type to visit
 * @param visitor the visitor whose callbacks produce the result
 * @param <T> the result type produced by the visitor
 * @return the value returned by the selected visitor callback
 */
public static <T> T visit(Type type, ParquetTypeVisitor<T> visitor)
{
    if (type instanceof MessageType) {
        return visitor.message((MessageType) type, visitFields(type.asGroupType(), visitor));
    }

    if (type.isPrimitive()) {
        return visitor.primitive(type.asPrimitiveType());
    }

    // neither a message nor a primitive: the type has to be a group
    GroupType group = type.asGroupType();
    OriginalType annotation = group.getOriginalType();

    if (annotation == LIST) {
        checkArgument(!group.isRepetition(REPEATED),
                "Invalid list: top-level group is repeated: " + group);
        checkArgument(group.getFieldCount() == 1,
                "Invalid list: does not contain single repeated field: " + group);

        GroupType repeated = group.getFields().get(0).asGroupType();
        checkArgument(repeated.isRepetition(REPEATED),
                "Invalid list: inner group is not repeated");
        checkArgument(repeated.getFieldCount() <= 1,
                "Invalid list: repeated group is not a single field: " + group);

        visitor.fieldNames.push(repeated.getName());
        try {
            // the element stays null when the repeated group has no field
            T element = null;
            if (repeated.getFieldCount() > 0) {
                element = visitField(repeated.getType(0), visitor);
            }
            return visitor.list(group, element);
        }
        finally {
            visitor.fieldNames.pop();
        }
    }

    if (annotation == MAP) {
        checkArgument(!group.isRepetition(REPEATED),
                "Invalid map: top-level group is repeated: " + group);
        checkArgument(group.getFieldCount() == 1,
                "Invalid map: does not contain single repeated field: " + group);

        GroupType keyValue = group.getType(0).asGroupType();
        checkArgument(keyValue.isRepetition(REPEATED),
                "Invalid map: inner group is not repeated");
        checkArgument(keyValue.getFieldCount() <= 2,
                "Invalid map: repeated group does not have 2 fields");

        visitor.fieldNames.push(keyValue.getName());
        try {
            T key = null;
            T value = null;
            switch (keyValue.getFieldCount()) {
                case 2:
                    // both key and value are present
                    key = visitField(keyValue.getType(0), visitor);
                    value = visitField(keyValue.getType(1), visitor);
                    break;
                case 1:
                    // only one field: its name decides whether it is the key or the value
                    Type single = keyValue.getType(0);
                    if (single.getName().equalsIgnoreCase("key")) {
                        key = visitField(single, visitor);
                    }
                    else {
                        value = visitField(single, visitor);
                    }
                    break;
                default:
                    // no fields: both results remain null
                    break;
            }
            return visitor.map(group, key, value);
        }
        finally {
            visitor.fieldNames.pop();
        }
    }

    return visitor.struct(group, visitFields(group, visitor));
}
 
Example 8
Source File: ParquetTypeVisitor.java    From iceberg with Apache License 2.0 4 votes vote down vote up
/**
 * Dispatches a Parquet type to the matching callback of the given visitor.
 *
 * <p>Messages and primitives are handed straight to the visitor. LIST and MAP annotated
 * groups are validated first; while their element / key / value fields are visited, the
 * name of the repeated inner group is pushed onto {@code visitor.fieldNames} and popped
 * again afterwards. Groups with no annotation, or with any other annotation, are visited
 * as structs.
 *
 * @param type the Parquet type to visit
 * @param visitor the visitor whose callbacks produce the result
 * @param <T> the result type produced by the visitor
 * @return the value returned by the selected visitor callback
 * @throws IllegalArgumentException if a LIST or MAP group fails one of the
 *         {@code Preconditions.checkArgument} structure checks
 */
public static <T> T visit(Type type, ParquetTypeVisitor<T> visitor) {
  if (type instanceof MessageType) {
    return visitor.message((MessageType) type,
        visitFields(type.asGroupType(), visitor));

  } else if (type.isPrimitive()) {
    return visitor.primitive(type.asPrimitiveType());

  } else {
    // if not a primitive, the typeId must be a group
    GroupType group = type.asGroupType();
    OriginalType annotation = group.getOriginalType();
    if (annotation != null) {
      switch (annotation) {
        case LIST:
          Preconditions.checkArgument(!group.isRepetition(REPEATED),
              "Invalid list: top-level group is repeated: " + group);
          Preconditions.checkArgument(group.getFieldCount() == 1,
              "Invalid list: does not contain single repeated field: " + group);

          GroupType repeatedElement = group.getFields().get(0).asGroupType();
          Preconditions.checkArgument(repeatedElement.isRepetition(REPEATED),
              "Invalid list: inner group is not repeated");
          Preconditions.checkArgument(repeatedElement.getFieldCount() <= 1,
              "Invalid list: repeated group is not a single field: " + group);

          visitor.fieldNames.push(repeatedElement.getName());
          try {
            // the element result stays null when the repeated group has no field
            T elementResult = null;
            if (repeatedElement.getFieldCount() > 0) {
              elementResult = visitField(repeatedElement.getType(0), visitor);
            }

            return visitor.list(group, elementResult);

          } finally {
            visitor.fieldNames.pop();
          }

        case MAP:
          Preconditions.checkArgument(!group.isRepetition(REPEATED),
              "Invalid map: top-level group is repeated: " + group);
          Preconditions.checkArgument(group.getFieldCount() == 1,
              "Invalid map: does not contain single repeated field: " + group);

          GroupType repeatedKeyValue = group.getType(0).asGroupType();
          Preconditions.checkArgument(repeatedKeyValue.isRepetition(REPEATED),
              "Invalid map: inner group is not repeated");
          Preconditions.checkArgument(repeatedKeyValue.getFieldCount() <= 2,
              "Invalid map: repeated group does not have 2 fields");

          visitor.fieldNames.push(repeatedKeyValue.getName());
          try {
            T keyResult = null;
            T valueResult = null;
            switch (repeatedKeyValue.getFieldCount()) {
              case 2:
                // if there are 2 fields, both key and value are projected
                keyResult = visitField(repeatedKeyValue.getType(0), visitor);
                valueResult = visitField(repeatedKeyValue.getType(1), visitor);
                // must not fall through: the single-field case would visit field 0 again
                // and could overwrite valueResult with the result for the key field
                break;
              case 1:
                // if there is just one, use the name to determine what it is
                Type keyOrValue = repeatedKeyValue.getType(0);
                if (keyOrValue.getName().equalsIgnoreCase("key")) {
                  keyResult = visitField(keyOrValue, visitor);
                  // value result remains null
                } else {
                  valueResult = visitField(keyOrValue, visitor);
                  // key result remains null
                }
                break;
              default:
                // both results will remain null
                break;
            }

            return visitor.map(group, keyResult, valueResult);

          } finally {
            visitor.fieldNames.pop();
          }

        default:
          // any other annotation is handled as a struct below
          break;
      }
    }

    return visitor.struct(group, visitFields(group, visitor));
  }
}