Java Code Examples for org.apache.parquet.schema.GroupType#getFields()

The following examples show how to use org.apache.parquet.schema.GroupType#getFields(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: AvroWriteSupportInt96Avro18.java    From datacollector with Apache License 2.0 7 votes vote down vote up
/**
 * Writes the fields of {@code record} to {@code recordConsumer}.
 *
 * <p>Avro fields whose schema type is NULL are skipped without consuming a
 * Parquet field, so the Parquet position is tracked separately from the Avro
 * position.
 *
 * @param schema     Parquet group whose fields are written in order
 * @param avroSchema Avro schema whose fields are iterated
 * @param record     object the field values are read from via {@code model}
 * @throws RuntimeException if a value is null for a REQUIRED Parquet field
 */
private void writeRecordFields(GroupType schema, Schema avroSchema,
    Object record) {
  List<Type> parquetFields = schema.getFields();
  List<Schema.Field> avroFields = avroSchema.getFields();
  int parquetIndex = 0;
  int nextAvroIndex = 0;
  for (Schema.Field avroField : avroFields) {
    int avroIndex = nextAvroIndex++;
    if (avroField.schema().getType().equals(Schema.Type.NULL)) {
      // No Parquet counterpart: only the Avro position advances.
      continue;
    }
    Type parquetType = parquetFields.get(parquetIndex);
    Object value = model.getField(record, avroField.name(), avroIndex);
    if (value == null) {
      if (parquetType.isRepetition(Type.Repetition.REQUIRED)) {
        throw new RuntimeException("Null-value for required field: " + avroField.name());
      }
    } else {
      recordConsumer.startField(parquetType.getName(), parquetIndex);
      writeValue(parquetType, avroField.schema(), value);
      recordConsumer.endField(parquetType.getName(), parquetIndex);
    }
    parquetIndex++;
  }
}
 
Example 2
Source File: ColumnIOFactory.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/**
 * Visits every field of {@code groupType} that is also present in
 * {@code requestedGroupType}, with {@code current} temporarily set to
 * {@code newIO}.
 *
 * <p>For each visited field, {@code currentRequestedIndex} and
 * {@code currentRequestedType} are updated before the field accepts this
 * visitor. The previous value of {@code current} is restored afterwards.
 */
private void visitChildren(GroupColumnIO newIO, GroupType groupType, GroupType requestedGroupType) {
  final GroupColumnIO previousIO = current;
  current = newIO;
  for (Type fileField : groupType.getFields()) {
    final String fieldName = fileField.getName();
    if (!requestedGroupType.containsField(fieldName)) {
      // Not part of the requested schema: nothing is visited for this field.
      continue;
    }
    currentRequestedIndex = requestedGroupType.getFieldIndex(fieldName);
    currentRequestedType = requestedGroupType.getType(currentRequestedIndex);
    boolean requestedIsStricter =
        currentRequestedType.getRepetition().isMoreRestrictiveThan(fileField.getRepetition());
    if (requestedIsStricter) {
      incompatibleSchema(fileField, currentRequestedType);
    }
    fileField.accept(this);
  }
  current = previousIO;
}
 
Example 3
Source File: ThriftRecordConverter.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/**
 * Checks whether {@code requested} lacks a field that is REQUIRED in
 * {@code fullSchema}.
 *
 * <p>Group fields present in both schemas are checked recursively; primitive
 * fields present in both are accepted as-is.
 *
 * @return {@code true} if a REQUIRED field of {@code fullSchema} (at any
 *         nesting level reached through shared groups) is absent from
 *         {@code requested}
 */
private boolean hasMissingRequiredFieldInGroupType(GroupType requested, GroupType fullSchema) {
  for (Type fullField : fullSchema.getFields()) {
    final String fieldName = fullField.getName();

    if (!requested.containsField(fieldName)) {
      // Absent from the requested schema: only a problem when it is required.
      if (fullField.getRepetition() == Type.Repetition.REQUIRED) {
        return true;
      }
      continue;
    }

    if (fullField.isPrimitive()) {
      continue;
    }

    // Present in both and a group: descend into its children.
    Type requestedField = requested.getType(fieldName);
    if (hasMissingRequiredFieldInGroupType(requestedField.asGroupType(), fullField.asGroupType())) {
      return true;
    }
  }

  return false;
}
 
Example 4
Source File: AvroWriteSupport.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/**
 * Writes the fields of {@code record} to {@code recordConsumer}.
 *
 * <p>Avro fields whose schema type is NULL are skipped without consuming a
 * Parquet field, so the Parquet position is tracked separately from the Avro
 * position.
 *
 * @param schema     Parquet group whose fields are written in order
 * @param avroSchema Avro schema whose fields are iterated
 * @param record     object the field values are read from via {@code model}
 * @throws RuntimeException if a value is null for a REQUIRED Parquet field
 */
private void writeRecordFields(GroupType schema, Schema avroSchema,
                               Object record) {
  List<Type> parquetFields = schema.getFields();
  List<Schema.Field> avroFields = avroSchema.getFields();
  int parquetIndex = 0;
  int nextAvroIndex = 0;
  for (Schema.Field avroField : avroFields) {
    int avroIndex = nextAvroIndex++;
    if (avroField.schema().getType().equals(Schema.Type.NULL)) {
      // No Parquet counterpart: only the Avro position advances.
      continue;
    }
    Type parquetType = parquetFields.get(parquetIndex);
    Object value = model.getField(record, avroField.name(), avroIndex);
    if (value == null) {
      if (parquetType.isRepetition(Type.Repetition.REQUIRED)) {
        throw new RuntimeException("Null-value for required field: " + avroField.name());
      }
    } else {
      recordConsumer.startField(parquetType.getName(), parquetIndex);
      writeValue(parquetType, avroField.schema(), value);
      recordConsumer.endField(parquetType.getName(), parquetIndex);
    }
    parquetIndex++;
  }
}
 
Example 5
Source File: TajoWriteSupport.java    From tajo with Apache License 2.0 6 votes vote down vote up
/**
 * Writes the columns of {@code tuple} to {@code recordConsumer}.
 *
 * <p>Columns whose Tajo type is NULL_TYPE are skipped without consuming a
 * Parquet field, so the Parquet position is tracked separately from the Tajo
 * column position.
 *
 * @param schema     Parquet group whose fields are written in order
 * @param tajoSchema Tajo schema whose columns are iterated
 * @param tuple      tuple the column values are read from
 * @throws RuntimeException if a column is blank or null for a REQUIRED
 *                          Parquet field
 */
private void writeRecordFields(GroupType schema, Schema tajoSchema,
                               Tuple tuple) {
  List<Type> parquetFields = schema.getFields();
  int parquetIndex = 0;
  for (int tajoIndex = 0; tajoIndex < tajoSchema.size(); ++tajoIndex) {
    Column tajoColumn = tajoSchema.getColumn(tajoIndex);
    if (tajoColumn.getDataType().getType() == TajoDataTypes.Type.NULL_TYPE) {
      // No Parquet counterpart: only the Tajo position advances.
      continue;
    }
    Type parquetType = parquetFields.get(parquetIndex);
    if (tuple.isBlankOrNull(tajoIndex)) {
      if (parquetType.isRepetition(Type.Repetition.REQUIRED)) {
        throw new RuntimeException("Null-value for required field: " +
            tajoColumn.getSimpleName());
      }
    } else {
      recordConsumer.startField(parquetType.getName(), parquetIndex);
      writeValue(tajoColumn, tuple, tajoIndex);
      recordConsumer.endField(parquetType.getName(), parquetIndex);
    }
    ++parquetIndex;
  }
}
 
Example 6
Source File: AvroWriteSupportInt96Avro17.java    From datacollector with Apache License 2.0 6 votes vote down vote up
/**
 * Writes the fields of {@code record} to {@code recordConsumer}.
 *
 * <p>Avro fields whose schema type is NULL are skipped without consuming a
 * Parquet field, so the Parquet position is tracked separately from the Avro
 * position.
 *
 * @param schema     Parquet group whose fields are written in order
 * @param avroSchema Avro schema whose fields are iterated
 * @param record     object the field values are read from via {@code model}
 * @throws RuntimeException if a value is null for a REQUIRED Parquet field
 */
private void writeRecordFields(GroupType schema, Schema avroSchema,
    Object record) {
  List<Type> parquetFields = schema.getFields();
  List<Schema.Field> avroFields = avroSchema.getFields();
  int parquetIndex = 0;
  int nextAvroIndex = 0;
  for (Schema.Field avroField : avroFields) {
    int avroIndex = nextAvroIndex++;
    if (avroField.schema().getType().equals(Schema.Type.NULL)) {
      // No Parquet counterpart: only the Avro position advances.
      continue;
    }
    Type parquetType = parquetFields.get(parquetIndex);
    Object value = model.getField(record, avroField.name(), avroIndex);
    if (value == null) {
      if (parquetType.isRepetition(Type.Repetition.REQUIRED)) {
        throw new RuntimeException("Null-value for required field: " + avroField.name());
      }
    } else {
      recordConsumer.startField(parquetType.getName(), parquetIndex);
      writeValue(parquetType, avroField.schema(), value);
      recordConsumer.endField(parquetType.getName(), parquetIndex);
    }
    parquetIndex++;
  }
}
 
Example 7
Source File: HiveSchemaUtil.java    From hudi with Apache License 2.0 6 votes vote down vote up
/**
 * Create an Array Hive schema from equivalent parquet list type.
 *
 * <p>A primitive element is converted directly. A group element is converted
 * as a whole when it has more than one field, or exactly one field and is
 * named {@code "array"} or {@code elementName + "_tuple"}; otherwise its
 * first field is converted instead.
 *
 * @param elementType parquet type of the list element
 * @param elementName name used to recognise the {@code _tuple} naming form
 * @return the Hive type string, of the form {@code ARRAY< ...>}
 */
private static String createHiveArray(Type elementType, String elementName) {
  final Type hiveElement;
  if (elementType.isPrimitive()) {
    hiveElement = elementType;
  } else {
    final List<Type> children = elementType.asGroupType().getFields();
    final int childCount = children.size();
    final boolean keepGroup;
    if (childCount > 1) {
      keepGroup = true;
    } else if (childCount == 1) {
      keepGroup = elementType.getName().equals("array")
          || elementType.getName().equals(elementName + "_tuple");
    } else {
      keepGroup = false;
    }
    hiveElement = keepGroup ? elementType : children.get(0);
  }
  return "ARRAY< " + convertField(hiveElement) + ">";
}
 
Example 8
Source File: ParquetRecordWriter.java    From dremio-oss with Apache License 2.0 6 votes vote down vote up
/**
 * Changes the list inner '$data$' vector name to 'element' in the schema.
 *
 * <p>Returns a new type named {@code "element"} that carries over the
 * repetition and original type of {@code childType}. For a primitive type the
 * primitive type name, type length, decimal metadata and id are copied as
 * well; for a group type the fields are reused and the id, if present, is
 * re-applied.
 *
 * @param childType the type to rename
 * @return a copy of {@code childType} named {@code "element"}
 */
private Type renameChildTypeToElement(Type childType) {
  if (childType.isPrimitive()) {
    PrimitiveType childPrimitiveType = childType.asPrimitiveType();
    return new PrimitiveType(childType.getRepetition(),
      childPrimitiveType.getPrimitiveTypeName(),
      childPrimitiveType.getTypeLength(),
      "element",
      childPrimitiveType.getOriginalType(),
      childPrimitiveType.getDecimalMetadata(),
      childPrimitiveType.getId());
  } else {
    GroupType childGroupType = childType.asGroupType();
    Type.ID id = childGroupType.getId();
    GroupType groupType = new GroupType(childType.getRepetition(),
      "element",
      childType.getOriginalType(),
      childGroupType.getFields());
    if (id != null) {
      // Read the numeric id through its accessor; hashCode() is not a
      // contract for the id value and must not be relied on for it.
      groupType = groupType.withId(id.intValue());
    }
    return groupType;
  }
}
 
Example 9
Source File: TestCTAS.java    From dremio-oss with Apache License 2.0 5 votes vote down vote up
/**
 * Asserts that {@code field} has a non-null id, then recurses into its
 * children when it is a {@link GroupType}.
 *
 * <p>For a group whose original type is LIST, the children of its first
 * field (taken as a group) are checked instead of the list's own fields.
 */
private void verifyFieldHasColumnId(Type field) {
  final String fieldName = field.getName();
  System.out.println("Verifying column " + fieldName);
  assertTrue("Field " + fieldName + " does not have column id", field.getId() != null);
  if (!(field instanceof GroupType)) {
    return;
  }
  GroupType container = (GroupType) field;
  if (container.getOriginalType() == OriginalType.LIST) {
    // Step through the list wrapper to the group held in its first field.
    container = container.getFields().get(0).asGroupType();
  }
  for (Type nested : container.getFields()) {
    verifyFieldHasColumnId(nested);
  }
}
 
Example 10
Source File: TypeWithSchemaVisitor.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Visits each field of {@code group}, pairing it with the struct field that
 * has the same id when one can be looked up.
 *
 * <p>The paired struct field is {@code null} when {@code struct} is
 * {@code null}, when the Parquet field has no id, or when the id is negative.
 *
 * @return the visitor results, in the order of {@code group}'s fields
 */
private static <T> List<T> visitFields(Types.StructType struct, GroupType group, TypeWithSchemaVisitor<T> visitor) {
  List<T> visited = Lists.newArrayListWithExpectedSize(group.getFieldCount());
  for (Type parquetField : group.getFields()) {
    // -1 marks a Parquet field that carries no id.
    int fieldId = parquetField.getId() == null ? -1 : parquetField.getId().intValue();
    Types.NestedField structField = null;
    if (struct != null && fieldId >= 0) {
      structField = struct.field(fieldId);
    }
    visited.add(visitField(structField, parquetField, visitor));
  }

  return visited;
}
 
Example 11
Source File: ParquetTypeVisitor.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Applies {@code visitField} to every field of {@code group}.
 *
 * @return the per-field results, in the order of {@code group}'s fields
 */
private static <T> List<T> visitFields(GroupType group, ParquetTypeVisitor<T> visitor) {
  List<T> visited = Lists.newArrayListWithExpectedSize(group.getFieldCount());
  for (Type child : group.getFields()) {
    T childResult = visitField(child, visitor);
    visited.add(childResult);
  }

  return visited;
}
 
Example 12
Source File: TypeWithSchemaVisitor.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Visits each field of {@code group}, pairing it with the struct field that
 * has the same id when one can be looked up.
 *
 * <p>The paired struct field is {@code null} when {@code struct} is
 * {@code null}, when the Parquet field has no id, or when the id is negative.
 *
 * @return the visitor results, in the order of {@code group}'s fields
 */
private static <T> List<T> visitFields(Types.StructType struct, GroupType group, TypeWithSchemaVisitor<T> visitor) {
  List<T> visited = Lists.newArrayListWithExpectedSize(group.getFieldCount());
  for (Type parquetField : group.getFields()) {
    // -1 marks a Parquet field that carries no id.
    int fieldId = parquetField.getId() == null ? -1 : parquetField.getId().intValue();
    Types.NestedField structField = null;
    if (struct != null && fieldId >= 0) {
      structField = struct.field(fieldId);
    }
    visited.add(visitField(structField, parquetField, visitor));
  }

  return visited;
}
 
Example 13
Source File: ParquetTypeVisitor.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Visits every field of {@code group}, bracketing each visit with
 * {@code visitor.beforeField} and {@code visitor.afterField}.
 *
 * <p>{@code afterField} runs even when the visit of that field throws.
 *
 * @return the per-field results, in the order of {@code group}'s fields
 */
private static <T> List<T> visitFields(GroupType group, ParquetTypeVisitor<T> visitor) {
  List<T> visited = Lists.newArrayListWithExpectedSize(group.getFieldCount());
  for (Type child : group.getFields()) {
    visitor.beforeField(child);
    try {
      T childResult = visit(child, visitor);
      visited.add(childResult);
    } finally {
      visitor.afterField(child);
    }
  }

  return visited;
}
 
Example 14
Source File: RowConverter.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a row converter with one converter slot per field of {@code schema}.
 *
 * <p>The slots are only populated when {@code typeInfo} has arity of at least
 * one and is a {@link CompositeType}; otherwise they are left unset.
 *
 * @param schema   Parquet group describing the row
 * @param typeInfo type information for the row
 * @param parent   holder stored as the parent data holder
 * @param pos      position stored as this row's position in the parent row
 */
public RowConverter(GroupType schema, TypeInformation<?> typeInfo, ParentDataHolder parent, int pos) {
	this.typeInfo = typeInfo;
	this.parentDataHolder = parent;
	this.posInParentRow = pos;
	this.converters = new Converter[schema.getFieldCount()];

	if (typeInfo.getArity() < 1 || !(typeInfo instanceof CompositeType)) {
		return;
	}

	CompositeType<?> compositeType = (CompositeType<?>) typeInfo;
	int fieldPos = 0;
	for (Type field : schema.getFields()) {
		converters[fieldPos] = createConverter(field, fieldPos, compositeType.getTypeAt(fieldPos), this);
		fieldPos++;
	}
}
 
Example 15
Source File: JsonRecordFormatter.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Builds one formatter per field of {@code groupSchema}, keyed by field name.
 *
 * <p>Primitive fields get a {@code JsonPrimitiveWriter}; all other fields get
 * a {@code JsonGroupFormatter}. The map keeps the schema's field order.
 */
private Map<String, JsonRecordFormatter> buildWriters(GroupType groupSchema) {
  Map<String, JsonRecordFormatter> formattersByName = new LinkedHashMap<String, JsonRecordFormatter>();
  for (Type field : groupSchema.getFields()) {
    final JsonRecordFormatter formatter;
    if (field.isPrimitive()) {
      formatter = new JsonPrimitiveWriter(field);
    } else {
      formatter = new JsonGroupFormatter((GroupType) field);
    }
    formattersByName.put(field.getName(), formatter);
  }

  return formattersByName;
}
 
Example 16
Source File: MetadataUtils.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Prints a summary line for {@code type} (name prefixed by {@code depth}
 * dots, repetition and field count) and then the details of each of its
 * fields one level deeper.
 *
 * <p>The group's name is pushed onto {@code cpath} while its fields are
 * printed and removed again afterwards.
 */
private static void showDetails(PrettyPrintWriter out, GroupType type, int depth, MessageType container, List<String> cpath) {
  String indentedName = Strings.repeat(".", depth) + type.getName();
  out.format("%s: %s F:%d%n", indentedName, type.getRepetition(), type.getFieldCount());

  cpath.add(type.getName());
  for (Type child : type.getFields()) {
    showDetails(out, child, depth + 1, container, cpath);
  }
  // Pop the entry pushed above.
  int lastIndex = cpath.size() - 1;
  cpath.remove(lastIndex);
}
 
Example 17
Source File: MetadataUtils.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Prints a summary line for {@code type} (name prefixed by {@code depth}
 * dots, repetition and field count) and then the details of each of its
 * fields one level deeper, forwarding {@code showOriginalTypes} unchanged.
 *
 * <p>The group's name is pushed onto {@code cpath} while its fields are
 * printed and removed again afterwards.
 */
private static void showDetails(PrettyPrintWriter out, GroupType type, int depth, MessageType container, List<String> cpath, boolean showOriginalTypes) {
  String indentedName = Strings.repeat(".", depth) + type.getName();
  out.format("%s: %s F:%d%n", indentedName, type.getRepetition(), type.getFieldCount());

  cpath.add(type.getName());
  for (Type child : type.getFields()) {
    showDetails(out, child, depth + 1, container, cpath, showOriginalTypes);
  }
  // Pop the entry pushed above.
  int lastIndex = cpath.size() - 1;
  cpath.remove(lastIndex);
}
 
Example 18
Source File: ProtoMessageConverter.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a converter with one child converter per field of
 * {@code parquetSchema}, each matched by name to a field of the builder's
 * protobuf descriptor.
 *
 * @param pvc           parent value container; must not be {@code null}
 * @param builder       protobuf message builder the converters are created for
 * @param parquetSchema Parquet group whose fields are converted
 * @throws IllegalStateException if {@code pvc} is {@code null}
 * @throws IncompatibleSchemaModificationException if a Parquet field has no
 *         protobuf field of the same name
 */
ProtoMessageConverter(ParentValueContainer pvc, Message.Builder builder, GroupType parquetSchema) {

    converters = new Converter[parquetSchema.getFieldCount()];
    this.parent = pvc;

    if (pvc == null) {
      throw new IllegalStateException("Missing parent value container");
    }

    myBuilder = builder;

    Descriptors.Descriptor protoDescriptor = builder.getDescriptorForType();

    int converterIndex = 0;
    for (Type parquetField : parquetSchema.getFields()) {
      String parquetFieldName = parquetField.getName();
      Descriptors.FieldDescriptor protoField = protoDescriptor.findFieldByName(parquetFieldName);

      if (protoField == null) {
        // Report both sides of the mismatch: the Parquet field and the full proto descriptor.
        String description = "Scheme mismatch \n\"" + parquetField + "\"" +
                "\n proto descriptor:\n" + protoDescriptor.toProto();
        throw new IncompatibleSchemaModificationException("Cant find \"" + parquetField.getName() + "\" " + description);
      }

      converters[converterIndex] = newMessageConverter(myBuilder, protoField, parquetField);
      converterIndex++;
    }
}
 
Example 19
Source File: RowConverter.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a row converter with one converter slot per field of {@code schema}.
 *
 * <p>The slots are only populated when {@code typeInfo} has arity of at least
 * one and is a {@link CompositeType}; otherwise they are left unset.
 *
 * @param schema   Parquet group describing the row
 * @param typeInfo type information for the row
 * @param parent   holder stored as the parent data holder
 * @param pos      position stored as this row's position in the parent row
 */
public RowConverter(GroupType schema, TypeInformation<?> typeInfo, ParentDataHolder parent, int pos) {
	this.typeInfo = typeInfo;
	this.parentDataHolder = parent;
	this.posInParentRow = pos;
	this.converters = new Converter[schema.getFieldCount()];

	if (typeInfo.getArity() < 1 || !(typeInfo instanceof CompositeType)) {
		return;
	}

	CompositeType<?> compositeType = (CompositeType<?>) typeInfo;
	int fieldPos = 0;
	for (Type field : schema.getFields()) {
		converters[fieldPos] = createConverter(field, fieldPos, compositeType.getTypeAt(fieldPos), this);
		fieldPos++;
	}
}
 
Example 20
Source File: ParquetTypeVisitor.java    From presto with Apache License 2.0 5 votes vote down vote up
/**
 * Applies {@code visitField} to every field of {@code group}.
 *
 * @return the per-field results, in the order of {@code group}'s fields
 */
private static <T> List<T> visitFields(GroupType group, ParquetTypeVisitor<T> visitor)
{
    List<T> visited = Lists.newArrayListWithExpectedSize(group.getFieldCount());
    for (Type child : group.getFields()) {
        T childResult = visitField(child, visitor);
        visited.add(childResult);
    }

    return visited;
}