parquet.schema.GroupType Java Examples

The following examples show how to use parquet.schema.GroupType. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: TupleWriter.java    From hadoop-etl-udfs with MIT License 6 votes vote down vote up
/**
 * Writes the non-null fields of {@code tuple} to the record consumer, walking the fields
 * of {@code type} by position and recursing into nested groups.
 *
 * @param tuple the values to write; positions reported as null are skipped
 * @param type  the group schema whose fields are visited in order
 */
private void writeTuple(Tuple tuple, GroupType type) {
    for (int pos = 0; pos < type.getFieldCount(); pos++) {
        Type field = type.getType(pos);
        String name = field.getName();
        // A null value is expressed by leaving the field out entirely.
        if (!tuple.isNull(pos)) {
            recordConsumer.startField(name, pos);
            if (!field.isPrimitive()) {
                // Nested group: wrap the recursive write in start/end group markers.
                recordConsumer.startGroup();
                writeTuple(tuple.getTuple(pos), field.asGroupType());
                recordConsumer.endGroup();
            } else {
                tuple.writePrimitiveValue(recordConsumer, pos, (PrimitiveType) field);
            }
            recordConsumer.endField(name, pos);
        }
    }
}
 
Example #2
Source File: JsonElementConversionFactory.java    From incubator-gobblin with Apache License 2.0 6 votes vote down vote up
/**
 * Converts a JSON object into a {@link ParquetGroup}, one entry at a time.
 *
 * <p>Each entry is converted with the converter registered under its key. An entry whose
 * converted value is {@code null} is left out when its field is OPTIONAL; every other
 * converted value is added to the group under the entry's key.
 *
 * @param value the JSON element to convert; read through {@code getAsJsonObject()}
 * @return the populated group
 */
@Override
Object convertField(JsonElement value) {
  ParquetGroup group = new ParquetGroup((GroupType) schema());
  JsonObject source = value.getAsJsonObject();
  for (Map.Entry<String, JsonElement> field : source.entrySet()) {
    String name = field.getKey();
    JsonElementConverter fieldConverter = this.converters.get(name);
    Object converted = fieldConverter.convert(field.getValue());
    Type.Repetition repetition = optionalOrRequired(fieldConverter.jsonSchema);
    // Only a null value on an OPTIONAL field is dropped.
    boolean skip = converted == null && repetition.equals(OPTIONAL);
    if (!skip) {
      group.add(name, converted);
    }
  }
  return group;
}
 
Example #3
Source File: JsonElementConversionFactory.java    From incubator-gobblin with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the Parquet type for this record from the entries of its JSON schema.
 *
 * <p>For every entry a converter is obtained from the factory, registered under the
 * entry's column name, and its schema is collected as a field of the resulting type.
 *
 * @return a {@link MessageType} when the record type is ROOT, or a {@link GroupType}
 *         when it is CHILD
 * @throws RuntimeException if the record type is neither ROOT nor CHILD
 */
private Type buildSchema() {
  List<Type> fieldTypes = new ArrayList<>();
  for (JsonElement entry : this.jsonSchema.getDataTypeValues()) {
    JsonSchema fieldSchema = new JsonSchema((JsonObject) entry);
    String fieldName = fieldSchema.getColumnName();
    JsonElementConverter fieldConverter = JsonElementConversionFactory.getConverter(fieldSchema, false);
    Type fieldType = fieldConverter.schema();
    this.converters.put(fieldName, fieldConverter);
    fieldTypes.add(fieldType);
  }
  String recordName = this.jsonSchema.getColumnName();
  switch (recordType) {
    case CHILD:
      return new GroupType(optionalOrRequired(this.jsonSchema), recordName, fieldTypes);
    case ROOT:
      return new MessageType(recordName, fieldTypes);
    default:
      throw new RuntimeException("Unsupported Record type");
  }
}
 
Example #4
Source File: JsonElementConversionFactory.java    From incubator-gobblin with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the Parquet type for a map column.
 *
 * <p>The result is a group named after the column that wraps a single repeated group
 * (named {@code MAP_KEY}) holding the key schema followed by the element schema.
 *
 * @return an optional or required group matching the column's repetition, or
 *         {@code null} when the repetition is neither OPTIONAL nor REQUIRED
 */
@Override
protected Type buildSchema() {
  JsonElementConverter valueConverter = this.elementConverter;
  JsonElementConverter keyConverter = getKeyConverter();
  Type keyType = keyConverter.schema();
  Type valueType = valueConverter.schema();
  GroupType entries = Types.repeatedGroup().addFields(keyType, valueType).named(MAP_KEY).asGroupType();
  String name = this.jsonSchema.getColumnName();
  switch (optionalOrRequired(this.jsonSchema)) {
    case REQUIRED:
      return Types.requiredGroup().addFields(entries).named(name).asGroupType();
    case OPTIONAL:
      return Types.optionalGroup().addFields(entries).named(name).asGroupType();
    default:
      return null;
  }
}
 
Example #5
Source File: ParquetGroup.java    From incubator-gobblin with Apache License 2.0 5 votes vote down vote up
public ParquetGroup(GroupType schema) {
  this.schema = schema;
  this.data = new List[schema.getFields().size()];

  for (int i = 0; i < schema.getFieldCount(); ++i) {
    this.data[i] = new ArrayList();
  }
}
 
Example #6
Source File: JsonElementConversionFactory.java    From incubator-gobblin with Apache License 2.0 5 votes vote down vote up
/**
 * Converts a JSON array into a {@link ParquetGroup} by converting each element with the
 * element converter and adding the result under {@code ARRAY_KEY}.
 *
 * @param value the JSON element to convert; cast to {@code JsonArray}
 * @return the group holding the converted elements
 */
@Override
Object convertField(JsonElement value) {
  ParquetGroup result = new ParquetGroup((GroupType) schema());
  JsonElementConverter itemConverter = this.elementConverter;
  JsonArray items = (JsonArray) value;
  for (JsonElement item : items) {
    Object converted = itemConverter.convert(item);
    result.add(ARRAY_KEY, converted);
  }
  return result;
}
 
Example #7
Source File: MetadataUtils.java    From parquet-tools with Apache License 2.0 5 votes vote down vote up
/**
 * Prints one line for a group (its name prefixed by {@code depth} dots, its repetition
 * and its field count) and then describes each of its fields one level deeper.
 *
 * <p>The group's name is appended to {@code cpath} while its fields are processed and
 * removed again afterwards.
 *
 * @param out       the writer that receives the output
 * @param type      the group to describe
 * @param depth     the nesting level, rendered as leading dots
 * @param container passed through unchanged to the nested calls
 * @param cpath     the path of enclosing group names; modified during the call
 */
private static void showDetails(PrettyPrintWriter out, GroupType type, int depth, MessageType container, List<String> cpath) {
  String label = Strings.repeat(".", depth) + type.getName();
  out.format("%s: %s F:%d%n", label, type.getRepetition(), type.getFieldCount());

  int childDepth = depth + 1;
  cpath.add(type.getName());
  for (Type child : type.getFields()) {
    showDetails(out, child, childDepth, container, cpath);
  }
  int last = cpath.size() - 1;
  cpath.remove(last);
}
 
Example #8
Source File: MetadataUtils.java    From parquet-tools with Apache License 2.0 5 votes vote down vote up
/**
 * Forwards to the overload matching the runtime kind of {@code type}: the group overload
 * for a {@link GroupType}, the primitive overload for a {@link PrimitiveType}. Does
 * nothing when {@code type} is neither.
 *
 * @param out       the writer that receives the output
 * @param type      the type to describe
 * @param depth     passed through to the selected overload
 * @param container passed through to the selected overload
 * @param cpath     passed through to the selected overload
 */
private static void showDetails(PrettyPrintWriter out, Type type, int depth, MessageType container, List<String> cpath) {
  if (type instanceof GroupType) {
    showDetails(out, type.asGroupType(), depth, container, cpath);
  } else if (type instanceof PrimitiveType) {
    showDetails(out, type.asPrimitiveType(), depth, container, cpath);
  }
}
 
Example #9
Source File: SimpleRecordConverter.java    From parquet-tools with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a record converter for {@code schema}, building one child converter per field
 * in field order.
 *
 * @param schema the group whose fields get converters
 * @param name   stored as this converter's name
 * @param parent stored as this converter's parent
 */
public SimpleRecordConverter(GroupType schema, String name, SimpleRecordConverter parent) {
  this.parent = parent;
  this.name = name;
  this.converters = new Converter[schema.getFieldCount()];

  int slot = 0;
  for (Type child : schema.getFields()) {
    converters[slot] = createConverter(child);
    slot++;
  }
}
 
Example #10
Source File: TupleWriter.java    From hadoop-etl-udfs with MIT License 4 votes vote down vote up
/**
 * Creates a writer that keeps the given record consumer and schema.
 *
 * @param recordConsumer the consumer stored for later writes
 * @param schema         the group schema stored alongside it
 */
public TupleWriter(RecordConsumer recordConsumer, GroupType schema) {
    this.schema = schema;
    this.recordConsumer = recordConsumer;
}
 
Example #11
Source File: ParquetGroup.java    From incubator-gobblin with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the schema held by this group.
 *
 * @return this group's {@link GroupType}
 */
public GroupType getType() {
  return schema;
}
 
Example #12
Source File: JsonElementConversionFactory.java    From incubator-gobblin with Apache License 2.0 4 votes vote down vote up
/**
 * Builds a group type named after the column whose only field is the element
 * converter's schema, with the repetition derived from the JSON schema.
 *
 * @return the group type for this column
 */
@Override
protected Type buildSchema() {
  List<Type> elementTypes = new ArrayList<>();
  elementTypes.add(this.elementConverter.schema());
  return new GroupType(optionalOrRequired(jsonSchema), this.jsonSchema.getColumnName(), elementTypes);
}
 
Example #13
Source File: MetadataUtils.java    From parquet-tools with Apache License 2.0 4 votes vote down vote up
/**
 * Prints the details of a group type, starting at depth 0 with no enclosing message type.
 *
 * @param out  the writer that receives the output
 * @param type the group to describe
 */
public static void showDetails(PrettyPrintWriter out, GroupType type) {
  // The recursive group overload appends to the path list unconditionally, so a null
  // path would throw NullPointerException; start from an empty path instead.
  showDetails(out, type, 0, null, new java.util.ArrayList<String>());
}
 
Example #14
Source File: SimpleRecordConverter.java    From parquet-tools with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a record converter for {@code schema} with no name and no parent, by
 * delegating to the three-argument constructor with {@code null} for both.
 *
 * @param schema the group schema passed on to the three-argument constructor
 */
public SimpleRecordConverter(GroupType schema) {
  this(schema, null, null);
}