Java Code Examples for org.apache.parquet.schema.Type#getName()

The following examples show how to use org.apache.parquet.schema.Type#getName(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You may also check out related API usage in the sidebar.
Example 1
Source File: SingleLevelArrayMapKeyValuesSchemaConverter.java    From presto with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the group type for a map column: a repeated group named {@code mapAlias}
 * holding the key (and, when present, the value) type, wrapped through
 * {@code listWrapper} with the {@code MAP_KEY_VALUE} annotation.
 *
 * @param repetition repetition passed through to {@code listWrapper}
 * @param alias name passed through to {@code listWrapper}
 * @param mapAlias name of the inner repeated key/value group
 * @param keyType type of the map key
 * @param valueType type of the map value; {@code null} produces a key-only group
 * @return the group type returned by {@code listWrapper}
 * @throws RuntimeException if {@code valueType} is non-null and not named {@code "value"}
 */
public static GroupType mapType(Repetition repetition, String alias, String mapAlias, Type keyType, Type valueType)
{
    GroupType keyValueGroup;
    if (valueType == null) {
        // support projection only on key of a map
        keyValueGroup = new GroupType(Repetition.REPEATED, mapAlias, keyType);
    }
    else {
        String valueName = valueType.getName();
        if (!valueName.equals("value")) {
            throw new RuntimeException(valueName + " should be value");
        }
        keyValueGroup = new GroupType(Repetition.REPEATED, mapAlias, keyType, valueType);
    }
    return listWrapper(repetition, alias, MAP_KEY_VALUE, keyValueGroup);
}
 
Example 2
Source File: SimpleGroup.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/**
 * Appends one line per stored value of this group to {@code builder}, prefixing
 * every line with {@code indent} and the field name. Nested groups are rendered
 * recursively with two extra spaces of indentation.
 *
 * @param builder destination buffer
 * @param indent prefix written before each field name
 * @return the same {@code builder}, to allow chaining
 */
private StringBuilder appendToString(StringBuilder builder, String indent) {
  final List<Type> fields = schema.getFields();
  for (int idx = 0; idx < fields.size(); idx++) {
    final String label = fields.get(idx).getName();
    final List<Object> fieldValues = data[idx];
    // Fields without any stored value produce no output at all.
    if (fieldValues == null || fieldValues.isEmpty()) {
      continue;
    }
    for (Object item : fieldValues) {
      builder.append(indent).append(label);
      if (item instanceof Group) {
        builder.append('\n');
        ((SimpleGroup) item).appendToString(builder, indent + "  ");
      } else if (item == null) {
        builder.append(": NULL\n");
      } else {
        builder.append(": ").append(item.toString()).append('\n');
      }
    }
  }
  return builder;
}
 
Example 3
Source File: GroupWriter.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/**
 * Emits every populated field of {@code group} to the record consumer,
 * recursing into nested groups.
 *
 * @param group the record (or nested record) to write
 * @param type schema describing the fields of {@code group}
 */
private void writeGroup(Group group, GroupType type) {
  final int total = type.getFieldCount();
  for (int pos = 0; pos < total; pos++) {
    final int repetitions = group.getFieldRepetitionCount(pos);
    // Fields with no values are not announced to the consumer at all.
    if (repetitions <= 0) {
      continue;
    }
    final Type child = type.getType(pos);
    final String childName = child.getName();
    final boolean primitive = child.isPrimitive();

    recordConsumer.startField(childName, pos);
    for (int occurrence = 0; occurrence < repetitions; occurrence++) {
      if (primitive) {
        group.writeValue(pos, occurrence, recordConsumer);
        continue;
      }
      recordConsumer.startGroup();
      writeGroup(group.getGroup(pos, occurrence), child.asGroupType());
      recordConsumer.endGroup();
    }
    recordConsumer.endField(childName, pos);
  }
}
 
Example 4
Source File: PruneColumnsCommand.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/**
 * Prunes a single field against the set of paths to drop.
 *
 * <p>The field name is pushed onto {@code currentPath} for the duration of the
 * call and popped again before returning, so the list is left as it was found.
 *
 * @param field the field to examine
 * @param currentPath path of the enclosing group; temporarily extended with this field's name
 * @param prunePaths column paths that must be removed
 * @return the field itself (primitive kept), a copy of the group holding only its
 *         surviving children, or {@code null} when the field is pruned or a group
 *         has no surviving children
 */
private Type pruneColumnsInField(Type field, List<String> currentPath, Set<ColumnPath> prunePaths) {
  currentPath.add(field.getName());
  ColumnPath path = ColumnPath.get(currentPath.toArray(new String[0]));
  Type prunedField = null;
  if (!prunePaths.contains(path)) {
    if (field.isPrimitive()) {
      prunedField = field;
    } else {
      GroupType group = (GroupType) field;
      List<Type> prunedFields = pruneColumnsInFields(group.getFields(), currentPath, prunePaths);
      if (!prunedFields.isEmpty()) {
        prunedField = group.withNewFields(prunedFields);
      }
    }
  }

  // Pop by index, not by value: remove(Object) deletes the FIRST matching name,
  // which corrupts the path when a nested field shares its name with an ancestor
  // (e.g. a.b.a would become [b, a] instead of [a, b]).
  currentPath.remove(currentPath.size() - 1);
  return prunedField;
}
 
Example 5
Source File: ParquetGroup.java    From incubator-gobblin with Apache License 2.0 6 votes vote down vote up
/**
 * Renders this group as text, one line per stored value, each line prefixed by
 * {@code indent} and the field name. Nested groups are rendered recursively
 * with two extra spaces of indentation.
 *
 * @param indent prefix written before each field name
 * @return the textual rendering
 */
public String toString(String indent) {
  StringBuilder out = new StringBuilder();
  List<Type> fields = this.schema.getFields();
  for (int idx = 0; idx < fields.size(); idx++) {
    String label = fields.get(idx).getName();
    for (Object item : this.data[idx]) {
      out.append(indent).append(label);
      if (item instanceof Group) {
        String nested = ((ParquetGroup) item).toString(indent + "  ");
        out.append("\n").append(nested);
      } else if (item == null) {
        out.append(": NULL\n");
      } else {
        out.append(": ").append(item.toString()).append("\n");
      }
    }
  }
  return out.toString();
}
 
Example 6
Source File: SchemaIntersection.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/**
 * Computes which top-level columns of {@code fileSchema} are covered by
 * {@code requestedFields}, keeping file order. {@code Fields.UNKNOWN} is
 * treated as {@code Fields.ALL}.
 *
 * @param fileSchema schema of the file being read
 * @param requestedFields fields asked for by the caller
 */
public SchemaIntersection(MessageType fileSchema, Fields requestedFields) {
  final Fields wanted = (requestedFields == Fields.UNKNOWN) ? Fields.ALL : requestedFields;

  Fields matched = Fields.NONE;
  final List<Type> keptTypes = new ArrayList<Type>();
  for (Type candidate : fileSchema.getFields()) {
    final Fields candidateName = new Fields(candidate.getName());
    if (!wanted.contains(candidateName)) {
      continue;
    }
    matched = matched.append(candidateName);
    keptTypes.add(candidate);
  }

  this.sourceFields = matched;
  this.requestedSchema = new MessageType(fileSchema.getName(), keptTypes);
}
 
Example 7
Source File: HiveSchemaUtil.java    From hudi with Apache License 2.0 6 votes vote down vote up
/**
 * Translates the top-level fields of a Parquet schema into Hive column types.
 *
 * <p>Repeated fields go through {@code createHiveArray}; all other fields go
 * through {@code convertField}. Column names are passed through
 * {@code hiveCompatibleFieldName}. Field order is preserved.
 *
 * @param messageType the Parquet schema
 * @return insertion-ordered map from Hive column name to Hive type string
 */
public static Map<String, String> convertParquetSchemaToHiveSchema(MessageType messageType) throws IOException {
  final Map<String, String> hiveSchema = new LinkedHashMap<>();
  for (Type column : messageType.getFields()) {
    final String columnName = column.getName();
    final StringBuilder hiveType = new StringBuilder();
    final boolean repeated = column.isRepetition(Type.Repetition.REPEATED);
    if (repeated) {
      hiveType.append(createHiveArray(column, ""));
    } else {
      hiveType.append(convertField(column));
    }
    hiveSchema.put(hiveCompatibleFieldName(columnName, false), hiveType.toString());
  }
  return hiveSchema;
}
 
Example 8
Source File: TupleWriter.java    From hadoop-etl-udfs with MIT License 6 votes vote down vote up
/**
 * Writes every non-null field of {@code tuple} to the record consumer,
 * recursing into nested tuples for group-typed fields.
 *
 * @param tuple the row (or nested row) to write
 * @param type schema describing the fields of {@code tuple}
 */
private void writeTuple(Tuple tuple, GroupType type) {
    final int fieldCount = type.getFieldCount();
    for (int pos = 0; pos < fieldCount; pos++) {
        // empty fields have to be omitted
        if (tuple.isNull(pos)) {
            continue;
        }
        final Type child = type.getType(pos);
        final String childName = child.getName();

        recordConsumer.startField(childName, pos);
        if (!child.isPrimitive()) {
            recordConsumer.startGroup();
            writeTuple(tuple.getTuple(pos), child.asGroupType());
            recordConsumer.endGroup();
        }
        else {
            tuple.writePrimitiveValue(recordConsumer, pos, (PrimitiveType) child);
        }
        recordConsumer.endField(childName, pos);
    }
}
 
Example 9
Source File: TestDataWritableWriter.java    From presto with Apache License 2.0 6 votes vote down vote up
/**
 * Writes each non-null field of a struct value to the RecordConsumer.
 * A null struct, and null individual fields, produce no output.
 *
 * @param value The struct object holding the field values.
 * @param inspector The object inspector used to read the struct's fields.
 * @param type Type that contains information about the group schema.
 */
private void writeGroupFields(Object value, StructObjectInspector inspector, GroupType type)
{
    if (value == null) {
        return;
    }

    List<? extends StructField> structFields = inspector.getAllStructFieldRefs();
    List<Object> structValues = inspector.getStructFieldsDataAsList(value);

    int fieldCount = type.getFieldCount();
    for (int position = 0; position < fieldCount; position++) {
        Object element = structValues.get(position);
        if (element == null) {
            continue;
        }
        Type elementType = type.getType(position);
        String elementName = elementType.getName();
        ObjectInspector elementInspector = structFields.get(position).getFieldObjectInspector();

        recordConsumer.startField(elementName, position);
        writeValue(element, elementInspector, elementType);
        recordConsumer.endField(elementName, position);
    }
}
 
Example 10
Source File: ParquetRowiseReader.java    From dremio-oss with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a projected type for the column path {@code pathSegments}, starting at
 * {@code depth}. The last segment's type is returned as found in {@code schema};
 * every enclosing group is rebuilt with the same repetition, name and original
 * type, but containing only the single child on the path.
 *
 * @param pathSegments full column path
 * @param depth index of the segment handled by this call
 * @param schema schema to resolve path prefixes against
 * @return the projected type for the segment at {@code depth}
 */
private static Type getType(String[] pathSegments, int depth, MessageType schema) {
  final int prefixLength = depth + 1;
  final Type current = schema.getType(Arrays.copyOfRange(pathSegments, 0, prefixLength));
  if (prefixLength != pathSegments.length) {
    // An intermediate segment must be a group to have children.
    Preconditions.checkState(!current.isPrimitive());
    final Type onlyChild = getType(pathSegments, prefixLength, schema);
    return new GroupType(current.getRepetition(), current.getName(), current.getOriginalType(), onlyChild);
  }
  return current;
}
 
Example 11
Source File: ProtoMessageConverter.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Creates one child converter per field of {@code parquetSchema}, matching each
 * Parquet field to the protobuf field of the same name.
 *
 * @param pvc container that receives the converted message; must not be null
 * @param builder protobuf builder whose descriptor is used to look up fields
 * @param parquetSchema Parquet group schema to convert from
 * @throws IllegalStateException if {@code pvc} is null
 * @throws IncompatibleSchemaModificationException if a Parquet field has no
 *         protobuf field of the same name
 */
ProtoMessageConverter(ParentValueContainer pvc, Message.Builder builder, GroupType parquetSchema) {
    final int fieldTotal = parquetSchema.getFieldCount();
    converters = new Converter[fieldTotal];
    this.parent = pvc;

    if (pvc == null) {
      throw new IllegalStateException("Missing parent value container");
    }

    myBuilder = builder;
    final Descriptors.Descriptor descriptor = builder.getDescriptorForType();

    // Converters are stored in Parquet field order.
    int slot = 0;
    for (Type parquetField : parquetSchema.getFields()) {
      final String parquetName = parquetField.getName();
      final Descriptors.FieldDescriptor match = descriptor.findFieldByName(parquetName);

      if (match == null) {
        String description = "Scheme mismatch \n\"" + parquetField + "\"" +
                "\n proto descriptor:\n" + descriptor.toProto();
        throw new IncompatibleSchemaModificationException("Cant find \"" + parquetName + "\" " + description);
      }

      converters[slot] = newMessageConverter(myBuilder, match, parquetField);
      slot++;
    }
  }
 
Example 12
Source File: ParquetGroup.java    From incubator-gobblin with Apache License 2.0 5 votes vote down vote up
/**
 * Stores {@code value} under the field at {@code fieldIndex}.
 *
 * @param fieldIndex position of the field in this group's schema
 * @param value the value to store
 * @throws IllegalStateException if the field is not repeated and already holds a value
 */
public void add(int fieldIndex, Primitive value) {
  Type fieldType = this.schema.getType(fieldIndex);
  List<Object> existing = this.data[fieldIndex];
  boolean singleValued = !fieldType.isRepetition(REPEATED);
  if (singleValued && !existing.isEmpty()) {
    throw new IllegalStateException(
        "field " + fieldIndex + " (" + fieldType.getName() + ") can not have more than one value: " + existing);
  }
  existing.add(value);
}
 
Example 13
Source File: DataWritableWriter.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Writes the non-null elements of {@code arr} to the record consumer, one
 * field per schema position. Group-typed fields are written as arrays when the
 * field is repeated, otherwise as nested records.
 *
 * @param arr the values to write; {@code null} writes nothing
 * @param type schema describing the positions of {@code arr}
 * @throws ParquetEncodingException if a group-typed field holds a value that is
 *         not an {@code ArrayWritable}
 */
private void writeData(final ArrayWritable arr, final GroupType type) {
  if (arr == null) {
    return;
  }
  final int total = type.getFieldCount();
  final Writable[] slots = arr.get();
  for (int pos = 0; pos < total; pos++) {
    final Type child = type.getType(pos);
    final String childName = child.getName();
    final Writable element = slots[pos];
    if (element == null) {
      continue;
    }

    recordConsumer.startField(childName, pos);
    if (child.isPrimitive()) {
      writePrimitive(element);
    } else {
      recordConsumer.startGroup();
      // element is known to be non-null here, so anything else is a type error.
      if (!(element instanceof ArrayWritable)) {
        throw new ParquetEncodingException("This should be an ArrayWritable or MapWritable: " + element);
      }
      final ArrayWritable nested = (ArrayWritable) element;
      final GroupType nestedType = child.asGroupType();
      if (nestedType.getRepetition().equals(Type.Repetition.REPEATED)) {
        writeArray(nested, nestedType);
      } else {
        writeData(nested, nestedType);
      }
      recordConsumer.endGroup();
    }
    recordConsumer.endField(childName, pos);
  }
}
 
Example 14
Source File: SimpleRecordConverter.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Picks the converter for one schema field.
 *
 * <p>Primitive fields get a string or decimal converter when annotated as such,
 * otherwise a {@code SimplePrimitiveConverter}. Group fields get a map or list
 * converter when annotated as such, otherwise a nested
 * {@code SimpleRecordConverter}.
 *
 * @param field the schema field to convert
 * @return the converter for {@code field}
 */
private Converter createConverter(Type field) {
  final LogicalTypeAnnotation annotation = field.getLogicalTypeAnnotation();
  final String fieldName = field.getName();

  if (!field.isPrimitive()) {
    final GroupType group = field.asGroupType();
    if (annotation == null) {
      return new SimpleRecordConverter(group, fieldName, this);
    }
    final LogicalTypeAnnotation.LogicalTypeAnnotationVisitor<Converter> groupVisitor =
        new LogicalTypeAnnotation.LogicalTypeAnnotationVisitor<Converter>() {
          @Override
          public Optional<Converter> visit(LogicalTypeAnnotation.MapLogicalTypeAnnotation mapLogicalType) {
            return of(new SimpleMapRecordConverter(group, fieldName, SimpleRecordConverter.this));
          }

          @Override
          public Optional<Converter> visit(LogicalTypeAnnotation.ListLogicalTypeAnnotation listLogicalType) {
            return of(new SimpleListRecordConverter(group, fieldName, SimpleRecordConverter.this));
          }
        };
    final Optional<Converter> specialised = annotation.accept(groupVisitor);
    return specialised.orElse(new SimpleRecordConverter(group, fieldName, this));
  }

  if (annotation == null) {
    return new SimplePrimitiveConverter(fieldName);
  }
  final LogicalTypeAnnotation.LogicalTypeAnnotationVisitor<Converter> primitiveVisitor =
      new LogicalTypeAnnotation.LogicalTypeAnnotationVisitor<Converter>() {
        @Override
        public Optional<Converter> visit(LogicalTypeAnnotation.StringLogicalTypeAnnotation stringLogicalType) {
          return of(new StringConverter(fieldName));
        }

        @Override
        public Optional<Converter> visit(LogicalTypeAnnotation.DecimalLogicalTypeAnnotation decimalLogicalType) {
          return of(new DecimalConverter(fieldName, decimalLogicalType.getScale()));
        }
      };
  final Optional<Converter> specialised = annotation.accept(primitiveVisitor);
  return specialised.orElse(new SimplePrimitiveConverter(fieldName));
}
 
Example 15
Source File: DrillParquetReader.java    From Bats with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a projected type for the column path {@code pathSegments}, starting at
 * {@code depth}. The last segment's type is returned as found in {@code schema};
 * every enclosing group is rebuilt with the same repetition and name, but
 * containing only the single child on the path.
 *
 * @param pathSegments full column path
 * @param depth index of the segment handled by this call
 * @param schema schema to resolve path prefixes against
 * @return the projected type for the segment at {@code depth}
 */
private static Type getType(String[] pathSegments, int depth, MessageType schema) {
  final int nextDepth = depth + 1;
  final String[] prefix = Arrays.copyOfRange(pathSegments, 0, nextDepth);
  final Type resolved = schema.getType(prefix);
  final boolean isLeaf = nextDepth == pathSegments.length;
  if (isLeaf) {
    return resolved;
  }
  // An intermediate segment must be a group to have children.
  Preconditions.checkState(!resolved.isPrimitive());
  final Type onlyChild = getType(pathSegments, nextDepth, schema);
  return new GroupType(resolved.getRepetition(), resolved.getName(), onlyChild);
}
 
Example 16
Source File: SimpleGroup.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Stores {@code value} under the field at {@code fieldIndex}.
 *
 * @param fieldIndex position of the field in this group's schema
 * @param value the value to store
 * @throws IllegalStateException if the field is not repeated and already holds a value
 */
private void add(int fieldIndex, Primitive value) {
  final Type fieldType = schema.getType(fieldIndex);
  final List<Object> existing = data[fieldIndex];
  final boolean singleValued = !fieldType.isRepetition(Type.Repetition.REPEATED);
  if (singleValued && !existing.isEmpty()) {
    throw new IllegalStateException("field "+fieldIndex+" (" + fieldType.getName() + ") can not have more than one value: " + existing);
  }
  existing.add(value);
}
 
Example 17
Source File: PigSchemaConverter.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Converts one Parquet type to a field schema, dispatching on whether the type
 * is primitive or a group.
 *
 * @param parquetType the Parquet type to convert
 * @return the corresponding field schema, named after the Parquet type
 * @throws FrontendException declared by this method; presumably raised by the
 *         delegate converters when conversion fails
 */
private FieldSchema getFieldSchema(Type parquetType) throws FrontendException {
  final String fieldName = parquetType.getName();
  return parquetType.isPrimitive()
      ? getSimpleFieldSchema(fieldName, parquetType)
      : getComplexFieldSchema(fieldName, parquetType);
}
 
Example 18
Source File: ParquetGroupConverter.java    From dremio-oss with Apache License 2.0 4 votes vote down vote up
/**
 * Creates the converter for one child field and appends it to {@code converters}.
 *
 * <p>The next projected column (if any) is walked segment by segment looking for
 * a named, non-wildcard segment that matches the schema field name ignoring case.
 * On a match the query's spelling of the name is used and the remainder of the
 * path is handed to a nested group converter; otherwise the schema's own name
 * is used and no remaining path is passed on.
 *
 * @param fieldName fully qualified name of the parent; empty for the root
 * @param mutator passed through to the nested group converter
 * @param arrowSchema passed through to the nested group converter
 * @param colIterator iterator over projected columns; at most one element is consumed
 * @param type Parquet type of the child field
 * @param childNameResolver maps the chosen name to the name used for the child
 */
protected void addChildConverter(String fieldName, OutputMutator mutator,
    List<Field> arrowSchema, Iterator<SchemaPath> colIterator, Type type, Function<String, String> childNameResolver) {
  // Match the name of the field in the schema definition to the name of the field in the query.
  final String schemaName = type.getName();
  String name = null;
  PathSegment colNextChild = null;

  if (colIterator.hasNext()) {
    PathSegment segment = colIterator.next().getRootSegment();
    PathSegment child = segment.getChild();
    for (;;) {
      if (segment.isNamed()) {
        final String candidate = segment.getNameSegment().getPath();
        // We may have a field that does not exist in the schema
        if (!candidate.equals("*") && candidate.equalsIgnoreCase(schemaName)) {
          name = candidate;
          colNextChild = child;
          break;
        }
      }
      if (child == null) {
        // Path exhausted without a match: name and colNextChild stay null.
        break;
      }
      segment = child;
      child = segment.getChild();
    }
  }
  if (name == null) {
    name = schemaName;
  }

  final String nameForChild = childNameResolver.apply(name);
  final String fullChildName;
  if (fieldName.isEmpty()) {
    fullChildName = nameForChild;
  } else {
    fullChildName = fieldName.concat(".").concat(nameForChild);
  }

  final Converter converter;
  if (type.isPrimitive()) {
    converter = getConverterForType(fullChildName, type.asPrimitiveType());
  } else {
    converter = groupConverter(fullChildName, mutator, arrowSchema, type.asGroupType(), colNextChild);
  }
  converters.add(converter);
}
 
Example 19
Source File: TestDataWritableWriter.java    From presto with Apache License 2.0 4 votes vote down vote up
/**
 * Writes a map value and its key/value entries to the Parquet RecordConsumer.
 * An enclosing group is always emitted; the repeated entry field is emitted
 * only when the map is non-null and non-empty. Null map values are omitted.
 * This function assumes the following schema:
 * optional group mapCol (MAP) {
 * repeated group map (MAP_KEY_VALUE) {
 * required TYPE key;
 * optional TYPE value;
 * }
 * }
 *
 * @param value The object that contains the map key-values.
 * @param inspector The object inspector used to get the correct value type.
 * @param type Type that contains information about the group (MAP) schema.
 */
private void writeMap(Object value, MapObjectInspector inspector, GroupType type)
{
    // Get the internal map structure (MAP_KEY_VALUE)
    GroupType entryGroup = type.getType(0).asGroupType();

    recordConsumer.startGroup();
    Map<?, ?> entries = inspector.getMap(value);
    if (entries == null || entries.isEmpty()) {
        recordConsumer.endGroup();
        return;
    }

    String entryGroupName = entryGroup.getName();
    recordConsumer.startField(entryGroupName, 0);

    Type keyType = entryGroup.getType(0);
    String keyName = keyType.getName();
    ObjectInspector keyInspector = inspector.getMapKeyObjectInspector();

    Type valueType = entryGroup.getType(1);
    String valueName = valueType.getName();
    ObjectInspector valueInspector = inspector.getMapValueObjectInspector();

    for (Map.Entry<?, ?> entry : entries.entrySet()) {
        recordConsumer.startGroup();
        if (entry != null) {
            // write key element
            recordConsumer.startField(keyName, 0);
            writeValue(entry.getKey(), keyInspector, keyType);
            recordConsumer.endField(keyName, 0);

            // write value element
            Object mapped = entry.getValue();
            if (mapped != null) {
                recordConsumer.startField(valueName, 1);
                writeValue(mapped, valueInspector, valueType);
                recordConsumer.endField(valueName, 1);
            }
        }
        recordConsumer.endGroup();
    }

    recordConsumer.endField(entryGroupName, 0);
    recordConsumer.endGroup();
}
 
Example 20
Source File: ColumnIO.java    From parquet-mr with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a column IO node for {@code type}, caching the type's name.
 *
 * @param type schema type this node represents
 * @param parent enclosing group node
 * @param index index stored for this node; presumably its position within {@code parent}
 */
ColumnIO(Type type, GroupColumnIO parent, int index) {
  this.name = type.getName();
  this.type = type;
  this.index = index;
  this.parent = parent;
}