Java Code Examples for org.apache.parquet.schema.GroupType#containsField()

The following examples show how to use org.apache.parquet.schema.GroupType#containsField(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: ColumnIOFactory.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/**
 * Walks the fields of {@code groupType} with {@code newIO} installed as the current group,
 * visiting only those fields that {@code requestedGroupType} also declares.
 *
 * <p>For every visited field, {@code currentRequestedIndex} and {@code currentRequestedType}
 * are updated before the field accepts this visitor. The previous value of {@code current}
 * is put back once all fields have been processed.
 *
 * @param newIO              group that becomes {@code current} while the children are visited
 * @param groupType          group whose fields are iterated
 * @param requestedGroupType group used to decide which fields are visited and at which index
 */
private void visitChildren(GroupColumnIO newIO, GroupType groupType, GroupType requestedGroupType) {
  final GroupColumnIO previousIO = current;
  current = newIO;
  for (Type fileField : groupType.getFields()) {
    final String fieldName = fileField.getName();
    if (!requestedGroupType.containsField(fieldName)) {
      // Only fields declared by both groups are visited; anything else is left untouched.
      continue;
    }
    currentRequestedIndex = requestedGroupType.getFieldIndex(fieldName);
    currentRequestedType = requestedGroupType.getType(currentRequestedIndex);
    final boolean requestedIsStricter =
        currentRequestedType.getRepetition().isMoreRestrictiveThan(fileField.getRepetition());
    if (requestedIsStricter) {
      // The requested repetition is more restrictive than the one on the iterated field.
      incompatibleSchema(fileField, currentRequestedType);
    }
    fileField.accept(this);
  }
  current = previousIO;
}
 
Example 2
Source File: ThriftRecordConverter.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/**
 * Reports whether {@code requested} leaves out any field that {@code fullSchema} declares
 * as {@code REQUIRED}, descending into group-typed fields that both schemas contain.
 *
 * @param requested  the schema being checked for omissions
 * @param fullSchema the schema whose fields are enumerated
 * @return {@code true} as soon as a required field of {@code fullSchema} (at any nesting
 *         level reached through shared group fields) is absent from {@code requested};
 *         {@code false} otherwise
 */
private boolean hasMissingRequiredFieldInGroupType(GroupType requested, GroupType fullSchema) {
  for (Type field : fullSchema.getFields()) {
    final String fieldName = field.getName();

    if (!requested.containsField(fieldName)) {
      // Absent from the requested schema: only a problem when the field is required.
      if (field.getRepetition() == Type.Repetition.REQUIRED) {
        return true;
      }
      continue;
    }

    if (field.isPrimitive()) {
      // Present and primitive: nothing further to verify for this field.
      continue;
    }

    // Present and a group: apply the same check to the nested fields.
    final GroupType requestedGroup = requested.getType(fieldName).asGroupType();
    if (hasMissingRequiredFieldInGroupType(requestedGroup, field.asGroupType())) {
      return true;
    }
  }

  return false;
}
 
Example 3
Source File: ProtoMessageConverter.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a converter for a LIST-annotated group by unwrapping it down to its element type
 * and delegating to {@code newMessageConverter} for that element.
 *
 * <p>The expected layout, as enforced by the checks below, is a non-primitive type carrying a
 * list logical-type annotation, containing a non-primitive field named {@code "list"}, which
 * in turn contains a field named {@code "element"}.
 *
 * <p>NOTE(review): the second error message speaks of a <em>repeated</em> {@code list} group,
 * but only presence and non-primitiveness are verified here; the repetition is not checked.
 *
 * @param parentBuilder   builder handed through to {@code newMessageConverter}
 * @param fieldDescriptor descriptor handed through to {@code newMessageConverter}
 * @param parquetType     the LIST wrapper type to unwrap
 * @throws ParquetDecodingException if {@code parquetType} does not have the layout described above
 */
public ListConverter(Message.Builder parentBuilder, Descriptors.FieldDescriptor fieldDescriptor, Type parquetType) {
  LogicalTypeAnnotation logicalTypeAnnotation = parquetType.getLogicalTypeAnnotation();
  if (!(logicalTypeAnnotation instanceof LogicalTypeAnnotation.ListLogicalTypeAnnotation) || parquetType.isPrimitive()) {
    throw new ParquetDecodingException("Expected LIST wrapper. Found: " + logicalTypeAnnotation + " instead.");
  }

  // The outer wrapper must hold a group-typed field called "list".
  GroupType rootWrapperType = parquetType.asGroupType();
  if (!rootWrapperType.containsField("list") || rootWrapperType.getType("list").isPrimitive()) {
    throw new ParquetDecodingException("Expected repeated 'list' group inside LIST wrapper but got: " + rootWrapperType);
  }

  // The "list" group must hold a field called "element"; it may be primitive or a group.
  GroupType listType = rootWrapperType.getType("list").asGroupType();
  if (!listType.containsField("element")) {
    throw new ParquetDecodingException("Expected 'element' inside repeated list group but got: " + listType);
  }

  Type elementType = listType.getType("element");
  converter = newMessageConverter(parentBuilder, fieldDescriptor, elementType);
}
 
Example 4
Source File: ParquetTypeUtils.java    From presto with Apache License 2.0 5 votes vote down vote up
/**
 * Looks up a field of {@code groupType} by name, preferring an exact match and falling back
 * to the first field whose name matches ignoring case.
 *
 * @param columnName name of the column to find
 * @param groupType  group whose fields are searched
 * @return the matching field type, or {@code null} when no field matches either way
 */
public static org.apache.parquet.schema.Type getParquetTypeByName(String columnName, GroupType groupType)
{
    // Exact (case-sensitive) match wins.
    if (groupType.containsField(columnName)) {
        return groupType.getType(columnName);
    }
    // Parquet names are case-sensitive while Hive lowercases its column names, so when the
    // direct lookup misses, take the first field that matches case-insensitively.
    return groupType.getFields().stream()
            .filter(candidate -> candidate.getName().equalsIgnoreCase(columnName))
            .findFirst()
            .orElse(null);
}
 
Example 5
Source File: InternalParquetRecordReader.java    From tajo with Apache License 2.0 5 votes vote down vote up
/**
 * Checks whether {@code path}, starting at position {@code index}, names a primitive field
 * reachable from {@code group} by following one nested field per path segment.
 *
 * @param group group in which {@code path[index]} is looked up
 * @param path  field names, outermost first
 * @param index position in {@code path} of the first segment to resolve
 * @return {@code true} only if every segment from {@code index} onward resolves and the last
 *         segment is a primitive field; {@code false} if a segment is missing, if a primitive
 *         is reached before the path is used up, or if the path ends without reaching a
 *         primitive
 */
private boolean contains(GroupType group, String[] path, int index) {
  GroupType scope = group;
  // "!=" rather than "<" on purpose: an index beyond the array length must still fail on
  // the array access instead of quietly returning false.
  for (int depth = index; depth != path.length; depth++) {
    final String segment = path[depth];
    if (!scope.containsField(segment)) {
      return false;
    }
    final Type resolved = scope.getType(segment);
    if (resolved.isPrimitive()) {
      // A primitive only counts as a match when it is the final path segment.
      return depth + 1 == path.length;
    }
    scope = resolved.asGroupType();
  }
  // Path exhausted without reaching a primitive field.
  return false;
}
 
Example 6
Source File: TupleConverter.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the per-field converters for a tuple by pairing the fields of a Pig schema with
 * the types of a Parquet group schema.
 *
 * <p>For each position {@code i}, the Pig field at that position is considered when either the
 * Parquet schema contains a field named by the Pig alias or {@code columnIndexAccess} is set.
 * When {@code getType(...)} yields a non-null type for it, a converter is created whose
 * values are written back through {@code set(i, value)}.
 *
 * @param parquetSchema          Parquet group schema; stored on this instance
 * @param pigSchema              Pig schema whose fields drive the iteration
 * @param elephantBirdCompatible stored on this instance and forwarded to {@code newConverter}
 * @param columnIndexAccess      when true, fields are considered even if the alias is not a
 *                               Parquet field name; also forwarded to {@code getType} and
 *                               {@code newConverter}
 * @throws ParquetDecodingException wrapping any {@code FrontendException} raised while
 *                                  building the converters
 */
public TupleConverter(GroupType parquetSchema, Schema pigSchema, boolean elephantBirdCompatible, boolean columnIndexAccess) {
  this.parquetSchema = parquetSchema;
  this.elephantBirdCompatible = elephantBirdCompatible;
  try {
    // Sized for whichever schema has more fields.
    this.schemaSize = max(parquetSchema.getFieldCount(), pigSchema.getFields().size());
    this.converters = new Converter[this.schemaSize];
    // i walks schema positions; c is the next free slot in converters. Converters are packed
    // from slot 0, so skipped fields leave unused (null) slots at the end of the array.
    for (int i = 0, c = 0; i < schemaSize; i++) {
      // NOTE(review): i can run past the Pig field count when the Parquet schema has more
      // fields; presumably getField then raises FrontendException — confirm.
      FieldSchema field = pigSchema.getField(i);
      if(parquetSchema.containsField(field.alias) || columnIndexAccess) {
        Type type = getType(columnIndexAccess, field.alias, i);

        if(type != null) {
          // Captured by the anonymous container below; values land at the schema position i,
          // not at the converter slot c.
          final int index = i;
          converters[c++] = newConverter(field, type, new ParentValueContainer() {
            @Override
            void add(Object value) {
              TupleConverter.this.set(index, value);
            }
          }, elephantBirdCompatible, columnIndexAccess);
        }
      }

    }
  } catch (FrontendException e) {
    // Surface schema problems as a decoding failure, keeping both schemas and the cause.
    throw new ParquetDecodingException("can not initialize pig converter from:\n" + parquetSchema + "\n" + pigSchema, e);
  }
}
 
Example 7
Source File: PigSchemaConverter.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Projects {@code schemaToFilter} onto the fields named by {@code requestedPigSchema}.
 *
 * <p>Each requested field is resolved to a name via {@code name(alias, "field_" + position)};
 * when {@code schemaToFilter} has a field of that name, its type is passed through
 * {@code filter} and appended to the result. Requested fields with no counterpart are skipped.
 *
 * @param schemaToFilter     group schema the fields are taken from
 * @param requestedPigSchema Pig schema listing the wanted fields, in order
 * @param requiredFieldsList not consulted by this method
 * @return the filtered types, in the order of the requested Pig fields
 */
@Override
public List<Type> filterTupleSchema(GroupType schemaToFilter, Schema requestedPigSchema, RequiredFieldList requiredFieldsList) {
  final List<FieldSchema> requestedFields = requestedPigSchema.getFields();
  final List<Type> keptTypes = new ArrayList<Type>();
  int position = 0;
  for (FieldSchema requestedField : requestedFields) {
    // The positional fallback name uses the field's index within the requested schema.
    final String columnName = name(requestedField.alias, "field_" + position);
    position++;
    if (!schemaToFilter.containsField(columnName)) {
      continue;
    }
    keptTypes.add(filter(schemaToFilter.getType(columnName), requestedField));
  }
  return keptTypes;
}