Java Code Examples for org.apache.parquet.schema.Type#isPrimitive()

The following examples show how to use org.apache.parquet.schema.Type#isPrimitive(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: Metadata.java    From Bats with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the {@code ColTypeInfo} of the primitive column reached by descending from
 * {@code type} along {@code path}, consuming one path segment per group level.
 *
 * @param schema the message schema used to look up the max repetition/definition levels of {@code path}
 * @param type   the type to start descending from
 * @param path   the column path; segment {@code depth} names the child of {@code type} to follow
 * @param depth  index into {@code path} of the next segment to follow
 * @return the original type, decimal precision/scale (0 when there is no decimal metadata)
 *         and max repetition/definition levels of the column
 */
private ColTypeInfo getColTypeInfo(MessageType schema, Type type, String[] path, int depth) {
  // Descend through the groups until a primitive leaf is reached.
  Type current = type;
  int level = depth;
  while (!current.isPrimitive()) {
    current = ((GroupType) current).getType(path[level]);
    level++;
  }

  PrimitiveType leaf = (PrimitiveType) current;
  boolean hasDecimalMetadata = leaf.getDecimalMetadata() != null;
  int precision = hasDecimalMetadata ? leaf.getDecimalMetadata().getPrecision() : 0;
  int scale = hasDecimalMetadata ? leaf.getDecimalMetadata().getScale() : 0;

  int repetitionLevel = schema.getMaxRepetitionLevel(path);
  int definitionLevel = schema.getMaxDefinitionLevel(path);

  return new ColTypeInfo(current.getOriginalType(), precision, scale, repetitionLevel, definitionLevel);
}
 
Example 2
Source File: HiveGroupConverter.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/**
 * Creates the converter matching the given Parquet type.
 *
 * @param type   the Parquet type to convert; may be {@code null}
 * @param index  the index passed through to the created converter
 * @param parent the parent converter passed through to the created converter
 * @return {@code null} when {@code type} is {@code null}; a primitive converter for primitive
 *         types; an {@code ArrayWritableGroupConverter} for repeated groups; otherwise a
 *         {@code DataWritableGroupConverter}
 */
protected static Converter getConverterFromDescription(final Type type, final int index,
    final HiveGroupConverter parent) {
  if (type == null) {
    return null;
  }
  if (type.isPrimitive()) {
    return ETypeConverter.getNewConverter(
        type.asPrimitiveType().getPrimitiveTypeName().javaType, index, parent);
  }
  // Groups: the repetition decides between the array converter and the plain group converter.
  return type.asGroupType().getRepetition() == Repetition.REPEATED
      ? new ArrayWritableGroupConverter(type.asGroupType(), parent, index)
      : new DataWritableGroupConverter(type.asGroupType(), parent, index);
}
 
Example 3
Source File: HiveSchemaUtil.java    From hudi with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the Hive array type string ({@code ARRAY< ...>}) for a Parquet list element.
 *
 * <p>A primitive element is converted directly. A group element is converted as a whole when
 * it has more than one field, or when it has exactly one field and is named {@code "array"}
 * or {@code elementName + "_tuple"}; otherwise its first field is converted instead.
 *
 * @param elementType the Parquet type of the list element
 * @param elementName the name used to recognise the {@code <elementName>_tuple} wrapper group
 * @return the Hive array type string
 */
private static String createHiveArray(Type elementType, String elementName) {
  final Type fieldToConvert;
  if (elementType.isPrimitive()) {
    fieldToConvert = elementType;
  } else {
    final List<Type> groupFields = elementType.asGroupType().getFields();
    final int fieldCount = groupFields.size();
    final boolean convertWholeGroup = fieldCount > 1
        || (fieldCount == 1
            && (elementType.getName().equals("array")
                || elementType.getName().equals(elementName + "_tuple")));
    fieldToConvert = convertWholeGroup ? elementType : groupFields.get(0);
  }
  return "ARRAY< " + convertField(fieldToConvert) + ">";
}
 
Example 4
Source File: TupleWriter.java    From hadoop-etl-udfs with MIT License 6 votes vote down vote up
/**
 * Writes every non-null field of {@code tuple} to the record consumer, recursing into
 * nested tuples for group-typed fields.
 *
 * @param tuple the values to write, indexed like the fields of {@code type}
 * @param type  the group schema describing the tuple's fields
 */
private void writeTuple(Tuple tuple, GroupType type) {
    for (int i = 0; i < type.getFieldCount(); i++) {
        Type schemaField = type.getType(i);
        String name = schemaField.getName();
        // Null values are not written at all: no startField/endField pair is emitted.
        if (!tuple.isNull(i)) {
            recordConsumer.startField(name, i);
            if (!schemaField.isPrimitive()) {
                recordConsumer.startGroup();
                writeTuple(tuple.getTuple(i), schemaField.asGroupType());
                recordConsumer.endGroup();
            } else {
                tuple.writePrimitiveValue(recordConsumer, i, (PrimitiveType) schemaField);
            }
            recordConsumer.endField(name, i);
        }
    }
}
 
Example 5
Source File: PruneColumnsCommand.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/**
 * Returns {@code field} with every column listed in {@code prunePaths} removed.
 *
 * <p>The field's name is pushed onto {@code currentPath} for the duration of the call and
 * popped again before returning, so the list holds the same elements afterwards.
 *
 * @param field       the field to prune
 * @param currentPath path segments of the enclosing groups; temporarily extended with this field's name
 * @param prunePaths  the column paths to remove
 * @return the field itself when it is a kept primitive; a copy of the group holding only the
 *         surviving children; or {@code null} when the field is pruned or no child survives
 */
private Type pruneColumnsInField(Type field, List<String> currentPath, Set<ColumnPath> prunePaths) {
  currentPath.add(field.getName());
  ColumnPath path = ColumnPath.get(currentPath.toArray(new String[0]));
  Type prunedField = null;
  if (!prunePaths.contains(path)) {
    if (field.isPrimitive()) {
      prunedField = field;
    } else {
      List<Type> childFields = ((GroupType) field).getFields();
      List<Type> prunedFields = pruneColumnsInFields(childFields, currentPath, prunePaths);
      if (!prunedFields.isEmpty()) {
        prunedField = ((GroupType) field).withNewFields(prunedFields);
      }
    }
  }

  // Pop by position, not by value: remove(Object) deletes the FIRST equal element, which
  // would drop an ancestor segment when a nested field shares its name (e.g. a.b.a).
  currentPath.remove(currentPath.size() - 1);
  return prunedField;
}
 
Example 6
Source File: TupleConverter.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Starts a new record by allocating a fresh tuple of {@code schemaSize} fields.
 *
 * <p>When {@code elephantBirdCompatible} is set, optional primitive fields of type INT32,
 * INT64, FLOAT, DOUBLE and BOOLEAN are pre-filled with the matching zero constant; all
 * other fields are left as created.
 *
 * @throws RuntimeException wrapping any {@code ExecException} raised while setting a field
 */
@Override
final public void start() {
  currentTuple = TF.newTuple(schemaSize);
  if (!elephantBirdCompatible) {
    return;
  }
  try {
    int position = -1;
    for (Type field : parquetSchema.getFields()) {
      position++;
      if (!field.isPrimitive() || !field.isRepetition(Repetition.OPTIONAL)) {
        continue;
      }
      switch (field.asPrimitiveType().getPrimitiveTypeName()) {
        case INT32:
        case BOOLEAN:
          // booleans share the int zero constant
          currentTuple.set(position, I32_ZERO);
          break;
        case INT64:
          currentTuple.set(position, I64_ZERO);
          break;
        case FLOAT:
          currentTuple.set(position, FLOAT_ZERO);
          break;
        case DOUBLE:
          currentTuple.set(position, DOUBLE_ZERO);
          break;
        default:
          // other primitive types get no default value
          break;
      }
    }
  } catch (ExecException e) {
    throw new RuntimeException(e);
  }
}
 
Example 7
Source File: InternalParquetRecordReader.java    From tajo with Apache License 2.0 5 votes vote down vote up
/**
 * Tells whether {@code path}, read from {@code index} onwards, names a primitive column
 * inside {@code group}.
 *
 * @param group the group to search in
 * @param path  the column path segments
 * @param index index of the first segment to resolve inside {@code group}
 * @return {@code true} only when every remaining segment resolves to a nested field and the
 *         last segment is a primitive; {@code false} when a segment is missing, the path ends
 *         on a group, or a primitive is reached before the path is exhausted
 */
private boolean contains(GroupType group, String[] path, int index) {
  GroupType current = group;
  for (int i = index; i != path.length; i++) {
    if (!current.containsField(path[i])) {
      return false;
    }
    Type child = current.getType(path[i]);
    if (child.isPrimitive()) {
      // A primitive only matches when it is the final path segment.
      return i + 1 == path.length;
    }
    current = child.asGroupType();
  }
  // Path exhausted while still on a group.
  return false;
}
 
Example 8
Source File: PigSchemaConverter.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Filters the single nested field of a bag-typed group against the Pig field schema.
 *
 * @param bagType        the Parquet group representing the bag; must have exactly one field
 * @param bagFieldSchema the Pig schema of the bag
 * @return a copy of {@code bagType} whose only field is the filtered nested type
 * @throws RuntimeException   when {@code bagType} does not have exactly one field
 * @throws FrontendException  propagated from the Pig schema lookups or the nested filter
 */
private Type filterBag(GroupType bagType, FieldSchema bagFieldSchema) throws FrontendException {
  if (LOG.isDebugEnabled()) {
    LOG.debug("filtering BAG schema:\n" + bagType + "\nwith:\n " + bagFieldSchema);
  }
  if (bagType.getFieldCount() != 1) {
    throw new RuntimeException("not unwrapping the right type, this should be a Bag: " + bagType);
  }
  Type nested = bagType.getType(0);
  FieldSchema tupleField = bagFieldSchema.schema.getField(0);
  // Bags always contain tuples => for primitive, map or list content the extra tuple
  // that was inserted is skipped.
  boolean skipInsertedTuple = nested.isPrimitive()
      || nested.getLogicalTypeAnnotation() instanceof LogicalTypeAnnotation.MapLogicalTypeAnnotation
      || nested.getLogicalTypeAnnotation() instanceof LogicalTypeAnnotation.ListLogicalTypeAnnotation;
  FieldSchema innerField = skipInsertedTuple ? tupleField.schema.getField(0) : tupleField;
  return bagType.withNewFields(filter(nested, innerField));
}
 
Example 9
Source File: RowConverter.java    From flink with Apache License 2.0 5 votes vote down vote up
RowPrimitiveConverter(Type dataType, ParentDataHolder parentDataHolder, int pos) {
	this.parentDataHolder = parentDataHolder;
	this.pos = pos;
	if (dataType.isPrimitive()) {
		this.originalType = dataType.getOriginalType();
		this.primitiveTypeName = dataType.asPrimitiveType().getPrimitiveTypeName();
	} else {
		// Backward-compatibility  It can be a group type middle layer
		Type primitiveType = dataType.asGroupType().getType(0);
		this.originalType = primitiveType.getOriginalType();
		this.primitiveTypeName = primitiveType.asPrimitiveType().getPrimitiveTypeName();
	}
}
 
Example 10
Source File: TupleConverter.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Creates the converter for the field at position {@code i}.
 *
 * @param type the Parquet type of the field; must be primitive
 * @param i    the position of the field in the tuple
 * @return a new {@code TuplePrimitiveConverter} bound to this converter and position
 * @throws IllegalArgumentException when {@code type} is not primitive
 */
private Converter newConverter(Type type, int i) {
  if (type.isPrimitive()) {
    return new TuplePrimitiveConverter(this, i);
  }
  throw new IllegalArgumentException("cascading can only build tuples from primitive types");
}
 
Example 11
Source File: ValidatingRecordConsumer.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Validates that a value of one of the given primitive types may be written to the field
 * currently on top of the {@code fields} stack.
 *
 * <p>Increments the value count on top of {@code fieldValueCount}, then checks that the
 * field's repetition allows that many values and that the field is a primitive of one of
 * the accepted types.
 *
 * @param ptypes the primitive type names accepted for the value being written
 * @throws InvalidRecordException when a non-repeated field receives more than one value, the
 *         repetition is unknown, or the field is not a primitive of an accepted type
 */
private void validate(PrimitiveTypeName... ptypes) {
  Type currentType = types.peek().asGroupType().getType(fields.peek());
  int valueCount = fieldValueCount.pop() + 1;
  fieldValueCount.push(valueCount);
  if (LOG.isDebugEnabled()) {
    LOG.debug("validate " + Arrays.toString(ptypes) + " for " + currentType.getName());
  }
  switch (currentType.getRepetition()) {
    case REPEATED:
      // any number of values is allowed
      break;
    case OPTIONAL:
    case REQUIRED:
      if (valueCount > 1) {
        throw new InvalidRecordException("repeated value when the type is not repeated in " + currentType);
      }
      break;
    default:
      throw new InvalidRecordException("unknown repetition " + currentType.getRepetition() + " in " + currentType);
  }
  if (currentType.isPrimitive()) {
    for (PrimitiveTypeName accepted : ptypes) {
      if (currentType.asPrimitiveType().getPrimitiveTypeName() == accepted) {
        return; // type is valid
      }
    }
  }
  // Either a group, or a primitive of a type that was not accepted.
  throw new InvalidRecordException(
      "expected type in " + Arrays.toString(ptypes) + " but got " + currentType);
}
 
Example 12
Source File: Util.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Follows {@code path} down from {@code schema} and returns the first primitive type reached.
 *
 * @param schema the message schema to start from
 * @param path   the field names to follow, outermost first
 * @return the first primitive encountered along the path, or {@code null} when the path is
 *         exhausted without reaching a primitive
 */
public static PrimitiveType primitive(MessageType schema, String[] path) {
  Type current = schema;
  for (int i = 0; i < path.length; i++) {
    Type child = current.asGroupType().getType(path[i]);
    if (child.isPrimitive()) {
      return child.asPrimitiveType();
    }
    current = child;
  }
  return null;
}
 
Example 13
Source File: JsonRecordFormatter.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Creates one formatter per field of {@code groupSchema}, keyed by field name in schema order.
 *
 * @param groupSchema the group whose fields need formatters
 * @return an insertion-ordered map from field name to a {@code JsonPrimitiveWriter} for
 *         primitive fields or a {@code JsonGroupFormatter} for group fields
 */
private Map<String, JsonRecordFormatter> buildWriters(GroupType groupSchema) {
  Map<String, JsonRecordFormatter> formattersByName = new LinkedHashMap<String, JsonRecordFormatter>();
  for (Type field : groupSchema.getFields()) {
    JsonRecordFormatter formatter;
    if (field.isPrimitive()) {
      formatter = new JsonPrimitiveWriter(field);
    } else {
      formatter = new JsonGroupFormatter((GroupType) field);
    }
    formattersByName.put(field.getName(), formatter);
  }
  return formattersByName;
}
 
Example 14
Source File: ParquetValueReaders.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Creates the setter that reads one value from {@code reader} and stores it in a record.
 *
 * <p>For an {@code UnboxedReader} over a primitive type, the setter calls the type-specific
 * read and set methods. In every other case the value is read as an object and stored with
 * {@code set}, or {@code setNull} when the read value is {@code null}.
 *
 * @param reader the reader supplying the values
 * @param type   the Parquet type of the field being read
 * @return the setter for this field
 * @throws UnsupportedOperationException for an unboxed reader over a primitive type that
 *         is not handled
 */
@SuppressWarnings("unchecked")
private <E> Setter<I> newSetter(ParquetValueReader<E> reader, Type type) {
  if (!(reader instanceof UnboxedReader) || !type.isPrimitive()) {
    // TODO: Add support for options to avoid the null check
    return (record, pos, reuse) -> {
      Object value = reader.read((E) reuse);
      if (value == null) {
        setNull(record, pos);
      } else {
        set(record, pos, value);
      }
    };
  }

  UnboxedReader<?> unboxed = (UnboxedReader<?>) reader;
  switch (type.asPrimitiveType().getPrimitiveTypeName()) {
    case BOOLEAN:
      return (record, pos, unused) -> setBoolean(record, pos, unboxed.readBoolean());
    case INT32:
      return (record, pos, unused) -> setInteger(record, pos, unboxed.readInteger());
    case INT64:
      return (record, pos, unused) -> setLong(record, pos, unboxed.readLong());
    case FLOAT:
      return (record, pos, unused) -> setFloat(record, pos, unboxed.readFloat());
    case DOUBLE:
      return (record, pos, unused) -> setDouble(record, pos, unboxed.readDouble());
    case FIXED_LEN_BYTE_ARRAY:
    case BINARY:
      return (record, pos, unused) -> set(record, pos, unboxed.readBinary());
    default:
      throw new UnsupportedOperationException("Unsupported type: " + type);
  }
}
 
Example 15
Source File: ThriftRecordConverter.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a struct converter with one child converter slot per field of {@code parquetSchema}.
 *
 * <p>Each Parquet field is matched, ignoring case, against the names of the Thrift struct's
 * children. Fields without a match keep a {@code null} converter slot.
 *
 * @param events        the protocol event list shared with the child handlers
 * @param parquetSchema the Parquet group describing the struct as stored in the file
 * @param field         the Thrift field describing the struct; its type is cast to {@code StructType}
 */
private StructConverter(List<TProtocol> events, GroupType parquetSchema, ThriftField field) {
  this.events = events;
  this.name = field.getName();
  this.tStruct = new TStruct(name);
  this.thriftType = (StructType) field.getType();
  this.schemaSize = parquetSchema.getFieldCount();
  this.converters = new Converter[this.schemaSize];
  List<ThriftField> thriftChildren = thriftType.getChildren();
  for (int i = 0; i < schemaSize; i++) {
    Type schemaType = parquetSchema.getType(i);
    String fieldName = schemaType.getName();

    // First Thrift child whose (non-null) name equals the Parquet field name, ignoring case.
    ThriftField matchingThrift = null;
    for (ThriftField candidate : thriftChildren) {
      String candidateName = candidate.getName();
      if (candidateName == null || !candidateName.equalsIgnoreCase(fieldName)) {
        continue;
      }
      matchingThrift = candidate;
      break;
    }

    if (matchingThrift != null) {
      Converter delegate = newConverter(events, schemaType, matchingThrift);
      if (schemaType.isPrimitive()) {
        converters[i] = new PrimitiveFieldHandler(delegate.asPrimitiveConverter(), matchingThrift, events);
      } else {
        converters[i] = new GroupFieldhandler(delegate.asGroupConverter(), matchingThrift, events);
      }
    }
    // else: this means the file did not contain that field;
    // it will never be populated in this instance, other files might populate it
  }
}
 
Example 16
Source File: DataWritableWriter.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Writes the elements of {@code array} once per field of {@code type}, skipping null elements.
 *
 * @param array the elements to write; nothing is written when {@code null}
 * @param type  the group schema whose fields are emitted
 * @throws RuntimeException when a group-typed field holds an element that is not an {@code ArrayWritable}
 */
private void writeArray(final ArrayWritable array, final GroupType type) {
  if (array == null) {
    return;
  }
  final Writable[] elements = array.get();
  final int fieldCount = type.getFieldCount();
  for (int field = 0; field < fieldCount; ++field) {
    final Type subType = type.getType(field);
    final String subTypeName = subType.getName();
    recordConsumer.startField(subTypeName, field);
    for (final Writable element : elements) {
      if (element == null) {
        continue;
      }
      if (!subType.isPrimitive()) {
        if (!(element instanceof ArrayWritable)) {
          throw new RuntimeException("This should be a ArrayWritable: " + element);
        }
        recordConsumer.startGroup();
        writeData((ArrayWritable) element, subType.asGroupType());
        recordConsumer.endGroup();
      } else if (element instanceof ArrayWritable) {
        // Wrapped primitive: take the entry at this field's index.
        // NOTE(review): the original author questioned whether this index should be 0.
        writePrimitive(((ArrayWritable) element).get()[field]);
      } else {
        writePrimitive(element);
      }
    }
    recordConsumer.endField(subTypeName, field);
  }
}
 
Example 17
Source File: TajoSchemaConverter.java    From tajo with Apache License 2.0 5 votes vote down vote up
/**
 * Converts a Parquet field to a column, delegating to the primitive or complex converter
 * depending on the field's kind.
 *
 * @param fieldType the Parquet field type to convert
 * @return the converted column
 */
private Column convertField(final Type fieldType) {
  return fieldType.isPrimitive()
      ? convertPrimitiveField(fieldType)
      : convertComplexField(fieldType);
}
 
Example 18
Source File: DataWritableWriter.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Writes the values of {@code arr} as the fields of {@code type}, skipping null values.
 *
 * @param arr  the values to write, indexed like the fields of {@code type}; nothing is
 *             written when {@code null}
 * @param type the group schema describing the values
 * @throws ParquetEncodingException when a group-typed field holds a value that is not an
 *         {@code ArrayWritable}
 */
private void writeData(final ArrayWritable arr, final GroupType type) {
  if (arr == null) {
    return;
  }
  final int fieldCount = type.getFieldCount();
  final Writable[] values = arr.get();
  for (int field = 0; field < fieldCount; ++field) {
    final Type fieldType = type.getType(field);
    final String fieldName = fieldType.getName();
    final Writable value = values[field];
    if (value == null) {
      continue;
    }

    recordConsumer.startField(fieldName, field);
    if (fieldType.isPrimitive()) {
      writePrimitive(value);
    } else {
      recordConsumer.startGroup();
      // value is known to be non-null here, so anything but an ArrayWritable is an error.
      if (!(value instanceof ArrayWritable)) {
        throw new ParquetEncodingException("This should be an ArrayWritable or MapWritable: " + value);
      }
      final ArrayWritable nested = (ArrayWritable) value;
      if (fieldType.asGroupType().getRepetition().equals(Type.Repetition.REPEATED)) {
        writeArray(nested, fieldType.asGroupType());
      } else {
        writeData(nested, fieldType.asGroupType());
      }
      recordConsumer.endGroup();
    }
    recordConsumer.endField(fieldName, field);
  }
}
 
Example 19
Source File: ParquetTypeVisitor.java    From iceberg with Apache License 2.0 4 votes vote down vote up
/**
 * Visits {@code type} with {@code visitor}, dispatching on the kind of type.
 *
 * <ul>
 *   <li>a {@code MessageType} is passed to {@code visitor.message} with its visited fields;</li>
 *   <li>a primitive is passed to {@code visitor.primitive};</li>
 *   <li>a group annotated LIST is validated and passed to {@code visitor.list} with the
 *       visited element (or {@code null} when the repeated group has no field);</li>
 *   <li>a group annotated MAP is validated and passed to {@code visitor.map} with the visited
 *       key and value (each {@code null} when not present);</li>
 *   <li>any other group is passed to {@code visitor.struct} with its visited fields.</li>
 * </ul>
 *
 * @param type    the Parquet type to visit
 * @param visitor the visitor receiving the callbacks
 * @return the value produced by the visitor callback for {@code type}
 * @throws IllegalArgumentException (presumably, via {@code Preconditions.checkArgument}) when
 *         a LIST or MAP group does not have the expected structure
 */
public static <T> T visit(Type type, ParquetTypeVisitor<T> visitor) {
  if (type instanceof MessageType) {
    return visitor.message((MessageType) type,
        visitFields(type.asGroupType(), visitor));

  } else if (type.isPrimitive()) {
    return visitor.primitive(type.asPrimitiveType());

  } else {
    // if not a primitive, the typeId must be a group
    GroupType group = type.asGroupType();
    OriginalType annotation = group.getOriginalType();
    if (annotation != null) {
      switch (annotation) {
        case LIST:
          Preconditions.checkArgument(!group.isRepetition(REPEATED),
              "Invalid list: top-level group is repeated: " + group);
          Preconditions.checkArgument(group.getFieldCount() == 1,
              "Invalid list: does not contain single repeated field: " + group);

          GroupType repeatedElement = group.getFields().get(0).asGroupType();
          Preconditions.checkArgument(repeatedElement.isRepetition(REPEATED),
              "Invalid list: inner group is not repeated");
          Preconditions.checkArgument(repeatedElement.getFieldCount() <= 1,
              "Invalid list: repeated group is not a single field: " + group);

          // the repeated group's name is on the field-name stack while its element is visited
          visitor.fieldNames.push(repeatedElement.getName());
          try {
            T elementResult = null;
            if (repeatedElement.getFieldCount() > 0) {
              elementResult = visitField(repeatedElement.getType(0), visitor);
            }

            return visitor.list(group, elementResult);

          } finally {
            visitor.fieldNames.pop();
          }

        case MAP:
          Preconditions.checkArgument(!group.isRepetition(REPEATED),
              "Invalid map: top-level group is repeated: " + group);
          Preconditions.checkArgument(group.getFieldCount() == 1,
              "Invalid map: does not contain single repeated field: " + group);

          GroupType repeatedKeyValue = group.getType(0).asGroupType();
          Preconditions.checkArgument(repeatedKeyValue.isRepetition(REPEATED),
              "Invalid map: inner group is not repeated");
          Preconditions.checkArgument(repeatedKeyValue.getFieldCount() <= 2,
              "Invalid map: repeated group does not have 2 fields");

          // the repeated group's name is on the field-name stack while key/value are visited
          visitor.fieldNames.push(repeatedKeyValue.getName());
          try {
            T keyResult = null;
            T valueResult = null;
            switch (repeatedKeyValue.getFieldCount()) {
              case 2:
                // if there are 2 fields, both key and value are projected
                keyResult = visitField(repeatedKeyValue.getType(0), visitor);
                valueResult = visitField(repeatedKeyValue.getType(1), visitor);
                // must not fall through: case 1 would visit field 0 a second time and
                // overwrite one of the results just computed
                break;
              case 1:
                // if there is just one, use the name to determine what it is
                Type keyOrValue = repeatedKeyValue.getType(0);
                if (keyOrValue.getName().equalsIgnoreCase("key")) {
                  keyResult = visitField(keyOrValue, visitor);
                  // value result remains null
                } else {
                  valueResult = visitField(keyOrValue, visitor);
                  // key result remains null
                }
                break;
              default:
                // both results will remain null
                break;
            }

            return visitor.map(group, keyResult, valueResult);

          } finally {
            visitor.fieldNames.pop();
          }

        default:
          // any other annotation: handled as a plain struct below
          break;
      }
    }

    return visitor.struct(group, visitFields(group, visitor));
  }
}
 
Example 20
Source File: LogicalListL1Converter.java    From dremio-oss with Apache License 2.0 3 votes vote down vote up
/**
 * Checks whether the schema has the following shape:
 * <pre>
 * optional group &lt;name&gt; (LIST) {
 *   repeated group &lt;list-name&gt; {
 *     &lt;element-repetition&gt; &lt;element-type&gt; &lt;element-name&gt;;
 *   }
 * }
 * </pre>
 * Only the inner structure is verified here: the schema must have exactly one field, that
 * field must be a repeated group without an original type, and that group must itself have
 * exactly one field.
 *
 * @param schema parquet group type
 * @return {@code true} if the schema is supported
 */
public static boolean isSupportedSchema(GroupType schema) {
  if (schema.getFieldCount() != 1) {
    return false;
  }
  Type inner = schema.getType(0);
  // check: repeated group with no annotation
  boolean isPlainRepeatedGroup =
      !inner.isPrimitive() && inner.isRepetition(REPEATED) && inner.getOriginalType() == null;
  return isPlainRepeatedGroup && inner.asGroupType().getFieldCount() == 1;
}