Java Code Examples for org.apache.parquet.schema.Type#asPrimitiveType()

The following examples show how to use org.apache.parquet.schema.Type#asPrimitiveType(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: ParquetRecordWriter.java    From dremio-oss with Apache License 2.0 6 votes vote down vote up
/**
 * Changes the list inner '$data$' vector name to 'element' in the schema.
 *
 * <p>A primitive child is rebuilt with the same repetition, primitive type name,
 * type length, original type, decimal metadata and id. A group child is rebuilt
 * with the same repetition, original type and fields, and its id is re-applied
 * when one is present.
 *
 * @param childType the child type of the list that should be renamed
 * @return a new type equivalent to {@code childType} but named "element"
 */
private Type renameChildTypeToElement(Type childType) {
  if (childType.isPrimitive()) {
    PrimitiveType childPrimitiveType = childType.asPrimitiveType();
    return new PrimitiveType(childType.getRepetition(),
      childPrimitiveType.getPrimitiveTypeName(),
      childPrimitiveType.getTypeLength(),
      "element",
      childPrimitiveType.getOriginalType(),
      childPrimitiveType.getDecimalMetadata(),
      childPrimitiveType.getId());
  }

  GroupType childGroupType = childType.asGroupType();
  GroupType renamedGroup = new GroupType(childType.getRepetition(),
    "element",
    childType.getOriginalType(),
    childGroupType.getFields());

  Type.ID id = childGroupType.getId();
  if (id == null) {
    // The constructor used above takes no id, so there is nothing to carry over.
    return renamedGroup;
  }
  // Read the id through its dedicated accessor; hashCode() is not a documented
  // way to obtain the numeric field id and should not be relied on for that.
  return renamedGroup.withId(id.intValue());
}
 
Example 2
Source File: Statistics.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/**
 * Builds an empty {@code Statistics} object matching the primitive type of the
 * given column. The result is meant for reading/writing the new min/max
 * statistics used in the V2 format.
 *
 * @param type
 *          type of the column; it is converted with {@code asPrimitiveType()}
 * @return a new, typed statistics instance for that column
 * @throws UnknownColumnTypeException
 *           if the primitive type name is not one of the handled cases
 */
public static Statistics<?> createStats(Type type) {
  final PrimitiveType columnType = type.asPrimitiveType();
  switch (columnType.getPrimitiveTypeName()) {
    case BOOLEAN:
      return new BooleanStatistics(columnType);
    case INT32:
      return new IntStatistics(columnType);
    case INT64:
      return new LongStatistics(columnType);
    case FLOAT:
      return new FloatStatistics(columnType);
    case DOUBLE:
      return new DoubleStatistics(columnType);
    // All byte-array backed types share the binary statistics implementation.
    case INT96:
    case BINARY:
    case FIXED_LEN_BYTE_ARRAY:
      return new BinaryStatistics(columnType);
    default:
      break;
  }
  // Reached only when no case above matched the primitive type name.
  throw new UnknownColumnTypeException(columnType.getPrimitiveTypeName());
}
 
Example 3
Source File: Util.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Walks {@code path} down from the root of {@code schema}, one field name per
 * step, and returns the first primitive type encountered along the way.
 *
 * @param schema the message schema to start from
 * @param path   field names to follow, outermost first
 * @return the first primitive type reached, or {@code null} if every step of
 *         the path (including the case of an empty path) lands on a group
 */
public static PrimitiveType primitive(MessageType schema, String[] path) {
  Type node = schema;
  for (int depth = 0; depth < path.length; depth++) {
    // Descend one level: the current node is treated as a group and asked for the named child.
    node = node.asGroupType().getType(path[depth]);
    if (!node.isPrimitive()) {
      continue;
    }
    // Stop at the first primitive, even if path has further segments.
    return node.asPrimitiveType();
  }
  return null;
}
 
Example 4
Source File: TupleConverter.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Allocates a new current tuple of {@code schemaSize} fields. When
 * {@code elephantBirdCompatible} is set, every optional primitive field of
 * {@code parquetSchema} with a numeric or boolean type is pre-populated with
 * the matching zero constant; other positions are left as created.
 *
 * @throws RuntimeException wrapping any {@code ExecException} raised while
 *         setting a tuple field
 */
@Override
final public void start() {
  currentTuple = TF.newTuple(schemaSize);
  if (!elephantBirdCompatible) {
    return;
  }

  try {
    int nextPosition = 0;
    for (Type field : parquetSchema.getFields()) {
      // Capture this field's slot and advance, so skipped fields still consume a position.
      final int position = nextPosition++;
      if (!field.isPrimitive() || !field.isRepetition(Repetition.OPTIONAL)) {
        continue;
      }
      switch (field.asPrimitiveType().getPrimitiveTypeName()) {
      case INT32:
      case BOOLEAN:
        // Booleans receive the same I32_ZERO default as 32-bit integers.
        currentTuple.set(position, I32_ZERO);
        break;
      case INT64:
        currentTuple.set(position, I64_ZERO);
        break;
      case FLOAT:
        currentTuple.set(position, FLOAT_ZERO);
        break;
      case DOUBLE:
        currentTuple.set(position, DOUBLE_ZERO);
        break;
      }
    }
  } catch (ExecException e) {
    throw new RuntimeException(e);
  }
}
 
Example 5
Source File: AvroRecordConverter.java    From parquet-mr with Apache License 2.0 4 votes vote down vote up
/**
 * Creates the converter for one field, selected by the Avro schema's type.
 *
 * <p>A logical-type conversion is looked up first (by {@code knownClass} when
 * one is given, otherwise by logical type alone) and wrapped around
 * {@code setter}; the resulting container is handed to the converter that is
 * returned.
 *
 * @param schema     Avro schema of the field
 * @param type       Parquet type of the field
 * @param model      data model used to resolve conversions and datum classes
 * @param knownClass expected datum class, or {@code null} if not known
 * @param setter     container that receives the converted values
 * @return a converter for the field
 * @throws UnsupportedOperationException if the Avro type is not handled
 */
private static Converter newConverter(Schema schema, Type type,
    GenericData model, Class<?> knownClass, ParentValueContainer setter) {
  LogicalType logicalType = schema.getLogicalType();

  Conversion<?> conversion;
  if (knownClass == null) {
    conversion = model.getConversionFor(logicalType);
  } else {
    conversion = model.getConversionByClass(knownClass, logicalType);
  }

  ParentValueContainer parent =
      ParentValueContainer.getConversionContainer(setter, conversion, schema);

  switch (schema.getType()) {
  case BOOLEAN:
    return new AvroConverters.FieldBooleanConverter(parent);
  case INT: {
    // A null datum class matches none of the narrower types and falls through to the integer converter.
    Class<?> datumClass = getDatumClass(conversion, knownClass, schema, model);
    if (datumClass == byte.class || datumClass == Byte.class) {
      return new AvroConverters.FieldByteConverter(parent);
    }
    if (datumClass == char.class || datumClass == Character.class) {
      return new AvroConverters.FieldCharConverter(parent);
    }
    if (datumClass == short.class || datumClass == Short.class) {
      return new AvroConverters.FieldShortConverter(parent);
    }
    return new AvroConverters.FieldIntegerConverter(parent);
  }
  case LONG:
    return new AvroConverters.FieldLongConverter(parent);
  case FLOAT:
    return new AvroConverters.FieldFloatConverter(parent);
  case DOUBLE:
    return new AvroConverters.FieldDoubleConverter(parent);
  case BYTES: {
    Class<?> datumClass = getDatumClass(conversion, knownClass, schema, model);
    boolean wantsByteArray = datumClass != null
        && datumClass.isArray()
        && datumClass.getComponentType() == byte.class;
    if (wantsByteArray) {
      return new AvroConverters.FieldByteArrayConverter(parent);
    }
    return new AvroConverters.FieldByteBufferConverter(parent);
  }
  case STRING: {
    boolean isUuid = logicalType != null
        && logicalType.getName().equals(LogicalTypes.uuid().getName());
    if (!isUuid) {
      return newStringConverter(schema, model, parent);
    }
    return new AvroConverters.FieldUUIDConverter(parent, type.asPrimitiveType());
  }
  case RECORD:
    return new AvroRecordConverter(parent, type.asGroupType(), schema, model);
  case ENUM:
    return new AvroConverters.FieldEnumConverter(parent, schema, model);
  case ARRAY: {
    Class<?> datumClass = getDatumClass(conversion, knownClass, schema, model);
    if (datumClass == null || !datumClass.isArray()) {
      return new AvroCollectionConverter(parent, type.asGroupType(), schema,
          model, datumClass);
    }
    return new AvroArrayConverter(parent, type.asGroupType(), schema, model,
        datumClass);
  }
  case MAP:
    return new MapConverter(parent, type.asGroupType(), schema, model);
  case UNION:
    return new AvroUnionConverter(parent, type, schema, model);
  case FIXED:
    return new AvroConverters.FieldFixedConverter(parent, schema, model);
  default:
    throw new UnsupportedOperationException(String.format(
        "Cannot convert Avro type: %s to Parquet type: %s", schema, type));
  }
}