org.apache.parquet.schema.ConversionPatterns Java Examples

The following examples show how to use org.apache.parquet.schema.ConversionPatterns. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: PigSchemaConverter.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/**
 * Converts a Pig map field into the Parquet string-keyed map pattern.
 *
 * @param alias name given to the resulting Parquet group
 * @param fieldSchema Pig field schema of the map; its inner schema must hold exactly one field
 * @return the group produced by {@link ConversionPatterns#stringKeyMapType} using
 *         {@code Repetition.OPTIONAL}
 * @throws SchemaConversionException if the inner schema is absent, does not contain exactly one
 *         field, or that field cannot be read
 */
private GroupType convertMap(String alias, FieldSchema fieldSchema) {
  final Schema valueSchema = fieldSchema.schema;
  final boolean hasSingleField = valueSchema != null && valueSchema.size() == 1;
  if (!hasSingleField) {
    throw new SchemaConversionException("Invalid map Schema, schema should contain exactly one field: " + fieldSchema);
  }

  // Assigned exactly once: the catch branch always rethrows.
  final FieldSchema valueField;
  try {
    valueField = valueSchema.getField(0);
  } catch (FrontendException fe) {
    throw new SchemaConversionException("Invalid map schema, cannot infer innerschema: ", fe);
  }

  // The value is converted first, then the inner group name is derived, as before.
  final Type valueType = convertWithName(valueField, "value");
  final String mapGroupName = name(valueField.alias, "map");
  return ConversionPatterns.stringKeyMapType(Repetition.OPTIONAL, alias, mapGroupName, valueType);
}
 
Example #2
Source File: SingleLevelArraySchemaConverter.java    From presto with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the Parquet map group for a Hive map type.
 *
 * The key is converted with {@code Repetition.REQUIRED}; the value goes through the
 * two-argument {@code convertType} overload.
 *
 * @param name name of the resulting map group
 * @param typeInfo Hive map type supplying the key and value type infos
 * @param repetition repetition applied to the map group itself
 * @return the group produced by {@link ConversionPatterns#mapType}
 */
private static GroupType convertMapType(String name, MapTypeInfo typeInfo, Repetition repetition)
{
    String keyFieldName = ParquetHiveSerDe.MAP_KEY.toString();
    Type convertedKey = convertType(keyFieldName, typeInfo.getMapKeyTypeInfo(), Repetition.REQUIRED);

    String valueFieldName = ParquetHiveSerDe.MAP_VALUE.toString();
    Type convertedValue = convertType(valueFieldName, typeInfo.getMapValueTypeInfo());

    return ConversionPatterns.mapType(repetition, name, convertedKey, convertedValue);
}
 
Example #3
Source File: HiveSchemaConverter.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Builds an optional Parquet map group for a Hive map type.
 *
 * The key is converted with {@code Repetition.REQUIRED}; the value goes through the
 * two-argument {@code convertType} overload. Arguments are evaluated left to right, so the
 * key is still converted before the value.
 *
 * @param name name of the resulting map group
 * @param typeInfo Hive map type supplying the key and value type infos
 * @return the group produced by {@link ConversionPatterns#mapType} with
 *         {@code Repetition.OPTIONAL}
 */
private static GroupType convertMapType(final String name, final MapTypeInfo typeInfo) {
  return ConversionPatterns.mapType(
      Repetition.OPTIONAL,
      name,
      convertType(ParquetHiveSerDe.MAP_KEY.toString(), typeInfo.getMapKeyTypeInfo(), Repetition.REQUIRED),
      convertType(ParquetHiveSerDe.MAP_VALUE.toString(), typeInfo.getMapValueTypeInfo()));
}
 
Example #4
Source File: PigSchemaConverter.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Converts a Pig bag field into the Parquet list pattern.
 *
 * The first field of the bag's inner schema is converted as a repeated tuple and wrapped by
 * {@link ConversionPatterns#listType}.
 *
 * @param name name of the resulting list group
 * @param fieldSchema Pig field schema of the bag
 * @return an optional group containing one repeated group field
 * @throws FrontendException if the inner field cannot be read from the bag schema
 */
private GroupType convertBag(String name, FieldSchema fieldSchema) throws FrontendException {
  final FieldSchema tupleField = fieldSchema.schema.getField(0);
  final String tupleName = name(tupleField.alias, "bag");
  final Type repeatedTuple = convertTuple(tupleName, tupleField, Repetition.REPEATED);
  return ConversionPatterns.listType(Repetition.OPTIONAL, name, repeatedTuple);
}
 
Example #5
Source File: AvroSchemaConverter190Int96Avro18.java    From datacollector with Apache License 2.0 4 votes vote down vote up
/**
 * Converts one Avro schema into a Parquet type named {@code fieldName}, consulting the schema's
 * Avro {@code LogicalType}.
 *
 * RECORD, ARRAY, MAP and UNION schemas return a group type directly. Every other supported
 * type yields a primitive builder that may then be annotated from the logical type. A LONG
 * carrying a timestamp-millis or timestamp-micros logical type is written as INT96 and gets
 * no further annotation.
 *
 * @param fieldName name of the resulting Parquet field
 * @param schema Avro schema to convert
 * @param repetition repetition of the resulting Parquet field
 * @return the converted Parquet type
 * @throws UnsupportedOperationException if the Avro type has no conversion here
 */
private Type convertFieldUsingLogicalType(String fieldName, Schema schema, Type.Repetition repetition) {
  LOG.debug("Converting field: {} using LogicalType", fieldName);
  final Schema.Type avroType = schema.getType();
  final LogicalType logicalType = schema.getLogicalType();
  final boolean isTimestamp = logicalType instanceof LogicalTypes.TimestampMillis
      || logicalType instanceof LogicalTypes.TimestampMicros;

  // Case labels name Schema.Type constants; identifiers inside the case bodies keep
  // resolving to the Parquet names already in scope.
  Types.PrimitiveBuilder<PrimitiveType> builder;
  switch (avroType) {
    case BOOLEAN:
      builder = Types.primitive(BOOLEAN, repetition);
      break;
    case INT:
      builder = Types.primitive(INT32, repetition);
      break;
    case LONG:
      // Special case handling timestamp until int96 fully supported or logical types correctly supported
      if (isTimestamp) {
        LOG.debug("Logical type is a timestamp millis or micros");
        builder = Types.primitive(INT96, repetition);
      } else {
        builder = Types.primitive(INT64, repetition);
      }
      break;
    case FLOAT:
      builder = Types.primitive(FLOAT, repetition);
      break;
    case DOUBLE:
      builder = Types.primitive(DOUBLE, repetition);
      break;
    case BYTES:
      builder = Types.primitive(BINARY, repetition);
      break;
    case STRING:
      builder = Types.primitive(BINARY, repetition).as(UTF8);
      break;
    case RECORD:
      return new GroupType(repetition, fieldName, convertFields(schema.getFields()));
    case ENUM:
      builder = Types.primitive(BINARY, repetition).as(ENUM);
      break;
    case ARRAY:
      if (writeOldListStructure) {
        return ConversionPatterns.listType(repetition, fieldName,
            convertField("array", schema.getElementType(), REPEATED));
      }
      return ConversionPatterns.listOfElements(repetition, fieldName,
          convertField(AvroWriteSupport.LIST_ELEMENT_NAME, schema.getElementType()));
    case MAP: {
      // avro map key type is always string
      Type mapValueType = convertField("value", schema.getValueType());
      return ConversionPatterns.stringKeyMapType(repetition, fieldName, mapValueType);
    }
    case FIXED:
      builder = Types.primitive(FIXED_LEN_BYTE_ARRAY, repetition)
                     .length(schema.getFixedSize());
      break;
    case UNION:
      return convertUnion(fieldName, schema, repetition);
    default:
      throw new UnsupportedOperationException("Cannot convert Avro type " + avroType);
  }

  // schema translation can only be done for known logical types because this
  // creates an equivalence; timestamps were already handled through INT96 above
  if (logicalType != null && !isTimestamp) {
    if (logicalType instanceof LogicalTypes.Decimal) {
      LogicalTypes.Decimal decimal = (LogicalTypes.Decimal) logicalType;
      builder = builder.as(DECIMAL)
                       .precision(decimal.getPrecision())
                       .scale(decimal.getScale());
    } else {
      OriginalType annotation = convertLogicalType(logicalType);
      if (annotation != null) {
        builder.as(annotation);
      }
    }
  }

  return builder.named(fieldName);
}
 
Example #6
Source File: AvroSchemaConverter190Int96Avro17.java    From datacollector with Apache License 2.0 4 votes vote down vote up
/**
 * Converts one Avro schema into a Parquet type named {@code fieldName}, reading the logical
 * type from the schema's string property instead of the Avro {@code LogicalType} API.
 *
 * RECORD, ARRAY, MAP and UNION schemas return a group type directly. Every other supported
 * type yields a primitive builder that may then be annotated from the logical type. A LONG
 * carrying a timestamp-millis or timestamp-micros logical type is written as INT96 and gets
 * no further annotation.
 *
 * @param fieldName name of the resulting Parquet field
 * @param schema Avro schema to convert
 * @param repetition repetition of the resulting Parquet field
 * @return the converted Parquet type
 * @throws UnsupportedOperationException if the Avro type has no conversion here
 */
private Type convertFieldWithoutUsingLogicalType(String fieldName, Schema schema, Type.Repetition repetition) {
  LOG.debug("Converting field: {} without using LogicalType", fieldName);
  Types.PrimitiveBuilder<PrimitiveType> builder;
  Schema.Type type = schema.getType();

  String logicalType = schema.getProp(AvroTypeUtil.LOGICAL_TYPE);

  if (type.equals(Schema.Type.BOOLEAN)) {
    builder = Types.primitive(BOOLEAN, repetition);
  } else if (type.equals(Schema.Type.INT)) {
    builder = Types.primitive(INT32, repetition);
  } else if (type.equals(Schema.Type.LONG)) {
    // Special case handling timestamp until int96 fully supported or logical types correctly supported
    if (AvroTypeUtil.LOGICAL_TYPE_TIMESTAMP_MILLIS.equals(logicalType) ||
        AvroTypeUtil.LOGICAL_TYPE_TIMESTAMP_MICROS.equals(logicalType)) {
      LOG.debug("Logical type is a timestamp millis or micros");
      builder = Types.primitive(INT96, repetition);
    } else {
      builder = Types.primitive(INT64, repetition);
    }
  } else if (type.equals(Schema.Type.FLOAT)) {
    builder = Types.primitive(FLOAT, repetition);
  } else if (type.equals(Schema.Type.DOUBLE)) {
    builder = Types.primitive(DOUBLE, repetition);
  } else if (type.equals(Schema.Type.BYTES)) {
    builder = Types.primitive(BINARY, repetition);
  } else if (type.equals(Schema.Type.STRING)) {
    builder = Types.primitive(BINARY, repetition).as(UTF8);
  } else if (type.equals(Schema.Type.RECORD)) {
    return new GroupType(repetition, fieldName, convertFields(schema.getFields()));
  } else if (type.equals(Schema.Type.ENUM)) {
    builder = Types.primitive(BINARY, repetition).as(ENUM);
  } else if (type.equals(Schema.Type.ARRAY)) {
    if (writeOldListStructure) {
      return ConversionPatterns.listType(repetition, fieldName,
          convertField("array", schema.getElementType(), REPEATED));
    } else {
      return ConversionPatterns.listOfElements(repetition, fieldName,
          convertField(AvroWriteSupport.LIST_ELEMENT_NAME, schema.getElementType()));
    }
  } else if (type.equals(Schema.Type.MAP)) {
    Type valType = convertField("value", schema.getValueType());
    // avro map key type is always string
    return ConversionPatterns.stringKeyMapType(repetition, fieldName, valType);
  } else if (type.equals(Schema.Type.FIXED)) {
    builder = Types.primitive(FIXED_LEN_BYTE_ARRAY, repetition)
                   .length(schema.getFixedSize());
  } else if (type.equals(Schema.Type.UNION)) {
    return convertUnion(fieldName, schema, repetition);
  } else {
    throw new UnsupportedOperationException("Cannot convert Avro type " + type);
  }

  // schema translation can only be done for known logical types because this
  // creates an equivalence

  if (logicalType != null &&
      !(AvroTypeUtil.LOGICAL_TYPE_TIMESTAMP_MILLIS.equals(logicalType) || AvroTypeUtil.LOGICAL_TYPE_TIMESTAMP_MICROS.equals(logicalType))) {
    if (AvroTypeUtil.LOGICAL_TYPE_DECIMAL.equals(logicalType)) {
      int precision = schema.getJsonProp(AvroTypeUtil.LOGICAL_TYPE_ATTR_PRECISION).getIntValue();
      // The Avro specification makes "scale" optional for decimals and defines the default
      // as 0; dereferencing a missing property would fail with a NullPointerException.
      int scale = schema.getJsonProp(AvroTypeUtil.LOGICAL_TYPE_ATTR_SCALE) == null
          ? 0
          : schema.getJsonProp(AvroTypeUtil.LOGICAL_TYPE_ATTR_SCALE).getIntValue();
      builder = (Types.PrimitiveBuilder<PrimitiveType>) builder.as(DECIMAL)
         .precision(precision)
         .scale(scale);

    } else {
      OriginalType annotation = convertLogicalTypeStr(logicalType);
      if (annotation != null) {
        builder.as(annotation);
      }
    }
  }

  return builder.named(fieldName);
}
 
Example #7
Source File: AvroSchemaConverterLogicalTypesPre19.java    From datacollector with Apache License 2.0 4 votes vote down vote up
/**
 * Converts one Avro schema into a Parquet type named {@code fieldName}, reading the logical
 * type from the schema's string property.
 *
 * RECORD, ARRAY, MAP and UNION schemas return a group type directly. Every other supported
 * type yields a primitive builder that may then be annotated from the logical type.
 *
 * @param fieldName name of the resulting Parquet field
 * @param schema Avro schema to convert
 * @param repetition repetition of the resulting Parquet field
 * @return the converted Parquet type
 * @throws UnsupportedOperationException if the Avro type has no conversion here
 */
@SuppressWarnings("deprecation")
private Type convertField(String fieldName, Schema schema, Type.Repetition repetition) {
  Types.PrimitiveBuilder<PrimitiveType> builder;
  Schema.Type type = schema.getType();
  if (type.equals(Schema.Type.BOOLEAN)) {
    builder = Types.primitive(BOOLEAN, repetition);
  } else if (type.equals(Schema.Type.INT)) {
    builder = Types.primitive(INT32, repetition);
  } else if (type.equals(Schema.Type.LONG)) {
    builder = Types.primitive(INT64, repetition);
  } else if (type.equals(Schema.Type.FLOAT)) {
    builder = Types.primitive(FLOAT, repetition);
  } else if (type.equals(Schema.Type.DOUBLE)) {
    builder = Types.primitive(DOUBLE, repetition);
  } else if (type.equals(Schema.Type.BYTES)) {
    builder = Types.primitive(BINARY, repetition);
  } else if (type.equals(Schema.Type.STRING)) {
    builder = Types.primitive(BINARY, repetition).as(UTF8);
  } else if (type.equals(Schema.Type.RECORD)) {
    return new GroupType(repetition, fieldName, convertFields(schema.getFields()));
  } else if (type.equals(Schema.Type.ENUM)) {
    builder = Types.primitive(BINARY, repetition).as(ENUM);
  } else if (type.equals(Schema.Type.ARRAY)) {
    if (writeOldListStructure) {
      return ConversionPatterns.listType(repetition, fieldName,
          convertField("array", schema.getElementType(), REPEATED));
    } else {
      return ConversionPatterns.listOfElements(repetition, fieldName,
          convertField(AvroWriteSupport.LIST_ELEMENT_NAME, schema.getElementType()));
    }
  } else if (type.equals(Schema.Type.MAP)) {
    Type valType = convertField("value", schema.getValueType());
    // avro map key type is always string
    return ConversionPatterns.stringKeyMapType(repetition, fieldName, valType);
  } else if (type.equals(Schema.Type.FIXED)) {
    builder = (Types.PrimitiveBuilder<PrimitiveType>) Types.primitive(FIXED_LEN_BYTE_ARRAY, repetition).length(schema.getFixedSize());
  } else if (type.equals(Schema.Type.UNION)) {
    return convertUnion(fieldName, schema, repetition);
  } else {
    throw new UnsupportedOperationException("Cannot convert Avro type " + type);
  }

  // schema translation can only be done for known logical types because this
  // creates an equivalence
  String logicalType = schema.getProp(LOGICAL_TYPE);
  if (logicalType != null) {
    if (LOGICAL_TYPE_DECIMAL.equals(logicalType)) {
      int precision = schema.getJsonProp(LOGICAL_PROP_PRECISION).getIntValue();
      // The Avro specification makes "scale" optional for decimals and defines the default
      // as 0; dereferencing a missing property would fail with a NullPointerException.
      int scale = schema.getJsonProp(LOGICAL_PROP_SCALE) == null
          ? 0
          : schema.getJsonProp(LOGICAL_PROP_SCALE).getIntValue();
      builder = (Types.PrimitiveBuilder<PrimitiveType>) builder.as(DECIMAL)
          .precision(precision)
          .scale(scale);

    } else {
      OriginalType annotation = convertLogicalType(logicalType);
      if (annotation != null) {
        builder.as(annotation);
      }
    }
  }

  return builder.named(fieldName);
}
 
Example #8
Source File: AvroSchemaConverter.java    From parquet-mr with Apache License 2.0 4 votes vote down vote up
/**
 * Translates one Avro schema into the Parquet type named {@code fieldName}.
 *
 * RECORD, ARRAY, MAP and UNION schemas return a group type directly. Every other supported
 * type yields a primitive builder that is then annotated from the schema's Avro logical type,
 * when one is present and has a Parquet counterpart. A STRING with the uuid logical type is
 * written as a fixed-length byte array when {@code writeParquetUUID} is set.
 *
 * @param fieldName name of the resulting Parquet field
 * @param schema Avro schema to convert
 * @param repetition repetition of the resulting Parquet field
 * @return the converted Parquet type
 * @throws UnsupportedOperationException if the Avro type has no conversion here
 */
@SuppressWarnings("deprecation")
private Type convertField(String fieldName, Schema schema, Type.Repetition repetition) {
  final Schema.Type avroType = schema.getType();
  final LogicalType logicalType = schema.getLogicalType();

  // Case labels name Schema.Type constants; identifiers inside the case bodies keep
  // resolving to the Parquet names already in scope.
  Types.PrimitiveBuilder<PrimitiveType> builder;
  switch (avroType) {
    case BOOLEAN:
      builder = Types.primitive(BOOLEAN, repetition);
      break;
    case INT:
      builder = Types.primitive(INT32, repetition);
      break;
    case LONG:
      builder = Types.primitive(INT64, repetition);
      break;
    case FLOAT:
      builder = Types.primitive(FLOAT, repetition);
      break;
    case DOUBLE:
      builder = Types.primitive(DOUBLE, repetition);
      break;
    case BYTES:
      builder = Types.primitive(BINARY, repetition);
      break;
    case STRING:
      if (logicalType != null && logicalType.getName().equals(LogicalTypes.uuid().getName()) && writeParquetUUID) {
        builder = Types.primitive(FIXED_LEN_BYTE_ARRAY, repetition)
            .length(LogicalTypeAnnotation.UUIDLogicalTypeAnnotation.BYTES);
      } else {
        builder = Types.primitive(BINARY, repetition).as(stringType());
      }
      break;
    case RECORD:
      return new GroupType(repetition, fieldName, convertFields(schema.getFields()));
    case ENUM:
      builder = Types.primitive(BINARY, repetition).as(enumType());
      break;
    case ARRAY:
      if (writeOldListStructure) {
        return ConversionPatterns.listType(repetition, fieldName,
            convertField("array", schema.getElementType(), REPEATED));
      }
      return ConversionPatterns.listOfElements(repetition, fieldName,
          convertField(AvroWriteSupport.LIST_ELEMENT_NAME, schema.getElementType()));
    case MAP: {
      // avro map key type is always string
      Type mapValueType = convertField("value", schema.getValueType());
      return ConversionPatterns.stringKeyMapType(repetition, fieldName, mapValueType);
    }
    case FIXED:
      builder = Types.primitive(FIXED_LEN_BYTE_ARRAY, repetition)
          .length(schema.getFixedSize());
      break;
    case UNION:
      return convertUnion(fieldName, schema, repetition);
    default:
      throw new UnsupportedOperationException("Cannot convert Avro type " + avroType);
  }

  // schema translation can only be done for known logical types because this
  // creates an equivalence
  if (logicalType instanceof LogicalTypes.Decimal) {
    LogicalTypes.Decimal decimal = (LogicalTypes.Decimal) logicalType;
    builder = builder.as(decimalType(decimal.getScale(), decimal.getPrecision()));
  } else if (logicalType != null) {
    LogicalTypeAnnotation annotation = convertLogicalType(logicalType);
    if (annotation != null) {
      builder.as(annotation);
    }
  }

  return builder.named(fieldName);
}