Java Code Examples for org.apache.parquet.schema.Type.Repetition#OPTIONAL

The following examples show how to use org.apache.parquet.schema.Type.Repetition#OPTIONAL . You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: ParquetRecordWriter.java    From Bats with Apache License 2.0 6 votes vote down vote up
private Type getType(MaterializedField field) {
  MinorType minorType = field.getType().getMinorType();
  DataMode dataMode = field.getType().getMode();
  switch (minorType) {
    case MAP:
      List<Type> types = Lists.newArrayList();
      for (MaterializedField childField : field.getChildren()) {
        types.add(getType(childField));
      }
      return new GroupType(dataMode == DataMode.REPEATED ? Repetition.REPEATED : Repetition.OPTIONAL, field.getName(), types);
    case LIST:
      throw new UnsupportedOperationException("Unsupported type " + minorType);
    case NULL:
      MaterializedField newField = field.withType(
        TypeProtos.MajorType.newBuilder().setMinorType(MinorType.INT).setMode(DataMode.OPTIONAL).build());
      return getPrimitiveType(newField);
    default:
      return getPrimitiveType(field);
  }
}
 
Example 2
Source File: TestTypeBuilders.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
@Test
public void testTypeConstructionWithUndefinedColumnOrder() {
  PrimitiveTypeName[] types = new PrimitiveTypeName[] {
      BOOLEAN, INT32, INT64, INT96, FLOAT, DOUBLE, BINARY, FIXED_LEN_BYTE_ARRAY
  };
  for (PrimitiveTypeName type : types) {
    String name = type.toString() + "_";
    int len = type == FIXED_LEN_BYTE_ARRAY ? 42 : 0;
    PrimitiveType expected = new PrimitiveType(Repetition.OPTIONAL, type, len, name, null, null, null,
        ColumnOrder.undefined());
    PrimitiveType built = Types.optional(type).length(len).columnOrder(ColumnOrder.undefined()).named(name);
    Assert.assertEquals(expected, built);
  }
}
 
Example 3
Source File: TestTypeBuilders.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
@Test
public void testTypeConstructionWithTypeDefinedColumnOrder() {
  PrimitiveTypeName[] types = new PrimitiveTypeName[] {
      BOOLEAN, INT32, INT64, FLOAT, DOUBLE, BINARY, FIXED_LEN_BYTE_ARRAY
  };
  for (PrimitiveTypeName type : types) {
    String name = type.toString() + "_";
    int len = type == FIXED_LEN_BYTE_ARRAY ? 42 : 0;
    PrimitiveType expected = new PrimitiveType(Repetition.OPTIONAL, type, len, name, null, null, null,
        ColumnOrder.typeDefined());
    PrimitiveType built = Types.optional(type).length(len).columnOrder(ColumnOrder.typeDefined()).named(name);
    Assert.assertEquals(expected, built);
  }
}
 
Example 4
Source File: SingleLevelArrayMapKeyValuesSchemaConverter.java    From presto with Apache License 2.0 4 votes vote down vote up
private static Type convertType(String name, TypeInfo typeInfo, Repetition repetition)
{
    if (typeInfo.getCategory() == Category.PRIMITIVE) {
        if (typeInfo.equals(TypeInfoFactory.stringTypeInfo)) {
            return Types.primitive(PrimitiveTypeName.BINARY, repetition).as(OriginalType.UTF8)
                    .named(name);
        }
        if (typeInfo.equals(TypeInfoFactory.intTypeInfo) ||
                typeInfo.equals(TypeInfoFactory.shortTypeInfo) ||
                typeInfo.equals(TypeInfoFactory.byteTypeInfo)) {
            return Types.primitive(PrimitiveTypeName.INT32, repetition).named(name);
        }
        if (typeInfo.equals(TypeInfoFactory.longTypeInfo)) {
            return Types.primitive(PrimitiveTypeName.INT64, repetition).named(name);
        }
        if (typeInfo.equals(TypeInfoFactory.doubleTypeInfo)) {
            return Types.primitive(PrimitiveTypeName.DOUBLE, repetition).named(name);
        }
        if (typeInfo.equals(TypeInfoFactory.floatTypeInfo)) {
            return Types.primitive(PrimitiveTypeName.FLOAT, repetition).named(name);
        }
        if (typeInfo.equals(TypeInfoFactory.booleanTypeInfo)) {
            return Types.primitive(PrimitiveTypeName.BOOLEAN, repetition).named(name);
        }
        if (typeInfo.equals(TypeInfoFactory.binaryTypeInfo)) {
            return Types.primitive(PrimitiveTypeName.BINARY, repetition).named(name);
        }
        if (typeInfo.equals(TypeInfoFactory.timestampTypeInfo)) {
            return Types.primitive(PrimitiveTypeName.INT96, repetition).named(name);
        }
        if (typeInfo.equals(TypeInfoFactory.voidTypeInfo)) {
            throw new UnsupportedOperationException("Void type not implemented");
        }
        if (typeInfo.getTypeName().toLowerCase(Locale.ENGLISH).startsWith(
                serdeConstants.CHAR_TYPE_NAME)) {
            if (repetition == Repetition.OPTIONAL) {
                return Types.optional(PrimitiveTypeName.BINARY).as(OriginalType.UTF8).named(name);
            }
            return Types.repeated(PrimitiveTypeName.BINARY).as(OriginalType.UTF8).named(name);
        }
        if (typeInfo.getTypeName().toLowerCase(Locale.ENGLISH).startsWith(
                serdeConstants.VARCHAR_TYPE_NAME)) {
            if (repetition == Repetition.OPTIONAL) {
                return Types.optional(PrimitiveTypeName.BINARY).as(OriginalType.UTF8).named(name);
            }
            return Types.repeated(PrimitiveTypeName.BINARY).as(OriginalType.UTF8).named(name);
        }
        if (typeInfo instanceof DecimalTypeInfo) {
            DecimalTypeInfo decimalTypeInfo = (DecimalTypeInfo) typeInfo;
            int prec = decimalTypeInfo.precision();
            int scale = decimalTypeInfo.scale();
            int bytes = ParquetHiveSerDe.PRECISION_TO_BYTE_COUNT[prec - 1];
            if (repetition == Repetition.OPTIONAL) {
                return Types.optional(PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY).length(bytes).as(OriginalType.DECIMAL).scale(scale).precision(prec).named(name);
            }
            return Types.repeated(PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY).length(bytes).as(OriginalType.DECIMAL).scale(scale).precision(prec).named(name);
        }
        if (typeInfo.equals(TypeInfoFactory.dateTypeInfo)) {
            return Types.primitive(PrimitiveTypeName.INT32, repetition).as(OriginalType.DATE).named(name);
        }
        if (typeInfo.equals(TypeInfoFactory.unknownTypeInfo)) {
            throw new UnsupportedOperationException("Unknown type not implemented");
        }
        throw new IllegalArgumentException("Unknown type: " + typeInfo);
    }
    if (typeInfo.getCategory() == Category.LIST) {
        return convertArrayType(name, (ListTypeInfo) typeInfo, repetition);
    }
    if (typeInfo.getCategory() == Category.STRUCT) {
        return convertStructType(name, (StructTypeInfo) typeInfo, repetition);
    }
    if (typeInfo.getCategory() == Category.MAP) {
        return convertMapType(name, (MapTypeInfo) typeInfo, repetition);
    }
    if (typeInfo.getCategory() == Category.UNION) {
        throw new UnsupportedOperationException("Union type not implemented");
    }
    throw new IllegalArgumentException("Unknown type: " + typeInfo);
}
 
Example 5
Source File: SingleLevelArraySchemaConverter.java    From presto with Apache License 2.0 4 votes vote down vote up
private static Type convertType(String name, TypeInfo typeInfo,
        Repetition repetition)
{
    if (typeInfo.getCategory() == Category.PRIMITIVE) {
        if (typeInfo.equals(TypeInfoFactory.stringTypeInfo)) {
            return Types.primitive(PrimitiveTypeName.BINARY, repetition).as(OriginalType.UTF8)
                    .named(name);
        }
        else if (typeInfo.equals(TypeInfoFactory.intTypeInfo) ||
                typeInfo.equals(TypeInfoFactory.shortTypeInfo) ||
                typeInfo.equals(TypeInfoFactory.byteTypeInfo)) {
            return Types.primitive(PrimitiveTypeName.INT32, repetition).named(name);
        }
        else if (typeInfo.equals(TypeInfoFactory.longTypeInfo)) {
            return Types.primitive(PrimitiveTypeName.INT64, repetition).named(name);
        }
        else if (typeInfo.equals(TypeInfoFactory.doubleTypeInfo)) {
            return Types.primitive(PrimitiveTypeName.DOUBLE, repetition).named(name);
        }
        else if (typeInfo.equals(TypeInfoFactory.floatTypeInfo)) {
            return Types.primitive(PrimitiveTypeName.FLOAT, repetition).named(name);
        }
        else if (typeInfo.equals(TypeInfoFactory.booleanTypeInfo)) {
            return Types.primitive(PrimitiveTypeName.BOOLEAN, repetition).named(name);
        }
        else if (typeInfo.equals(TypeInfoFactory.binaryTypeInfo)) {
            return Types.primitive(PrimitiveTypeName.BINARY, repetition).named(name);
        }
        else if (typeInfo.equals(TypeInfoFactory.timestampTypeInfo)) {
            return Types.primitive(PrimitiveTypeName.INT96, repetition).named(name);
        }
        else if (typeInfo.equals(TypeInfoFactory.voidTypeInfo)) {
            throw new UnsupportedOperationException("Void type not implemented");
        }
        else if (typeInfo.getTypeName().toLowerCase(Locale.ENGLISH).startsWith(
                serdeConstants.CHAR_TYPE_NAME)) {
            if (repetition == Repetition.OPTIONAL) {
                return Types.optional(PrimitiveTypeName.BINARY).as(OriginalType.UTF8).named(name);
            }
            else {
                return Types.repeated(PrimitiveTypeName.BINARY).as(OriginalType.UTF8).named(name);
            }
        }
        else if (typeInfo.getTypeName().toLowerCase(Locale.ENGLISH).startsWith(
                serdeConstants.VARCHAR_TYPE_NAME)) {
            if (repetition == Repetition.OPTIONAL) {
                return Types.optional(PrimitiveTypeName.BINARY).as(OriginalType.UTF8).named(name);
            }
            else {
                return Types.repeated(PrimitiveTypeName.BINARY).as(OriginalType.UTF8).named(name);
            }
        }
        else if (typeInfo instanceof DecimalTypeInfo) {
            DecimalTypeInfo decimalTypeInfo = (DecimalTypeInfo) typeInfo;
            int prec = decimalTypeInfo.precision();
            int scale = decimalTypeInfo.scale();
            int bytes = ParquetHiveSerDe.PRECISION_TO_BYTE_COUNT[prec - 1];
            if (repetition == Repetition.OPTIONAL) {
                return Types.optional(PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY).length(bytes).as(OriginalType.DECIMAL).scale(scale).precision(prec).named(name);
            }
            else {
                return Types.repeated(PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY).length(bytes).as(OriginalType.DECIMAL).scale(scale).precision(prec).named(name);
            }
        }
        else if (typeInfo.equals(TypeInfoFactory.dateTypeInfo)) {
            return Types.primitive(PrimitiveTypeName.INT32, repetition).as(OriginalType.DATE).named(name);
        }
        else if (typeInfo.equals(TypeInfoFactory.unknownTypeInfo)) {
            throw new UnsupportedOperationException("Unknown type not implemented");
        }
        else {
            throw new IllegalArgumentException("Unknown type: " + typeInfo);
        }
    }
    else if (typeInfo.getCategory() == Category.LIST) {
        return convertArrayType(name, (ListTypeInfo) typeInfo, repetition);
    }
    else if (typeInfo.getCategory() == Category.STRUCT) {
        return convertStructType(name, (StructTypeInfo) typeInfo, repetition);
    }
    else if (typeInfo.getCategory() == Category.MAP) {
        return convertMapType(name, (MapTypeInfo) typeInfo, repetition);
    }
    else if (typeInfo.getCategory() == Category.UNION) {
        throw new UnsupportedOperationException("Union type not implemented");
    }
    else {
        throw new IllegalArgumentException("Unknown type: " + typeInfo);
    }
}
 
Example 6
Source File: MapKeyValuesSchemaConverter.java    From presto with Apache License 2.0 4 votes vote down vote up
private static GroupType convertStructType(String name, StructTypeInfo typeInfo)
{
    List<String> columnNames = typeInfo.getAllStructFieldNames();
    List<TypeInfo> columnTypes = typeInfo.getAllStructFieldTypeInfos();
    return new GroupType(Repetition.OPTIONAL, name, convertTypes(columnNames, columnTypes));
}
 
Example 7
Source File: MapKeyValuesSchemaConverter.java    From presto with Apache License 2.0 4 votes vote down vote up
private static GroupType listWrapper(String name, OriginalType originalType, GroupType groupType)
{
    return new GroupType(Repetition.OPTIONAL, name, originalType, groupType);
}
 
Example 8
Source File: HiveSchemaConverter.java    From parquet-mr with Apache License 2.0 4 votes vote down vote up
private static GroupType convertStructType(final String name, final StructTypeInfo typeInfo) {
  final List<String> columnNames = typeInfo.getAllStructFieldNames();
  final List<TypeInfo> columnTypes = typeInfo.getAllStructFieldTypeInfos();
  return new GroupType(Repetition.OPTIONAL, name, convertTypes(columnNames, columnTypes));

}