org.apache.parquet.schema.Type.Repetition Java Examples

The following examples show how to use org.apache.parquet.schema.Type.Repetition. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: ParquetRecordWriter.java    From Bats with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the Parquet schema type for a field, recursing into the children of MAP fields.
 *
 * @param field the field to convert
 * @return a {@link GroupType} for MAP fields, otherwise whatever {@code getPrimitiveType} returns
 * @throws UnsupportedOperationException if the field's minor type is LIST
 */
private Type getType(MaterializedField field) {
  MinorType minorType = field.getType().getMinorType();
  DataMode dataMode = field.getType().getMode();
  switch (minorType) {
    case MAP: {
      // Convert every child first; the group is assembled from the converted children.
      List<Type> childTypes = Lists.newArrayList();
      for (MaterializedField child : field.getChildren()) {
        childTypes.add(getType(child));
      }
      // Any mode other than REPEATED yields an OPTIONAL group.
      Repetition groupRepetition;
      if (dataMode == DataMode.REPEATED) {
        groupRepetition = Repetition.REPEATED;
      } else {
        groupRepetition = Repetition.OPTIONAL;
      }
      return new GroupType(groupRepetition, field.getName(), childTypes);
    }
    case LIST:
      throw new UnsupportedOperationException("Unsupported type " + minorType);
    case NULL: {
      // NULL-typed fields are re-typed as OPTIONAL INT before conversion.
      TypeProtos.MajorType optionalInt = TypeProtos.MajorType.newBuilder()
          .setMinorType(MinorType.INT)
          .setMode(DataMode.OPTIONAL)
          .build();
      return getPrimitiveType(field.withType(optionalInt));
    }
    default:
      return getPrimitiveType(field);
  }
}
 
Example #2
Source File: TestTypeBuilders.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/**
 * Checks, for each listed primitive type and each repetition, that both the generic builder
 * and the repetition-specific builder produce a type equal to the one built by the constructor.
 */
@Test
public void testPrimitiveTypeConstruction() {
  PrimitiveTypeName[] primitiveNames = {BOOLEAN, INT32, INT64, INT96, FLOAT, DOUBLE, BINARY};
  for (PrimitiveTypeName primitiveName : primitiveNames) {
    String fieldName = primitiveName.toString() + "_";
    for (Type.Repetition rep : Type.Repetition.values()) {
      PrimitiveType viaConstructor = new PrimitiveType(rep, primitiveName, fieldName);

      // Generic builder: repetition passed explicitly.
      PrimitiveType viaBuilder = Types.primitive(primitiveName, rep).named(fieldName);
      Assert.assertEquals(viaConstructor, viaBuilder);

      // Repetition-specific shortcut builder.
      switch (rep) {
        case REQUIRED:
          viaBuilder = Types.required(primitiveName).named(fieldName);
          break;
        case OPTIONAL:
          viaBuilder = Types.optional(primitiveName).named(fieldName);
          break;
        case REPEATED:
          viaBuilder = Types.repeated(primitiveName).named(fieldName);
          break;
      }
      Assert.assertEquals(viaConstructor, viaBuilder);
    }
  }
}
 
Example #3
Source File: TestTypeBuilders.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/**
 * Checks, for each repetition, that a FIXED_LEN_BYTE_ARRAY type of length 5 built through the
 * builders equals the one built by the constructor.
 */
@Test
public void testFixedTypeConstruction() {
  final String fieldName = "fixed_";
  final int byteLength = 5;
  for (Type.Repetition rep : Type.Repetition.values()) {
    PrimitiveType viaConstructor =
        new PrimitiveType(rep, FIXED_LEN_BYTE_ARRAY, byteLength, fieldName);

    // Generic builder: repetition passed explicitly.
    PrimitiveType viaBuilder =
        Types.primitive(FIXED_LEN_BYTE_ARRAY, rep).length(byteLength).named(fieldName);
    Assert.assertEquals(viaConstructor, viaBuilder);

    // Repetition-specific shortcut builder.
    switch (rep) {
      case REQUIRED:
        viaBuilder = Types.required(FIXED_LEN_BYTE_ARRAY).length(byteLength).named(fieldName);
        break;
      case OPTIONAL:
        viaBuilder = Types.optional(FIXED_LEN_BYTE_ARRAY).length(byteLength).named(fieldName);
        break;
      case REPEATED:
        viaBuilder = Types.repeated(FIXED_LEN_BYTE_ARRAY).length(byteLength).named(fieldName);
        break;
    }
    Assert.assertEquals(viaConstructor, viaBuilder);
  }
}
 
Example #4
Source File: MessageTypeToType.java    From iceberg with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the list type for a Parquet list group.
 *
 * <p>The element is taken as the first field of the first (repeated) child of {@code array}.
 *
 * @param array the Parquet group representing the list
 * @param elementType the already-converted element type
 * @return an optional-element list if the Parquet element is OPTIONAL, otherwise a
 *     required-element list
 * @throws IllegalArgumentException if the element itself has repetition REPEATED
 */
@Override
public Type list(GroupType array, Type elementType) {
  org.apache.parquet.schema.Type element = array.getType(0).asGroupType().getType(0);

  Preconditions.checkArgument(
      !element.isRepetition(Repetition.REPEATED),
      "Elements cannot have repetition REPEATED: %s", element);

  int elementId = getId(element);
  addAlias(element.getName(), elementId);

  return element.isRepetition(Repetition.OPTIONAL)
      ? Types.ListType.ofOptional(elementId, elementType)
      : Types.ListType.ofRequired(elementId, elementType);
}
 
Example #5
Source File: HiveGroupConverter.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/**
 * Picks the converter matching a Parquet type.
 *
 * @param type the Parquet type to convert; may be {@code null}
 * @param index passed through to the created converter
 * @param parent passed through to the created converter
 * @return {@code null} for a {@code null} type, a primitive converter for primitive types, an
 *     array converter for REPEATED groups, and a data converter for all other groups
 */
protected static Converter getConverterFromDescription(final Type type, final int index,
    final HiveGroupConverter parent) {
  if (type == null) {
    return null;
  }
  if (type.isPrimitive()) {
    return ETypeConverter.getNewConverter(
        type.asPrimitiveType().getPrimitiveTypeName().javaType, index, parent);
  }
  // Group types: repeated groups become arrays, everything else a plain group.
  final boolean repeatedGroup = type.asGroupType().getRepetition() == Repetition.REPEATED;
  if (repeatedGroup) {
    return new ArrayWritableGroupConverter(type.asGroupType(), parent, index);
  }
  return new DataWritableGroupConverter(type.asGroupType(), parent, index);
}
 
Example #6
Source File: MessageTypeToType.java    From iceberg with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the list type for a Parquet list group.
 *
 * <p>The element is taken as the first field of the first (repeated) child of {@code array}.
 *
 * @param array the Parquet group representing the list
 * @param elementType the already-converted element type
 * @return an optional-element list if the Parquet element is OPTIONAL, otherwise a
 *     required-element list
 * @throws IllegalArgumentException if the element itself has repetition REPEATED
 */
@Override
public Type list(GroupType array, Type elementType) {
  GroupType repeated = array.getType(0).asGroupType();
  org.apache.parquet.schema.Type element = repeated.getType(0);

  // Preconditions message templates only substitute %s; a "{}" placeholder would be left
  // verbatim and the argument appended in brackets at the end of the message.
  Preconditions.checkArgument(
      !element.isRepetition(Repetition.REPEATED),
      "Elements cannot have repetition REPEATED: %s", element);

  int elementFieldId = getId(element);

  addAlias(element.getName(), elementFieldId);

  if (element.isRepetition(Repetition.OPTIONAL)) {
    return Types.ListType.ofOptional(elementFieldId, elementType);
  } else {
    return Types.ListType.ofRequired(elementFieldId, elementType);
  }
}
 
Example #7
Source File: TestHiveSchemaConverter.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/**
 * Converts a single Hive {@code map<string,string>} column and checks the name, original type
 * and repetition of the top-level field and of its single child.
 */
@Test
public void testMapOriginalType() throws Exception {
  final List<String> names = createHiveColumnsFrom("mapCol");
  final List<TypeInfo> typeInfos = createHiveTypeInfoFrom("map<string,string>");
  final MessageType schema = HiveSchemaConverter.convert(names, typeInfos);

  // Top level: exactly one OPTIONAL field named "mapCol" annotated as MAP.
  assertEquals(1, schema.getFieldCount());
  org.apache.parquet.schema.Type mapField = schema.getFields().get(0);
  assertEquals("mapCol", mapField.getName());
  assertEquals(OriginalType.MAP, mapField.getOriginalType());
  assertEquals(Repetition.OPTIONAL, mapField.getRepetition());

  // Second level: exactly one REPEATED field named "map" annotated as MAP_KEY_VALUE.
  assertEquals(1, mapField.asGroupType().getFieldCount());
  org.apache.parquet.schema.Type keyValueField = mapField.asGroupType().getFields().get(0);
  assertEquals("map", keyValueField.getName());
  assertEquals(OriginalType.MAP_KEY_VALUE, keyValueField.getOriginalType());
  assertEquals(Repetition.REPEATED, keyValueField.getRepetition());
}
 
Example #8
Source File: SingleLevelArrayMapKeyValuesSchemaConverter.java    From presto with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the group type for a map whose entries live in a repeated group named {@code mapAlias}.
 *
 * @param repetition repetition of the outer map field
 * @param alias name of the outer map field
 * @param mapAlias name of the repeated entry group
 * @param keyType type of the map key
 * @param valueType type of the map value, or {@code null} when only the key is projected
 * @return the wrapped map group
 * @throws RuntimeException if {@code valueType} is non-null and not named "value"
 */
public static GroupType mapType(Repetition repetition, String alias, String mapAlias, Type keyType, Type valueType)
{
    if (valueType != null && !valueType.getName().equals("value")) {
        throw new RuntimeException(valueType.getName() + " should be value");
    }

    // A null value type means projection on the key only, so the entry group holds just the key.
    GroupType entries;
    if (valueType == null) {
        entries = new GroupType(Repetition.REPEATED, mapAlias, keyType);
    }
    else {
        entries = new GroupType(Repetition.REPEATED, mapAlias, keyType, valueType);
    }
    return listWrapper(repetition, alias, MAP_KEY_VALUE, entries);
}
 
Example #9
Source File: MessageTypeParser.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/**
 * Parses one group declaration from the tokenizer and adds it to {@code builder}.
 *
 * <p>Tokens are consumed in this order: the group name, an optional parenthesised original
 * type, an optional {@code =} followed by an integer id, and then the group's fields (handled
 * by {@code addGroupTypeFields}).
 *
 * @param st tokenizer positioned just before the group name
 * @param r repetition of the group being added
 * @param builder parent builder that receives the new group
 * @throws IllegalArgumentException if reading the group's fields fails; the original exception
 *     is kept as the cause
 */
private static void addGroupType(Tokenizer st, Repetition r, GroupBuilder<?> builder) {
  GroupBuilder<?> childBuilder = builder.group(r);
  String t;
  String name = st.nextToken();

  // Read annotation, if any.
  t = st.nextToken();
  OriginalType originalType = null;
  if (t.equalsIgnoreCase("(")) {
    originalType = OriginalType.valueOf(st.nextToken());
    childBuilder.as(originalType);
    check(st.nextToken(), ")", "original type ended by )", st);
    t = st.nextToken();
  }
  // Read the field id, if any.
  if (t.equals("=")) {
    childBuilder.id(Integer.parseInt(st.nextToken()));
    t = st.nextToken();
  }
  try {
    // t is the first token after the name, annotation and id.
    addGroupTypeFields(t, st, childBuilder);
  } catch (IllegalArgumentException e) {
    // Re-throw with the group's name and annotation so the failing declaration can be located.
    throw new IllegalArgumentException("problem reading type: type = group, name = " + name + ", original type = " + originalType, e);
  }

  childBuilder.named(name);
}
 
Example #10
Source File: PigSchemaConverter.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/**
 * Converts a list of Parquet fields into a Pig schema.
 *
 * <p>A field with repetition REPEATED is wrapped in a BAG whose schema holds the converted
 * field; every other field is added as converted.
 *
 * @param parquetFields the Parquet fields to convert
 * @return a schema with one entry per input field
 * @throws SchemaConversionException if converting a field raises a {@code FrontendException}
 */
private Schema convertFields(List<Type> parquetFields) {
  List<FieldSchema> converted = new ArrayList<FieldSchema>();
  for (Type parquetType : parquetFields) {
    try {
      FieldSchema fieldSchema = getFieldSchema(parquetType);
      if (!parquetType.isRepetition(Repetition.REPEATED)) {
        converted.add(fieldSchema);
      } else {
        Schema bagSchema = new Schema(Arrays.asList(fieldSchema));
        converted.add(new FieldSchema(null, bagSchema, DataType.BAG));
      }
    } catch (FrontendException fe) {
      throw new SchemaConversionException("can't convert "+ parquetType, fe);
    }
  }
  return new Schema(converted);
}
 
Example #11
Source File: MetadataUtils.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/**
 * Prints a one-line summary of a primitive column: its dot-indented name, repetition, primitive
 * type, original type (when present) and, when a container schema is given, its maximum
 * repetition and definition levels.
 *
 * @param out destination writer
 * @param type the primitive column to describe
 * @param depth number of leading dots to print before the name
 * @param container schema used to look up the column descriptor; may be {@code null}
 * @param cpath path of the enclosing groups; modified temporarily and restored before returning
 */
private static void showDetails(PrettyPrintWriter out, PrimitiveType type, int depth, MessageType container, List<String> cpath) {
  String indentedName = Strings.repeat(".", depth) + type.getName();
  OriginalType originalType = type.getOriginalType();
  Repetition repetition = type.getRepetition();
  PrimitiveTypeName primitiveName = type.getPrimitiveTypeName();

  out.format("%s: %s %s", indentedName, repetition, primitiveName);
  if (originalType != null) {
    out.format(" O:%s", originalType);
  }

  if (container != null) {
    // Push this column's name to form its full path, snapshot it, then pop it again.
    cpath.add(type.getName());
    String[] fullPath = cpath.toArray(new String[0]);
    cpath.remove(cpath.size() - 1);

    ColumnDescriptor descriptor = container.getColumnDescription(fullPath);

    int maxDefinitionLevel = descriptor.getMaxDefinitionLevel();
    int maxRepetitionLevel = descriptor.getMaxRepetitionLevel();
    out.format(" R:%d D:%d", maxRepetitionLevel, maxDefinitionLevel);
  }
  out.println();
}
 
Example #12
Source File: PigSchemaConverter.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/**
 * Converts a Pig map field into a Parquet group with string keys.
 *
 * @param alias name of the resulting Parquet field
 * @param fieldSchema Pig schema of the map; its inner schema must contain exactly one field
 * @return an optional group containing one repeated group field (key, value)
 * @throws SchemaConversionException if the inner schema is missing, does not have exactly one
 *     field, or its field cannot be read
 */
private GroupType convertMap(String alias, FieldSchema fieldSchema) {
  Schema valueSchema = fieldSchema.schema;
  boolean hasSingleField = valueSchema != null && valueSchema.size() == 1;
  if (!hasSingleField) {
    throw new SchemaConversionException("Invalid map Schema, schema should contain exactly one field: " + fieldSchema);
  }

  final FieldSchema valueField;
  try {
    valueField = valueSchema.getField(0);
  } catch (FrontendException fe) {
    throw new SchemaConversionException("Invalid map schema, cannot infer innerschema: ", fe);
  }

  Type convertedValue = convertWithName(valueField, "value");
  return ConversionPatterns.stringKeyMapType(
      Repetition.OPTIONAL, alias, name(valueField.alias, "map"), convertedValue);
}
 
Example #13
Source File: ParquetRecordWriter.java    From Bats with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the Parquet primitive type for a field.
 *
 * <p>For decimal minor types the primitive type name defaults to {@code logicalTypeForDecimals};
 * when {@code usePrimitiveTypesForDecimals} is set it becomes INT32 or INT64 if the field's
 * precision fits, and the length is derived from the precision.
 *
 * @param field the field to convert
 * @return the primitive type carrying the field's name, repetition, original type and decimal
 *     metadata
 */
protected PrimitiveType getPrimitiveType(MaterializedField field) {
  MinorType minorType = field.getType().getMinorType();
  String fieldName = field.getName();
  int byteLength = ParquetTypeHelper.getLengthForMinorType(minorType);
  PrimitiveTypeName parquetTypeName = ParquetTypeHelper.getPrimitiveTypeNameForMinorType(minorType);

  if (Types.isDecimalType(minorType)) {
    // Try the narrowest integer representation first; the INT64 check only runs if INT32 fails.
    boolean fitsInt32 = usePrimitiveTypesForDecimals
        && field.getPrecision() <= ParquetTypeHelper.getMaxPrecisionForPrimitiveType(PrimitiveTypeName.INT32);
    boolean fitsInt64 = usePrimitiveTypesForDecimals
        && !fitsInt32
        && field.getPrecision() <= ParquetTypeHelper.getMaxPrecisionForPrimitiveType(PrimitiveTypeName.INT64);

    if (fitsInt32) {
      parquetTypeName = PrimitiveTypeName.INT32;
    } else if (fitsInt64) {
      parquetTypeName = PrimitiveTypeName.INT64;
    } else {
      parquetTypeName = logicalTypeForDecimals;
    }

    byteLength = DecimalUtility.getMaxBytesSizeForPrecision(field.getPrecision());
  }

  Repetition repetition = ParquetTypeHelper.getRepetitionForDataMode(field.getDataMode());
  OriginalType originalType = ParquetTypeHelper.getOriginalTypeForMinorType(minorType);
  DecimalMetadata decimalMetadata = ParquetTypeHelper.getDecimalMetadataForField(field);
  return new PrimitiveType(
      repetition, parquetTypeName, byteLength, fieldName, originalType, decimalMetadata, null);
}
 
Example #14
Source File: MessageTypeParser.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Parses one field declaration and adds it to {@code builder}.
 *
 * @param t token holding the field's repetition
 * @param st tokenizer positioned just before the field's type keyword
 * @param builder group builder that receives the parsed field
 */
private static void addType(String t, Tokenizer st, Types.GroupBuilder builder) {
  Repetition repetition = asRepetition(t, st);

  // The token after the repetition is either "group" or a primitive type name.
  String typeKeyword = st.nextToken();
  if (!"group".equalsIgnoreCase(typeKeyword)) {
    addPrimitiveType(st, asPrimitive(typeKeyword, st), repetition, builder);
    return;
  }
  addGroupType(st, repetition, builder);
}
 
Example #15
Source File: TestTypeBuilders.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that an OPTIONAL type built with an undefined column order equals the one built by the
 * constructor, for each listed primitive type.
 */
@Test
public void testTypeConstructionWithUndefinedColumnOrder() {
  PrimitiveTypeName[] primitiveNames = {
      BOOLEAN, INT32, INT64, INT96, FLOAT, DOUBLE, BINARY, FIXED_LEN_BYTE_ARRAY
  };
  for (PrimitiveTypeName primitiveName : primitiveNames) {
    String fieldName = primitiveName.toString() + "_";
    // Only the fixed-length type gets a non-zero length.
    int byteLength = 0;
    if (primitiveName == FIXED_LEN_BYTE_ARRAY) {
      byteLength = 42;
    }
    PrimitiveType viaConstructor = new PrimitiveType(
        Repetition.OPTIONAL, primitiveName, byteLength, fieldName, null, null, null,
        ColumnOrder.undefined());
    PrimitiveType viaBuilder = Types.optional(primitiveName)
        .length(byteLength)
        .columnOrder(ColumnOrder.undefined())
        .named(fieldName);
    Assert.assertEquals(viaConstructor, viaBuilder);
  }
}
 
Example #16
Source File: TestTypeBuilders.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Checks, for each repetition, that a group holding one field and one nested group is equal
 * whether built by the constructors, the generic group builder, or the repetition-specific
 * group builders.
 */
@Test
public void testGroupTypeConstruction() {
  PrimitiveType f1 = Types.required(BINARY).as(UTF8).named("f1");
  PrimitiveType f2 = Types.required(INT32).named("f2");
  PrimitiveType f3 = Types.optional(INT32).named("f3");
  String groupName = "group";
  for (Type.Repetition rep : Type.Repetition.values()) {
    GroupType viaConstructor = new GroupType(rep, groupName,
        f1,
        new GroupType(rep, "g1", f2, f3));

    // Generic builder: repetition passed explicitly at both levels.
    GroupType viaBuilder = Types.buildGroup(rep)
        .addField(f1)
        .group(rep).addFields(f2, f3).named("g1")
        .named(groupName);
    Assert.assertEquals(viaConstructor, viaBuilder);

    // Repetition-specific shortcut builders.
    switch (rep) {
      case REQUIRED:
        viaBuilder = Types.requiredGroup()
            .addField(f1)
            .requiredGroup().addFields(f2, f3).named("g1")
            .named(groupName);
        break;
      case OPTIONAL:
        viaBuilder = Types.optionalGroup()
            .addField(f1)
            .optionalGroup().addFields(f2, f3).named("g1")
            .named(groupName);
        break;
      case REPEATED:
        viaBuilder = Types.repeatedGroup()
            .addField(f1)
            .repeatedGroup().addFields(f2, f3).named("g1")
            .named(groupName);
        break;
    }
    Assert.assertEquals(viaConstructor, viaBuilder);
  }
}
 
Example #17
Source File: TestTypeBuilders.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that an OPTIONAL type built with a type-defined column order equals the one built by
 * the constructor, for each listed primitive type.
 */
@Test
public void testTypeConstructionWithTypeDefinedColumnOrder() {
  PrimitiveTypeName[] primitiveNames = {
      BOOLEAN, INT32, INT64, FLOAT, DOUBLE, BINARY, FIXED_LEN_BYTE_ARRAY
  };
  for (PrimitiveTypeName primitiveName : primitiveNames) {
    String fieldName = primitiveName.toString() + "_";
    // Only the fixed-length type gets a non-zero length.
    int byteLength = 0;
    if (primitiveName == FIXED_LEN_BYTE_ARRAY) {
      byteLength = 42;
    }
    PrimitiveType viaConstructor = new PrimitiveType(
        Repetition.OPTIONAL, primitiveName, byteLength, fieldName, null, null, null,
        ColumnOrder.typeDefined());
    PrimitiveType viaBuilder = Types.optional(primitiveName)
        .length(byteLength)
        .columnOrder(ColumnOrder.typeDefined())
        .named(fieldName);
    Assert.assertEquals(viaConstructor, viaBuilder);
  }
}
 
Example #18
Source File: HiveSchemaConverter.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Converts a Hive map type into a Parquet map group.
 *
 * <p>The key is converted with repetition REQUIRED; the value uses the two-argument
 * {@code convertType} overload, and the resulting map group is OPTIONAL.
 *
 * @param name name of the resulting Parquet field
 * @param typeInfo Hive type information for the map
 * @return the Parquet map group
 */
private static GroupType convertMapType(final String name, final MapTypeInfo typeInfo) {
  final String keyName = ParquetHiveSerDe.MAP_KEY.toString();
  final Type mapKey = convertType(keyName, typeInfo.getMapKeyTypeInfo(), Repetition.REQUIRED);

  final String valueName = ParquetHiveSerDe.MAP_VALUE.toString();
  final Type mapValue = convertType(valueName, typeInfo.getMapValueTypeInfo());

  return ConversionPatterns.mapType(Repetition.OPTIONAL, name, mapKey, mapValue);
}
 
Example #19
Source File: TestTypeBuildersWithLogicalTypes.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Checks, for each repetition, that a group holding one string-annotated field and one nested
 * group is equal whether built by the constructors, the generic group builder, or the
 * repetition-specific group builders.
 */
@Test
public void testGroupTypeConstruction() {
  PrimitiveType f1 = Types.required(BINARY).as(stringType()).named("f1");
  PrimitiveType f2 = Types.required(INT32).named("f2");
  PrimitiveType f3 = Types.optional(INT32).named("f3");
  String groupName = "group";
  for (Repetition rep : Repetition.values()) {
    GroupType viaConstructor = new GroupType(rep, groupName,
        f1,
        new GroupType(rep, "g1", f2, f3));

    // Generic builder: repetition passed explicitly at both levels.
    GroupType viaBuilder = Types.buildGroup(rep)
        .addField(f1)
        .group(rep).addFields(f2, f3).named("g1")
        .named(groupName);
    Assert.assertEquals(viaConstructor, viaBuilder);

    // Repetition-specific shortcut builders.
    switch (rep) {
      case REQUIRED:
        viaBuilder = Types.requiredGroup()
            .addField(f1)
            .requiredGroup().addFields(f2, f3).named("g1")
            .named(groupName);
        break;
      case OPTIONAL:
        viaBuilder = Types.optionalGroup()
            .addField(f1)
            .optionalGroup().addFields(f2, f3).named("g1")
            .named(groupName);
        break;
      case REPEATED:
        viaBuilder = Types.repeatedGroup()
            .addField(f1)
            .repeatedGroup().addFields(f2, f3).named("g1")
            .named(groupName);
        break;
    }
    Assert.assertEquals(viaConstructor, viaBuilder);
  }
}
 
Example #20
Source File: DataWritableReadSupport.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Creates the Parquet read context, including the requested schema, during the init phase.
 *
 * <p>When the configuration lists columns, a table schema is assembled from them (columns
 * missing from the file become OPTIONAL BINARY fields), stored in the context metadata, and
 * narrowed to the column indexes reported by {@code ColumnProjectionUtils}. Otherwise the file
 * schema is used unchanged.
 *
 * @param configuration needed to get the wanted columns
 * @param keyValueMetaData unused
 * @param fileSchema parquet file schema
 * @return the parquet ReadContext
 */
@Override
public org.apache.parquet.hadoop.api.ReadSupport.ReadContext init(final Configuration configuration,
    final Map<String, String> keyValueMetaData, final MessageType fileSchema) {
  final String columns = configuration.get(IOConstants.COLUMNS);
  final Map<String, String> contextMetadata = new HashMap<String, String>();

  // No column list configured: read the file with its own schema.
  if (columns == null) {
    contextMetadata.put(HIVE_SCHEMA_KEY, fileSchema.toString());
    return new ReadContext(fileSchema, contextMetadata);
  }

  final List<String> listColumns = getColumns(columns);

  final List<Type> tableFields = new ArrayList<Type>();
  for (final String col : listColumns) {
    // listColumns contains partition columns which are metadata only;
    // substituting an optional binary field for absent columns allows schema evolution
    final Type tableField = fileSchema.containsField(col)
        ? fileSchema.getType(col)
        : new PrimitiveType(Repetition.OPTIONAL, PrimitiveTypeName.BINARY, col);
    tableFields.add(tableField);
  }
  final MessageType tableSchema = new MessageType(TABLE_SCHEMA, tableFields);
  contextMetadata.put(HIVE_SCHEMA_KEY, tableSchema.toString());

  // Keep only the columns whose indexes were requested.
  final List<Integer> wantedIndexes = ColumnProjectionUtils.getReadColumnIDs(configuration);
  final List<Type> wantedFields = new ArrayList<Type>();
  for (final Integer idx : wantedIndexes) {
    wantedFields.add(tableSchema.getType(listColumns.get(idx)));
  }
  final MessageType requestedSchemaByUser = resolveSchemaAccess(
      new MessageType(fileSchema.getName(), wantedFields), fileSchema, configuration);

  return new ReadContext(requestedSchemaByUser, contextMetadata);
}
 
Example #21
Source File: TupleConverter.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Starts a new tuple of {@code schemaSize} fields.
 *
 * <p>When {@code elephantBirdCompatible} is set, every OPTIONAL primitive field of type INT32,
 * INT64, FLOAT, DOUBLE or BOOLEAN is pre-populated with the matching zero constant (BOOLEAN
 * uses the 32-bit integer zero); other fields are left untouched.
 *
 * @throws RuntimeException wrapping any {@code ExecException} raised while setting a field
 */
@Override
public final void start() {
  currentTuple = TF.newTuple(schemaSize);
  if (!elephantBirdCompatible) {
    return;
  }
  try {
    int position = 0;
    for (Type field : parquetSchema.getFields()) {
      boolean optionalPrimitive = field.isPrimitive() && field.isRepetition(Repetition.OPTIONAL);
      if (optionalPrimitive) {
        switch (field.asPrimitiveType().getPrimitiveTypeName()) {
          case INT32:
          case BOOLEAN:
            currentTuple.set(position, I32_ZERO);
            break;
          case INT64:
            currentTuple.set(position, I64_ZERO);
            break;
          case FLOAT:
            currentTuple.set(position, FLOAT_ZERO);
            break;
          case DOUBLE:
            currentTuple.set(position, DOUBLE_ZERO);
            break;
          default:
            // No default value for the remaining primitive types.
            break;
        }
      }
      position++;
    }
  } catch (ExecException e) {
    throw new RuntimeException(e);
  }
}
 
Example #22
Source File: PigSchemaConverter.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Converts a Pig field schema into a Parquet type with the given name.
 *
 * @param fieldSchema the Pig field to convert
 * @param name name of the resulting Parquet type
 * @return the converted Parquet type
 * @throws UnsupportedOperationException for DATETIME fields
 * @throws SchemaConversionException for unknown Pig types, or when the conversion raises a
 *     {@code FrontendException}
 */
private Type convertWithName(FieldSchema fieldSchema, String name) {
  try {
    switch (fieldSchema.type) {
    // Scalar types map directly onto Parquet primitives.
    case DataType.BOOLEAN:
      return primitive(name, PrimitiveTypeName.BOOLEAN);
    case DataType.INTEGER:
      return primitive(name, PrimitiveTypeName.INT32);
    case DataType.LONG:
      return primitive(name, PrimitiveTypeName.INT64);
    case DataType.FLOAT:
      return primitive(name, PrimitiveTypeName.FLOAT);
    case DataType.DOUBLE:
      return primitive(name, PrimitiveTypeName.DOUBLE);
    case DataType.CHARARRAY:
      return primitive(name, PrimitiveTypeName.BINARY, stringType());
    case DataType.BYTEARRAY:
      return primitive(name, PrimitiveTypeName.BINARY);

    // Nested types are delegated to their dedicated converters.
    case DataType.BAG:
      return convertBag(name, fieldSchema);
    case DataType.TUPLE:
      return convertTuple(name, fieldSchema, Repetition.OPTIONAL);
    case DataType.MAP:
      return convertMap(name, fieldSchema);

    case DataType.DATETIME:
      throw new UnsupportedOperationException();
    default:
      throw new SchemaConversionException("Unknown type " + fieldSchema.type + " " + DataType.findTypeName(fieldSchema.type));
    }
  } catch (FrontendException e) {
    throw new SchemaConversionException("can't convert "+fieldSchema, e);
  }
}
 
Example #23
Source File: PigSchemaConverter.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Converts a Pig bag into a Parquet list whose elements are the bag's tuples.
 *
 * @param name name of the resulting Parquet field
 * @param fieldSchema Pig schema of the bag; its first inner field is used as the tuple
 * @return an optional group containing one repeated group field
 * @throws FrontendException if the inner field cannot be read or converted
 */
private GroupType convertBag(String name, FieldSchema fieldSchema) throws FrontendException {
  FieldSchema tupleField = fieldSchema.schema.getField(0);
  String tupleName = name(tupleField.alias, "bag");
  return ConversionPatterns.listType(
      Repetition.OPTIONAL, name, convertTuple(tupleName, tupleField, Repetition.REPEATED));
}
 
Example #24
Source File: HiveSchemaConverter.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Converts a Hive type into a Parquet type.
 *
 * <p>Primitive categories are handled here; LIST, STRUCT and MAP are delegated to their
 * dedicated converters.
 *
 * @param name name of the resulting Parquet field
 * @param typeInfo Hive type to convert
 * @param repetition repetition applied to primitive results
 * @return the converted Parquet type
 * @throws UnsupportedOperationException for binary, timestamp, void, unknown and union types
 * @throws IllegalArgumentException for any other unrecognised type or category
 */
private static Type convertType(final String name, final TypeInfo typeInfo, final Repetition repetition) {
  final Category category = typeInfo.getCategory();

  if (category.equals(Category.PRIMITIVE)) {
    if (typeInfo.equals(TypeInfoFactory.stringTypeInfo)) {
      return new PrimitiveType(repetition, PrimitiveTypeName.BINARY, name);
    }
    // int, short and byte all map to INT32.
    final boolean fitsInt32 = typeInfo.equals(TypeInfoFactory.intTypeInfo)
        || typeInfo.equals(TypeInfoFactory.shortTypeInfo)
        || typeInfo.equals(TypeInfoFactory.byteTypeInfo);
    if (fitsInt32) {
      return new PrimitiveType(repetition, PrimitiveTypeName.INT32, name);
    }
    if (typeInfo.equals(TypeInfoFactory.longTypeInfo)) {
      return new PrimitiveType(repetition, PrimitiveTypeName.INT64, name);
    }
    if (typeInfo.equals(TypeInfoFactory.doubleTypeInfo)) {
      return new PrimitiveType(repetition, PrimitiveTypeName.DOUBLE, name);
    }
    if (typeInfo.equals(TypeInfoFactory.floatTypeInfo)) {
      return new PrimitiveType(repetition, PrimitiveTypeName.FLOAT, name);
    }
    if (typeInfo.equals(TypeInfoFactory.booleanTypeInfo)) {
      return new PrimitiveType(repetition, PrimitiveTypeName.BOOLEAN, name);
    }
    if (typeInfo.equals(TypeInfoFactory.binaryTypeInfo)) {
      // TODO : binaryTypeInfo is a byte array. Need to map it
      throw new UnsupportedOperationException("Binary type not implemented");
    }
    if (typeInfo.equals(TypeInfoFactory.timestampTypeInfo)) {
      throw new UnsupportedOperationException("Timestamp type not implemented");
    }
    if (typeInfo.equals(TypeInfoFactory.voidTypeInfo)) {
      throw new UnsupportedOperationException("Void type not implemented");
    }
    if (typeInfo.equals(TypeInfoFactory.unknownTypeInfo)) {
      throw new UnsupportedOperationException("Unknown type not implemented");
    }
    throw new IllegalArgumentException("Unknown type: " + typeInfo);
  }

  if (category.equals(Category.LIST)) {
    return convertArrayType(name, (ListTypeInfo) typeInfo);
  }
  if (category.equals(Category.STRUCT)) {
    return convertStructType(name, (StructTypeInfo) typeInfo);
  }
  if (category.equals(Category.MAP)) {
    return convertMapType(name, (MapTypeInfo) typeInfo);
  }
  if (category.equals(Category.UNION)) {
    throw new UnsupportedOperationException("Union type not implemented");
  }
  throw new IllegalArgumentException("Unknown type: " + typeInfo);
}
 
Example #25
Source File: ValidatingRecordConsumer.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Verifies that no REQUIRED field was skipped between the previously written field and
 * {@code index}.
 *
 * @param index index of the field about to be written (exclusive upper bound of the check)
 * @throws InvalidRecordException if a skipped field has repetition REQUIRED
 */
private void validateMissingFields(int index) {
  int skipped = previousField.peek() + 1;
  while (skipped < index) {
    Type skippedType = types.peek().asGroupType().getType(skipped);
    if (skippedType.isRepetition(Repetition.REQUIRED)) {
      throw new InvalidRecordException("required field is missing " + skippedType);
    }
    skipped++;
  }
}
 
Example #26
Source File: ColumnIO.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Walks up the tree to the REPEATED node whose repetition level equals {@code r}.
 *
 * @param r the repetition level to look for
 * @return this node if it is REPEATED at level {@code r}, otherwise the result of asking the
 *     parent, provided the parent's definition level is at least {@code r}
 * @throws InvalidRecordException if neither this node nor an eligible parent matches
 */
ColumnIO getParent(int r) {
  if (getRepetitionLevel() == r && getType().isRepetition(Repetition.REPEATED)) {
    return this;
  }
  if (getParent() != null && getParent().getDefinitionLevel() >= r) {
    return getParent().getParent(r);
  }
  throw new InvalidRecordException("no parent("+r+") for "+Arrays.toString(this.getFieldPath()));
}
 
Example #27
Source File: ConversionPatterns.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Wraps {@code nestedType} in a group annotated as a list.
 *
 * @param repetition repetition for the list
 * @param alias      name of the field
 * @param nestedType type of elements in the list
 * @return a group representing the list using a 2-level representation
 * @deprecated use listOfElements instead
 */
@Deprecated
public static GroupType listType(Repetition repetition, String alias, Type nestedType) {
  return listWrapper(repetition, alias, LogicalTypeAnnotation.listType(), nestedType);
}
 
Example #28
Source File: ConversionPatterns.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the group type for a map whose entries live in a repeated group named {@code mapAlias}.
 *
 * @param repetition repetition of the outer map field
 * @param alias name of the outer map field
 * @param mapAlias name of the repeated key/value group
 * @param keyType type of the map key
 * @param valueType type of the map value, or {@code null} when only the key is projected
 * @return the wrapped map group
 * @throws RuntimeException if {@code valueType} is non-null and not named "value"
 */
public static GroupType mapType(Repetition repetition, String alias, String mapAlias, Type keyType, Type valueType) {
  // Key-only projection: the repeated group carries just the key.
  if (valueType == null) {
    return listWrapper(
        repetition, alias, LogicalTypeAnnotation.mapType(),
        new GroupType(
            Repetition.REPEATED, mapAlias,
            LogicalTypeAnnotation.MapKeyValueTypeAnnotation.getInstance(),
            keyType));
  }

  boolean valueNamedCorrectly = valueType.getName().equals("value");
  if (!valueNamedCorrectly) {
    throw new RuntimeException(valueType.getName() + " should be value");
  }
  return listWrapper(
      repetition, alias, LogicalTypeAnnotation.mapType(),
      new GroupType(
          Repetition.REPEATED, mapAlias,
          LogicalTypeAnnotation.MapKeyValueTypeAnnotation.getInstance(),
          keyType, valueType));
}
 
Example #29
Source File: MessageTypeParser.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Parses a repetition keyword, ignoring case.
 *
 * @param t the token to parse
 * @param st tokenizer, used only to report the location in the error message
 * @return the matching repetition
 * @throws IllegalArgumentException if {@code t} is not a repetition name; the message lists the
 *     accepted values, the offending token and its location
 */
private static Repetition asRepetition(String t, Tokenizer st) {
  final String upperCased = t.toUpperCase(Locale.ENGLISH);
  try {
    return Repetition.valueOf(upperCased);
  } catch (IllegalArgumentException e) {
    final String message = "expected one of " + Arrays.toString(Repetition.values())
        + " got " + t
        + " at " + st.getLocationString();
    throw new IllegalArgumentException(message, e);
  }
}
 
Example #30
Source File: SchemaConverter.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Maps a Parquet type to an Arrow type.
 *
 * @param type parquet type
 * @param name overrides parquet.getName()
 * @param repetition overrides parquet.getRepetition()
 * @return a type mapping from the Parquet type to an Arrow type
 */
private TypeMapping fromParquet(Type type, String name, Repetition repetition) {
  if (repetition != REPEATED) {
    return type.isPrimitive()
        ? fromParquetPrimitive(type.asPrimitiveType(), name)
        : fromParquetGroup(type.asGroupType(), name);
  }
  // case where we have a repeated field that is not in a List/Map:
  // map the same type as a nameless REQUIRED child and wrap it in an Arrow list field
  TypeMapping element = fromParquet(type, null, REQUIRED);
  Field listField = new Field(name, false, new ArrowType.List(), asList(element.getArrowField()));
  return new RepeatedTypeMapping(listField, type, element);
}