Java Code Examples for org.apache.parquet.schema.Type#Repetition

The following examples show how to use org.apache.parquet.schema.Type#Repetition. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: ParquetResolverTest.java    From pxf with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the "hive_schema" Parquet message type used by these tests, declaring every
 * primitive column with the supplied repetition.
 *
 * @param repetition repetition applied to every column of the schema
 * @param readCase   when {@code true} the "tn" column is annotated INT_8, otherwise INT_16
 * @return a message type named "hive_schema" holding all columns in declaration order
 */
private MessageType getParquetSchemaForPrimitiveTypes(Type.Repetition repetition, boolean readCase) {
    // GPDB only has int16 and no int8 type, so on the write path tiny numbers are still treated as int16 shorts
    OriginalType tinyIntAnnotation;
    if (readCase) {
        tinyIntAnnotation = OriginalType.INT_8;
    } else {
        tinyIntAnnotation = OriginalType.INT_16;
    }

    List<Type> columns = new ArrayList<>();

    columns.add(new PrimitiveType(repetition, PrimitiveTypeName.BINARY, "s1", OriginalType.UTF8));
    columns.add(new PrimitiveType(repetition, PrimitiveTypeName.BINARY, "s2", OriginalType.UTF8));
    columns.add(new PrimitiveType(repetition, PrimitiveTypeName.INT32, "n1", null));
    columns.add(new PrimitiveType(repetition, PrimitiveTypeName.DOUBLE, "d1", null));
    columns.add(new PrimitiveType(
            repetition,
            PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY,
            16,
            "dc1",
            OriginalType.DECIMAL,
            new DecimalMetadata(38, 18),
            null));
    columns.add(new PrimitiveType(repetition, PrimitiveTypeName.INT96, "tm", null));
    columns.add(new PrimitiveType(repetition, PrimitiveTypeName.FLOAT, "f", null));
    columns.add(new PrimitiveType(repetition, PrimitiveTypeName.INT64, "bg", null));
    columns.add(new PrimitiveType(repetition, PrimitiveTypeName.BOOLEAN, "b", null));

    columns.add(new PrimitiveType(repetition, PrimitiveTypeName.INT32, "tn", tinyIntAnnotation));
    columns.add(new PrimitiveType(repetition, PrimitiveTypeName.INT32, "sml", OriginalType.INT_16));
    columns.add(new PrimitiveType(repetition, PrimitiveTypeName.BINARY, "vc1", OriginalType.UTF8));
    columns.add(new PrimitiveType(repetition, PrimitiveTypeName.BINARY, "c1", OriginalType.UTF8));
    columns.add(new PrimitiveType(repetition, PrimitiveTypeName.BINARY, "bin", null));

    columns.add(new PrimitiveType(repetition, PrimitiveTypeName.INT96, "tmtz", null));
    columns.add(new PrimitiveType(repetition, PrimitiveTypeName.INT96, "tmtz2", null));

    return new MessageType("hive_schema", columns);
}
 
Example 2
Source File: TestPigSchemaConverter.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/**
 * Feeds the Pig schema converter a LIST-annotated group "a" wrapping a single primitive
 * field "b", for every combination of group repetition, element repetition and primitive
 * type (INT96 excepted). The test only checks that no exception is thrown.
 */
@Test
public void testListsOfPrimitive() throws Exception {
  for (Type.Repetition listRepetition : Type.Repetition.values()) {
    for (Type.Repetition elementRepetition : Type.Repetition.values()) {
      for (PrimitiveType.PrimitiveTypeName typeName : PrimitiveType.PrimitiveTypeName.values()) {
        if (typeName == PrimitiveType.PrimitiveTypeName.INT96) {
          continue; // INT96 is not yet implemented
        }
        Types.PrimitiveBuilder<PrimitiveType> element = Types.primitive(typeName, elementRepetition);
        if (typeName == PrimitiveType.PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY) {
          // fixed-length arrays are given an explicit length before being named
          element.length(1);
        }
        GroupType listGroup = Types.buildGroup(listRepetition)
            .addField(element.named("b"))
            .as(OriginalType.LIST)
            .named("a");
        pigSchemaConverter.convertField(listGroup); // no exceptions, please
      }
    }
  }
}
 
Example 3
Source File: JsonElementConversionFactory.java    From incubator-gobblin with Apache License 2.0 6 votes vote down vote up
/**
 * Converts a JSON object into a {@code ParquetGroup} built on this converter's schema.
 * Each entry is run through the converter registered under its key; an entry whose
 * converted value is {@code null} is left out when its schema resolves to OPTIONAL.
 *
 * @param value JSON element that is read as a JSON object
 * @return the populated group
 */
@Override
Object convertField(JsonElement value) {
  ParquetGroup group = new ParquetGroup((GroupType) schema());
  for (Map.Entry<String, JsonElement> field : value.getAsJsonObject().entrySet()) {
    String name = field.getKey();
    JsonElementConverter fieldConverter = this.converters.get(name);
    Object converted = fieldConverter.convert(field.getValue());
    Type.Repetition repetition = optionalOrRequired(fieldConverter.jsonSchema);
    // A null produced for an optional field is simply not written to the group.
    boolean skipField = converted == null && repetition.equals(OPTIONAL);
    if (!skipField) {
      group.add(name, converted);
    }
  }
  return group;
}
 
Example 4
Source File: AvroSchemaConverter.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Wraps the non-null branches of a union in a group. Each branch becomes an OPTIONAL
 * child named "member0", "member1", ... in the order the branches are supplied.
 *
 * @param fieldName      name of the resulting group
 * @param repetition     repetition of the resulting group
 * @param nonNullSchemas union branches to convert
 * @return the group type holding one child per branch
 */
private Type convertUnionToGroupType(String fieldName, Type.Repetition repetition, List<Schema> nonNullSchemas) {
  List<Type> members = new ArrayList<Type>(nonNullSchemas.size());
  int position = 0;
  for (Schema branch : nonNullSchemas) {
    String memberName = "member" + position;
    position++;
    members.add(convertField(memberName, branch, Type.Repetition.OPTIONAL));
  }
  return new GroupType(repetition, fieldName, members);
}
 
Example 5
Source File: AvroSchemaConverter.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Converts a union schema. A union of exactly one non-null branch plus a null branch
 * becomes that branch converted as a plain field; every other non-empty union becomes a
 * group with one member per non-null branch. A REQUIRED repetition is relaxed to OPTIONAL
 * as soon as a null branch is seen.
 *
 * @param fieldName  name of the resulting field
 * @param schema     union schema to convert
 * @param repetition requested repetition of the field
 * @return the converted type
 * @throws UnsupportedOperationException if the union contains no non-null branch
 */
private Type convertUnion(String fieldName, Schema schema, Type.Repetition repetition) {
  List<Schema> branches = schema.getTypes();
  List<Schema> nonNullSchemas = new ArrayList<Schema>(branches.size());
  Type.Repetition effectiveRepetition = repetition;
  // Tracks whether a null branch was present; needed for the edge case of a union
  // that contains only a single type.
  boolean sawNullBranch = false;
  for (Schema branch : branches) {
    if (!branch.getType().equals(Schema.Type.NULL)) {
      nonNullSchemas.add(branch);
      continue;
    }
    sawNullBranch = true;
    if (Type.Repetition.REQUIRED == effectiveRepetition) {
      effectiveRepetition = Type.Repetition.OPTIONAL;
    }
  }

  if (nonNullSchemas.isEmpty()) {
    throw new UnsupportedOperationException("Cannot convert Avro union of only nulls");
  }
  // A null plus exactly one other type is a simple optional field.
  if (nonNullSchemas.size() == 1 && sawNullBranch) {
    return convertField(fieldName, nonNullSchemas.get(0), effectiveRepetition);
  }
  // Anything else needs a union container.
  return convertUnionToGroupType(fieldName, effectiveRepetition, nonNullSchemas);
}
 
Example 6
Source File: ThriftSchemaConvertVisitor.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Translates a Thrift field requirement into a Parquet repetition: REQUIRED stays
 * REQUIRED, while both OPTIONAL and DEFAULT map to OPTIONAL.
 *
 * @param thriftField field whose requirement is inspected
 * @return the matching repetition
 * @throws IllegalArgumentException for any other requirement value
 */
private Type.Repetition getRepetition(ThriftField thriftField) {
  switch (thriftField.getRequirement()) {
    case REQUIRED:
      return REQUIRED;
    case OPTIONAL:
    case DEFAULT:
      // both requirements end up as an optional column
      return OPTIONAL;
    default:
      throw new IllegalArgumentException("unknown requirement type: " + thriftField.getRequirement());
  }
}
 
Example 7
Source File: ProtoSchemaConverter.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Derives the Parquet repetition of a protobuf field: required fields are REQUIRED,
 * repeated fields are REPEATED and everything else is OPTIONAL. The required check
 * takes precedence over the repeated check.
 *
 * @param descriptor field descriptor to inspect
 * @return the matching repetition
 */
private Type.Repetition getRepetition(FieldDescriptor descriptor) {
  if (descriptor.isRequired()) {
    return Type.Repetition.REQUIRED;
  }
  return descriptor.isRepeated() ? Type.Repetition.REPEATED : Type.Repetition.OPTIONAL;
}
 
Example 8
Source File: PentahoParquetWriteSupport.java    From pentaho-hadoop-shims with Apache License 2.0 4 votes vote down vote up
/**
 * Maps an output field definition onto the Parquet primitive type used to store it.
 * Fields that allow nulls are declared OPTIONAL, all others REQUIRED; the column is named
 * after the field's format field name.
 *
 * @param f output field describing the Parquet type, nullability, name and, for decimals,
 *          precision and scale
 * @return the primitive type for the field
 * @throws RuntimeException if the field's Parquet type is not handled here
 */
private PrimitiveType convertToPrimitiveType( IParquetOutputField f ) {
  Type.Repetition rep = f.getAllowNull() ? Type.Repetition.OPTIONAL : Type.Repetition.REQUIRED;
  String formatFieldName = f.getFormatFieldName();
  switch ( f.getParquetType() ) {
    case BINARY:
      return new PrimitiveType( rep, PrimitiveType.PrimitiveTypeName.BINARY, formatFieldName );
    case BOOLEAN:
      return new PrimitiveType( rep, PrimitiveType.PrimitiveTypeName.BOOLEAN, formatFieldName );
    case DOUBLE:
      return new PrimitiveType( rep, PrimitiveType.PrimitiveTypeName.DOUBLE, formatFieldName );
    case FLOAT:
      return new PrimitiveType( rep, PrimitiveType.PrimitiveTypeName.FLOAT, formatFieldName );
    case INT_32:
      return new PrimitiveType( rep, PrimitiveType.PrimitiveTypeName.INT32, formatFieldName );
    case UTF8:
      return new PrimitiveType( rep, PrimitiveType.PrimitiveTypeName.BINARY, formatFieldName, OriginalType.UTF8 );
    case INT_64:
      return new PrimitiveType( rep, PrimitiveType.PrimitiveTypeName.INT64, formatFieldName, OriginalType.INT_64 );
    case INT_96:
      return new PrimitiveType( rep, PrimitiveType.PrimitiveTypeName.INT96, formatFieldName );
    case DATE:
      return new PrimitiveType( rep, PrimitiveType.PrimitiveTypeName.INT32, formatFieldName, OriginalType.DATE );
    // The decimal variants differ only in the physical type; the repetition computed above
    // is passed to the builder instead of branching on nullability again.
    case DECIMAL:
      return Types.primitive( PrimitiveType.PrimitiveTypeName.BINARY, rep ).as( OriginalType.DECIMAL )
        .precision( f.getPrecision() ).scale( f.getScale() ).named( formatFieldName );
    case DECIMAL_INT_32:
      return Types.primitive( PrimitiveType.PrimitiveTypeName.INT32, rep ).as( OriginalType.DECIMAL )
        .precision( f.getPrecision() ).scale( f.getScale() ).named( formatFieldName );
    case DECIMAL_INT_64:
      return Types.primitive( PrimitiveType.PrimitiveTypeName.INT64, rep ).as( OriginalType.DECIMAL )
        .precision( f.getPrecision() ).scale( f.getScale() ).named( formatFieldName );
    case TIMESTAMP_MILLIS:
      return new PrimitiveType( rep, PrimitiveType.PrimitiveTypeName.INT64, formatFieldName,
        OriginalType.TIMESTAMP_MILLIS );
    default:
      throw new RuntimeException( "Unsupported output type: " + f.getParquetType() );
  }
}
 
Example 9
Source File: PathAction.java    From iow-hadoop-streaming with Apache License 2.0 4 votes vote down vote up
/**
 * Stores the given repetition on this object, replacing any previous value.
 *
 * @param repetition the repetition to store
 */
public void setRepetition(Type.Repetition repetition) {
    this.repetition = repetition;
}
 
Example 10
Source File: PathAction.java    From iow-hadoop-streaming with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the repetition currently stored on this object.
 *
 * @return the stored repetition
 */
public Type.Repetition getRepetition() {
    return this.repetition;
}
 
Example 11
Source File: AvroSchemaConverter.java    From parquet-mr with Apache License 2.0 4 votes vote down vote up
/**
 * Converts one Avro schema into the Parquet type named {@code fieldName}.
 * Records, arrays, maps and unions return a nested type directly; every other supported
 * schema type yields a primitive builder that is then annotated from the schema's logical
 * type (when one is present and known) and finally named.
 *
 * @param fieldName  name of the resulting Parquet field
 * @param schema     Avro schema to convert
 * @param repetition repetition of the resulting field
 * @return the converted Parquet type
 * @throws UnsupportedOperationException if the Avro type has no conversion
 */
@SuppressWarnings("deprecation")
private Type convertField(String fieldName, Schema schema, Type.Repetition repetition) {
  final Schema.Type avroType = schema.getType();
  final LogicalType logicalType = schema.getLogicalType();
  Types.PrimitiveBuilder<PrimitiveType> builder;

  // Case labels are Avro Schema.Type constants; the names used inside each branch are
  // the Parquet primitive type names.
  switch (avroType) {
    case BOOLEAN:
      builder = Types.primitive(BOOLEAN, repetition);
      break;
    case INT:
      builder = Types.primitive(INT32, repetition);
      break;
    case LONG:
      builder = Types.primitive(INT64, repetition);
      break;
    case FLOAT:
      builder = Types.primitive(FLOAT, repetition);
      break;
    case DOUBLE:
      builder = Types.primitive(DOUBLE, repetition);
      break;
    case BYTES:
      builder = Types.primitive(BINARY, repetition);
      break;
    case STRING:
      if (logicalType != null && logicalType.getName().equals(LogicalTypes.uuid().getName()) && writeParquetUUID) {
        builder = Types.primitive(FIXED_LEN_BYTE_ARRAY, repetition)
            .length(LogicalTypeAnnotation.UUIDLogicalTypeAnnotation.BYTES);
      } else {
        builder = Types.primitive(BINARY, repetition).as(stringType());
      }
      break;
    case RECORD:
      return new GroupType(repetition, fieldName, convertFields(schema.getFields()));
    case ENUM:
      builder = Types.primitive(BINARY, repetition).as(enumType());
      break;
    case ARRAY:
      if (writeOldListStructure) {
        return ConversionPatterns.listType(repetition, fieldName,
            convertField("array", schema.getElementType(), REPEATED));
      }
      return ConversionPatterns.listOfElements(repetition, fieldName,
          convertField(AvroWriteSupport.LIST_ELEMENT_NAME, schema.getElementType()));
    case MAP:
      // avro map key type is always string
      return ConversionPatterns.stringKeyMapType(repetition, fieldName,
          convertField("value", schema.getValueType()));
    case FIXED:
      builder = Types.primitive(FIXED_LEN_BYTE_ARRAY, repetition)
          .length(schema.getFixedSize());
      break;
    case UNION:
      return convertUnion(fieldName, schema, repetition);
    default:
      throw new UnsupportedOperationException("Cannot convert Avro type " + avroType);
  }

  if (logicalType == null) {
    return builder.named(fieldName);
  }

  // schema translation can only be done for known logical types because this
  // creates an equivalence
  if (logicalType instanceof LogicalTypes.Decimal) {
    LogicalTypes.Decimal decimal = (LogicalTypes.Decimal) logicalType;
    builder = builder.as(decimalType(decimal.getScale(), decimal.getPrecision()));
  } else {
    LogicalTypeAnnotation annotation = convertLogicalType(logicalType);
    if (annotation != null) {
      builder.as(annotation);
    }
  }
  return builder.named(fieldName);
}
 
Example 12
Source File: JsonElementConversionFactory.java    From incubator-gobblin with Apache License 2.0 4 votes vote down vote up
/**
 * Picks the Parquet repetition for a JSON schema: OPTIONAL when the schema is nullable,
 * REQUIRED otherwise.
 *
 * @param jsonBaseSchema schema whose nullability is checked
 * @return OPTIONAL for nullable schemas, REQUIRED otherwise
 */
public static Type.Repetition optionalOrRequired(JsonSchema jsonBaseSchema) {
  if (jsonBaseSchema.isNullable()) {
    return OPTIONAL;
  }
  return REQUIRED;
}
 
Example 13
Source File: ParquetSchemaConverter.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Converts a logical type into the Parquet primitive type named {@code name}.
 * Each supported type root selects a physical type (plus an annotation where needed);
 * the field is named once after the switch.
 *
 * @param name       name of the resulting Parquet field
 * @param type       logical type to convert
 * @param repetition repetition of the resulting field
 * @return the Parquet primitive type
 * @throws UnsupportedOperationException if the type root has no mapping here
 */
private static Type convertToParquetType(
		String name, LogicalType type, Type.Repetition repetition) {
	final Types.PrimitiveBuilder<PrimitiveType> builder;
	switch (type.getTypeRoot()) {
		case CHAR:
		case VARCHAR:
			builder = Types.primitive(PrimitiveType.PrimitiveTypeName.BINARY, repetition)
					.as(OriginalType.UTF8);
			break;
		case BOOLEAN:
			builder = Types.primitive(PrimitiveType.PrimitiveTypeName.BOOLEAN, repetition);
			break;
		case BINARY:
		case VARBINARY:
			builder = Types.primitive(PrimitiveType.PrimitiveTypeName.BINARY, repetition);
			break;
		case DECIMAL:
			DecimalType decimalType = (DecimalType) type;
			int precision = decimalType.getPrecision();
			int scale = decimalType.getScale();
			// fixed-length storage sized from the precision
			int numBytes = computeMinBytesForDecimalPrecision(precision);
			builder = Types.primitive(PrimitiveType.PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY, repetition)
					.precision(precision)
					.scale(scale)
					.length(numBytes)
					.as(OriginalType.DECIMAL);
			break;
		case TINYINT:
			builder = Types.primitive(PrimitiveType.PrimitiveTypeName.INT32, repetition)
					.as(OriginalType.INT_8);
			break;
		case SMALLINT:
			builder = Types.primitive(PrimitiveType.PrimitiveTypeName.INT32, repetition)
					.as(OriginalType.INT_16);
			break;
		case INTEGER:
			builder = Types.primitive(PrimitiveType.PrimitiveTypeName.INT32, repetition);
			break;
		case BIGINT:
			builder = Types.primitive(PrimitiveType.PrimitiveTypeName.INT64, repetition);
			break;
		case FLOAT:
			builder = Types.primitive(PrimitiveType.PrimitiveTypeName.FLOAT, repetition);
			break;
		case DOUBLE:
			builder = Types.primitive(PrimitiveType.PrimitiveTypeName.DOUBLE, repetition);
			break;
		case DATE:
			builder = Types.primitive(PrimitiveType.PrimitiveTypeName.INT32, repetition)
					.as(OriginalType.DATE);
			break;
		case TIME_WITHOUT_TIME_ZONE:
			builder = Types.primitive(PrimitiveType.PrimitiveTypeName.INT32, repetition)
					.as(OriginalType.TIME_MILLIS);
			break;
		case TIMESTAMP_WITHOUT_TIME_ZONE:
		case TIMESTAMP_WITH_LOCAL_TIME_ZONE:
			builder = Types.primitive(PrimitiveType.PrimitiveTypeName.INT96, repetition);
			break;
		default:
			throw new UnsupportedOperationException("Unsupported type: " + type);
	}
	return builder.named(name);
}
 
Example 14
Source File: AvroSchemaConverterLogicalTypesPre19.java    From datacollector with Apache License 2.0 4 votes vote down vote up
/**
 * Converts one Avro schema into the Parquet type named {@code fieldName}.
 * Records, arrays, maps and unions return a nested type directly; every other supported
 * schema type yields a primitive builder that is then annotated from the schema's
 * logical-type property (when present and known) and finally named.
 *
 * @param fieldName  name of the resulting Parquet field
 * @param schema     Avro schema to convert
 * @param repetition repetition of the resulting field
 * @return the converted Parquet type
 * @throws UnsupportedOperationException if the Avro type has no conversion
 */
@SuppressWarnings("deprecation")
private Type convertField(String fieldName, Schema schema, Type.Repetition repetition) {
  Types.PrimitiveBuilder<PrimitiveType> builder;
  final Schema.Type avroType = schema.getType();

  // Case labels are Avro Schema.Type constants; the names used inside each branch are
  // Parquet primitive type names and original-type annotations.
  switch (avroType) {
    case BOOLEAN:
      builder = Types.primitive(BOOLEAN, repetition);
      break;
    case INT:
      builder = Types.primitive(INT32, repetition);
      break;
    case LONG:
      builder = Types.primitive(INT64, repetition);
      break;
    case FLOAT:
      builder = Types.primitive(FLOAT, repetition);
      break;
    case DOUBLE:
      builder = Types.primitive(DOUBLE, repetition);
      break;
    case BYTES:
      builder = Types.primitive(BINARY, repetition);
      break;
    case STRING:
      builder = Types.primitive(BINARY, repetition).as(UTF8);
      break;
    case RECORD:
      return new GroupType(repetition, fieldName, convertFields(schema.getFields()));
    case ENUM:
      builder = Types.primitive(BINARY, repetition).as(ENUM);
      break;
    case ARRAY:
      if (writeOldListStructure) {
        return ConversionPatterns.listType(repetition, fieldName,
            convertField("array", schema.getElementType(), REPEATED));
      }
      return ConversionPatterns.listOfElements(repetition, fieldName,
          convertField(AvroWriteSupport.LIST_ELEMENT_NAME, schema.getElementType()));
    case MAP:
      // avro map key type is always string
      return ConversionPatterns.stringKeyMapType(repetition, fieldName,
          convertField("value", schema.getValueType()));
    case FIXED:
      builder = (Types.PrimitiveBuilder<PrimitiveType>) Types.primitive(FIXED_LEN_BYTE_ARRAY, repetition).length(schema.getFixedSize());
      break;
    case UNION:
      return convertUnion(fieldName, schema, repetition);
    default:
      throw new UnsupportedOperationException("Cannot convert Avro type " + avroType);
  }

  String logicalType = schema.getProp(LOGICAL_TYPE);
  if (logicalType == null) {
    return builder.named(fieldName);
  }

  // schema translation can only be done for known logical types because this
  // creates an equivalence
  if (LOGICAL_TYPE_DECIMAL.equals(logicalType)) {
    builder = (Types.PrimitiveBuilder<PrimitiveType>) builder.as(DECIMAL)
        .precision(schema.getJsonProp(LOGICAL_PROP_PRECISION).getIntValue())
        .scale(schema.getJsonProp(LOGICAL_PROP_SCALE).getIntValue());
  } else {
    OriginalType annotation = convertLogicalType(logicalType);
    if (annotation != null) {
      builder.as(annotation);
    }
  }
  return builder.named(fieldName);
}
 
Example 15
Source File: AvroSchemaConverter190Int96Avro17.java    From datacollector with Apache License 2.0 4 votes vote down vote up
/**
 * Converts one Avro schema into the Parquet type named {@code fieldName}, reading the
 * logical type from the schema's string property rather than a LogicalType object.
 * LONG fields whose logical type is timestamp-millis or timestamp-micros are written as
 * INT96 and receive no further annotation; other known logical types annotate the
 * primitive builder before it is named.
 *
 * @param fieldName  name of the resulting Parquet field
 * @param schema     Avro schema to convert
 * @param repetition repetition of the resulting field
 * @return the converted Parquet type
 * @throws UnsupportedOperationException if the Avro type has no conversion
 */
private Type convertFieldWithoutUsingLogicalType(String fieldName, Schema schema, Type.Repetition repetition) {
  LOG.debug("Converting field: {} without using LogicalType", fieldName);
  Types.PrimitiveBuilder<PrimitiveType> builder;
  final Schema.Type avroType = schema.getType();
  final String logicalType = schema.getProp(AvroTypeUtil.LOGICAL_TYPE);

  // Case labels are Avro Schema.Type constants; the names used inside each branch are
  // Parquet primitive type names and original-type annotations.
  switch (avroType) {
    case BOOLEAN:
      builder = Types.primitive(BOOLEAN, repetition);
      break;
    case INT:
      builder = Types.primitive(INT32, repetition);
      break;
    case LONG:
      // Special case handling timestamp until int96 fully supported or logical types correctly supported
      if (AvroTypeUtil.LOGICAL_TYPE_TIMESTAMP_MILLIS.equals(logicalType) ||
          AvroTypeUtil.LOGICAL_TYPE_TIMESTAMP_MICROS.equals(logicalType)) {
        LOG.debug("Logical type is a timestamp millis or micros");
        builder = Types.primitive(INT96, repetition);
      } else {
        builder = Types.primitive(INT64, repetition);
      }
      break;
    case FLOAT:
      builder = Types.primitive(FLOAT, repetition);
      break;
    case DOUBLE:
      builder = Types.primitive(DOUBLE, repetition);
      break;
    case BYTES:
      builder = Types.primitive(BINARY, repetition);
      break;
    case STRING:
      builder = Types.primitive(BINARY, repetition).as(UTF8);
      break;
    case RECORD:
      return new GroupType(repetition, fieldName, convertFields(schema.getFields()));
    case ENUM:
      builder = Types.primitive(BINARY, repetition).as(ENUM);
      break;
    case ARRAY:
      if (writeOldListStructure) {
        return ConversionPatterns.listType(repetition, fieldName,
            convertField("array", schema.getElementType(), REPEATED));
      }
      return ConversionPatterns.listOfElements(repetition, fieldName,
          convertField(AvroWriteSupport.LIST_ELEMENT_NAME, schema.getElementType()));
    case MAP:
      // avro map key type is always string
      return ConversionPatterns.stringKeyMapType(repetition, fieldName,
          convertField("value", schema.getValueType()));
    case FIXED:
      builder = Types.primitive(FIXED_LEN_BYTE_ARRAY, repetition)
                     .length(schema.getFixedSize());
      break;
    case UNION:
      return convertUnion(fieldName, schema, repetition);
    default:
      throw new UnsupportedOperationException("Cannot convert Avro type " + avroType);
  }

  // schema translation can only be done for known logical types because this
  // creates an equivalence; timestamp logical types are skipped here
  if (logicalType != null
      && !AvroTypeUtil.LOGICAL_TYPE_TIMESTAMP_MILLIS.equals(logicalType)
      && !AvroTypeUtil.LOGICAL_TYPE_TIMESTAMP_MICROS.equals(logicalType)) {
    if (AvroTypeUtil.LOGICAL_TYPE_DECIMAL.equals(logicalType)) {
      builder = (Types.PrimitiveBuilder<PrimitiveType>) builder.as(DECIMAL)
         .precision(schema.getJsonProp(AvroTypeUtil.LOGICAL_TYPE_ATTR_PRECISION).getIntValue())
         .scale(schema.getJsonProp(AvroTypeUtil.LOGICAL_TYPE_ATTR_SCALE).getIntValue());
    } else {
      OriginalType annotation = convertLogicalTypeStr(logicalType);
      if (annotation != null) {
        builder.as(annotation);
      }
    }
  }

  return builder.named(fieldName);
}
 
Example 16
Source File: AvroSchemaConverter190Int96Avro17.java    From datacollector with Apache License 2.0 4 votes vote down vote up
/**
 * Converts one schema into the Parquet type named {@code fieldName}.
 * Logs the field name at debug level and then delegates, with the same arguments, to
 * {@code convertFieldWithoutUsingLogicalType}.
 *
 * @param fieldName  name of the resulting Parquet field
 * @param schema     schema to convert
 * @param repetition repetition of the resulting field
 * @return whatever {@code convertFieldWithoutUsingLogicalType} returns for these arguments
 */
@SuppressWarnings("deprecation")
private Type convertField(String fieldName, Schema schema, Type.Repetition repetition) {
  LOG.debug("Converting field: {}", fieldName);

  return convertFieldWithoutUsingLogicalType(fieldName, schema, repetition);
}
 
Example 17
Source File: AvroSchemaConverter190Int96Avro18.java    From datacollector with Apache License 2.0 4 votes vote down vote up
/**
 * Converts one Avro schema into the Parquet type named {@code fieldName}, reading the
 * logical type from the schema's LogicalType object.
 * LONG fields carrying a TimestampMillis or TimestampMicros logical type are written as
 * INT96 and receive no further annotation; other known logical types annotate the
 * primitive builder before it is named.
 *
 * @param fieldName  name of the resulting Parquet field
 * @param schema     Avro schema to convert
 * @param repetition repetition of the resulting field
 * @return the converted Parquet type
 * @throws UnsupportedOperationException if the Avro type has no conversion
 */
private Type convertFieldUsingLogicalType(String fieldName, Schema schema, Type.Repetition repetition) {
  LOG.debug("Converting field: {} using LogicalType", fieldName);
  Types.PrimitiveBuilder<PrimitiveType> builder;
  final Schema.Type avroType = schema.getType();
  final LogicalType logicalType = schema.getLogicalType();
  // instanceof is false for null, so this is also false when no logical type is set
  final boolean timestampLogicalType =
      logicalType instanceof LogicalTypes.TimestampMillis || logicalType instanceof LogicalTypes.TimestampMicros;

  // Case labels are Avro Schema.Type constants; the names used inside each branch are
  // Parquet primitive type names and original-type annotations.
  switch (avroType) {
    case BOOLEAN:
      builder = Types.primitive(BOOLEAN, repetition);
      break;
    case INT:
      builder = Types.primitive(INT32, repetition);
      break;
    case LONG:
      // Special case handling timestamp until int96 fully supported or logical types correctly supported
      if (timestampLogicalType) {
        LOG.debug("Logical type is a timestamp millis or micros");
        builder = Types.primitive(INT96, repetition);
      } else {
        builder = Types.primitive(INT64, repetition);
      }
      break;
    case FLOAT:
      builder = Types.primitive(FLOAT, repetition);
      break;
    case DOUBLE:
      builder = Types.primitive(DOUBLE, repetition);
      break;
    case BYTES:
      builder = Types.primitive(BINARY, repetition);
      break;
    case STRING:
      builder = Types.primitive(BINARY, repetition).as(UTF8);
      break;
    case RECORD:
      return new GroupType(repetition, fieldName, convertFields(schema.getFields()));
    case ENUM:
      builder = Types.primitive(BINARY, repetition).as(ENUM);
      break;
    case ARRAY:
      if (writeOldListStructure) {
        return ConversionPatterns.listType(repetition, fieldName,
            convertField("array", schema.getElementType(), REPEATED));
      }
      return ConversionPatterns.listOfElements(repetition, fieldName,
          convertField(AvroWriteSupport.LIST_ELEMENT_NAME, schema.getElementType()));
    case MAP:
      // avro map key type is always string
      return ConversionPatterns.stringKeyMapType(repetition, fieldName,
          convertField("value", schema.getValueType()));
    case FIXED:
      builder = Types.primitive(FIXED_LEN_BYTE_ARRAY, repetition)
                     .length(schema.getFixedSize());
      break;
    case UNION:
      return convertUnion(fieldName, schema, repetition);
    default:
      throw new UnsupportedOperationException("Cannot convert Avro type " + avroType);
  }

  // schema translation can only be done for known logical types because this
  // creates an equivalence; timestamp logical types are skipped here
  if (logicalType != null && !timestampLogicalType) {
    if (logicalType instanceof LogicalTypes.Decimal) {
      LogicalTypes.Decimal decimal = (LogicalTypes.Decimal) logicalType;
      builder = builder.as(DECIMAL)
                       .precision(decimal.getPrecision())
                       .scale(decimal.getScale());
    } else {
      OriginalType annotation = convertLogicalType(logicalType);
      if (annotation != null) {
        builder.as(annotation);
      }
    }
  }

  return builder.named(fieldName);
}
 
Example 18
Source File: AvroSchemaConverter190Int96Avro18.java    From datacollector with Apache License 2.0 4 votes vote down vote up
/**
 * Converts one schema into the Parquet type named {@code fieldName}.
 * Logs the field name at debug level and then delegates, with the same arguments, to
 * {@code convertFieldUsingLogicalType}.
 *
 * @param fieldName  name of the resulting Parquet field
 * @param schema     schema to convert
 * @param repetition repetition of the resulting field
 * @return whatever {@code convertFieldUsingLogicalType} returns for these arguments
 */
@SuppressWarnings("deprecation")
private Type convertField(String fieldName, Schema schema, Type.Repetition repetition) {
  LOG.debug("Converting field: {}", fieldName);
  return convertFieldUsingLogicalType(fieldName, schema, repetition);

}