Java Code Examples for org.apache.avro.Schema#createMap()

The following examples show how to use org.apache.avro.Schema#createMap(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: FastSpecificSerializerGeneratorTest.java    From avro-fastserde with Apache License 2.0 6 votes vote down vote up
/**
 * Round-trips a map whose value type is the union [string, null, int] through the
 * fast specific serializer and the regular specific deserializer, and checks that
 * the null entry is preserved next to the string and int entries.
 */
@Test
public void shouldSerializeNullElementInMap() {
    // given
    Schema stringBranch = Schema.create(Schema.Type.STRING);
    Schema nullBranch = Schema.create(Schema.Type.NULL);
    Schema intBranch = Schema.create(Schema.Type.INT);
    Schema mapRecordSchema = Schema.createMap(Schema.createUnion(stringBranch, nullBranch, intBranch));

    Map<String, Object> input = new HashMap<>();
    input.put("0", "0");
    input.put("1", null);
    input.put("2", 2);

    // when
    Map<Utf8, Object> roundTripped =
            deserializeSpecific(mapRecordSchema, serializeSpecificFast(input, mapRecordSchema));

    // then
    Assert.assertEquals(3, roundTripped.size());

    Object stringValue = roundTripped.get(new Utf8("0"));
    Assert.assertEquals("0", stringValue.toString());

    Object nullValue = roundTripped.get(new Utf8("1"));
    Assert.assertNull(nullValue);

    Object intValue = roundTripped.get(new Utf8("2"));
    Assert.assertEquals(2, intValue);
}
 
Example 2
Source File: TestAvroSchemaConverter.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that a Parquet MAP group whose repeated inner group carries no
 * MAP_KEY_VALUE annotation is converted to an Avro record with a single
 * map-of-int field, using both conversion entry points of the test class.
 */
@Test
public void testParquetMapWithoutMapKeyValueAnnotation() throws Exception {
  // Expected Avro side: record "myrecord" { mymap: map<int> }.
  Schema intMap = Schema.createMap(Schema.create(INT));
  Schema.Field mapField = new Schema.Field("mymap", intMap, null, null);
  Schema expectedAvro = Schema.createRecord("myrecord", null, null, false);
  expectedAvro.setFields(Collections.singletonList(mapField));

  // Parquet side: the inner repeated group is named "map" and has no annotation.
  String parquetMessage =
      "message myrecord {\n"
          + "  required group mymap (MAP) {\n"
          + "    repeated group map {\n"
          + "      required binary key (UTF8);\n"
          + "      required int32 value;\n"
          + "    }\n"
          + "  }\n"
          + "}\n";

  testParquetToAvroConversion(expectedAvro, parquetMessage);
  testParquetToAvroConversion(NEW_BEHAVIOR, expectedAvro, parquetMessage);
}
 
Example 3
Source File: TestAvroStorageUtils.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Merging two map schemas: succeeds when the value types can be merged
 * (int with double gives double) and fails with an IOException when they cannot
 * (int with boolean).
 */
@Test
public void testMergeSchema6() throws IOException {
    // value types are mergeable
    Schema intValuedMap = Schema.createMap(Schema.create(Schema.Type.INT));
    Schema doubleValuedMap = Schema.createMap(Schema.create(Schema.Type.DOUBLE));

    Schema merged = AvroStorageUtils.mergeSchema(intValuedMap, doubleValuedMap);
    assertEquals(Schema.Type.MAP, merged.getType());
    assertEquals(Schema.Type.DOUBLE, merged.getValueType().getType());

    // value types are not mergeable
    Schema x = Schema.createMap(Schema.create(Schema.Type.INT));
    Schema y = Schema.createMap(Schema.create(Schema.Type.BOOLEAN));

    try {
        Schema unexpected = AvroStorageUtils.mergeSchema(x, y);
        Assert.fail("exception is expected, but " + unexpected.getType() + " is returned");
    } catch (IOException e) {
        String expectedMessage =
                "Cannot merge "+ x.getValueType().getType() + " with "+ y.getValueType().getType();
        assertEquals(expectedMessage, e.getMessage());
    }
}
 
Example 4
Source File: TestAvroStorageUtils.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Every ordered pair of distinct complex schema kinds (record, array, map, union,
 * fixed) must be rejected by AvroStorageUtils.mergeSchema with an IOException
 * whose message names both types.
 */
@Test
public void testMergeSchema3() throws IOException {
    Schema[] complexTypes = {
        Schema.createRecord(new ArrayList<Schema.Field>()),
        Schema.createArray(Schema.create(Schema.Type.INT)),
        Schema.createMap(Schema.create(Schema.Type.INT)),
        Schema.createUnion(new ArrayList<Schema>()),
        Schema.createFixed("fixed", null, null, 1),
    };

    for (int left = 0; left < complexTypes.length; left++) {
        for (int right = 0; right < complexTypes.length; right++) {
            if (left == right) {
                // only pairs of different kinds are checked
                continue;
            }

            Schema x = complexTypes[left];
            Schema y = complexTypes[right];
            try {
                Schema merged = AvroStorageUtils.mergeSchema(x, y);
                Assert.fail("exception is expected, but " + merged.getType() + " is returned");
            } catch (IOException e) {
                String expectedMessage = "Cannot merge "+ x.getType()+ " with "+ y.getType();
                assertEquals(expectedMessage, e.getMessage());
            }
        }
    }
}
 
Example 5
Source File: FastGenericSerializerGeneratorTest.java    From avro-fastserde with Apache License 2.0 6 votes vote down vote up
/**
 * Round-trips a map whose value type is the union [string, null, int] through the
 * fast generic serializer and the regular generic deserializer, and checks that
 * the null entry is preserved next to the string and int entries.
 */
@Test
public void shouldSerializeNullElementInMap() {
    // given
    Schema stringBranch = Schema.create(Schema.Type.STRING);
    Schema nullBranch = Schema.create(Schema.Type.NULL);
    Schema intBranch = Schema.create(Schema.Type.INT);
    Schema mapRecordSchema = Schema.createMap(Schema.createUnion(stringBranch, nullBranch, intBranch));

    Map<String, Object> input = new HashMap<>();
    input.put("0", "0");
    input.put("1", null);
    input.put("2", 2);

    // when
    Map<Utf8, Object> roundTripped =
            deserializeGeneric(mapRecordSchema, serializeGenericFast(input, mapRecordSchema));

    // then
    Assert.assertEquals(3, roundTripped.size());

    Object stringValue = roundTripped.get(new Utf8("0"));
    Assert.assertEquals(new Utf8("0"), stringValue);

    Object nullValue = roundTripped.get(new Utf8("1"));
    Assert.assertNull(nullValue);

    Object intValue = roundTripped.get(new Utf8("2"));
    Assert.assertEquals(2, intValue);
}
 
Example 6
Source File: FastSpecificDeserializerGeneratorTest.java    From avro-fastserde with Apache License 2.0 6 votes vote down vote up
/**
 * Serializes a map with a null value using the regular specific serializer and
 * reads it back with the fast specific deserializer (writer and reader schema
 * identical), checking that string, null and int values all survive.
 */
@Test
public void shouldDeserializeNullElementInMap() {
    // given
    Schema valueUnion = Schema.createUnion(
            Schema.create(Schema.Type.STRING),
            Schema.create(Schema.Type.NULL),
            Schema.create(Schema.Type.INT));
    Schema mapRecordSchema = Schema.createMap(valueUnion);

    Map<String, Object> input = new HashMap<>();
    input.put("0", "0");
    input.put("1", null);
    input.put("2", 2);

    // when
    Map<Utf8, Object> decoded = deserializeSpecificFast(
            mapRecordSchema,
            mapRecordSchema,
            serializeSpecific(input, mapRecordSchema));

    // then
    Assert.assertEquals(3, decoded.size());

    Object stringValue = decoded.get(new Utf8("0"));
    Assert.assertEquals("0", stringValue.toString());

    Object nullValue = decoded.get(new Utf8("1"));
    Assert.assertNull(nullValue);

    Object intValue = decoded.get(new Utf8("2"));
    Assert.assertEquals(2, intValue);
}
 
Example 7
Source File: FastGenericDeserializerGeneratorTest.java    From avro-fastserde with Apache License 2.0 6 votes vote down vote up
/**
 * Reads a string-valued map with the fast generic deserializer where one of the
 * two schemas has its string type set to {@code GenericData.StringType.String},
 * and expects the decoded values to equal plain Java strings.
 */
@Test
public void shouldReadMapOfJavaStrings() {
    // given
    Schema stringMapSchema = Schema.createMap(Schema.create(Schema.Type.STRING));

    // a string schema explicitly marked to be materialised as java.lang.String
    Schema javaStringSchema = Schema.create(Schema.Type.STRING);
    GenericData.setStringType(javaStringSchema, GenericData.StringType.String);
    Schema javaStringMapSchema = Schema.createMap(javaStringSchema);

    Map<String, String> payload = new HashMap<>(0);
    payload.put("1", "abc");
    payload.put("2", "aaa");

    // when
    Map<Utf8, String> decoded = deserializeGenericFast(
            stringMapSchema,
            javaStringMapSchema,
            serializeGeneric(payload, javaStringMapSchema));

    // then
    Assert.assertEquals(2, decoded.size());

    String first = decoded.get(new Utf8("1"));
    Assert.assertEquals("abc", first);

    String second = decoded.get(new Utf8("2"));
    Assert.assertEquals("aaa", second);
}
 
Example 8
Source File: AvroGenerators.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Produces a random Avro schema. Once the current nesting level has reached
 * MAX_NESTING only primitive types are drawn; otherwise any type may be drawn.
 * For a non-primitive type the nesting counter is incremented before the nested
 * schema is built. Map and array element schemas are produced by recursing into
 * this method.
 *
 * @throws AssertionError if the drawn type is not handled by any branch below
 */
@Override
public Schema generate(SourceOfRandomness random, GenerationStatus status) {
  Schema.Type type;
  if (nesting(status) < MAX_NESTING) {
    type = random.choose(ALL_TYPES);
  } else {
    type = random.choose(PRIMITIVE_TYPES);
  }

  if (PRIMITIVE_TYPES.contains(type)) {
    return Schema.create(type);
  }

  // Everything below builds a complex schema, which counts as one more nesting level.
  nestingInc(status);

  if (type == Schema.Type.FIXED) {
    int size = random.choose(Arrays.asList(1, 5, 12));
    return Schema.createFixed("fixed_" + branch(status), "", "", size);
  }
  if (type == Schema.Type.UNION) {
    // only nullable fields, everything else isn't supported in row conversion code
    return UnionSchemaGenerator.INSTANCE.generate(random, status);
  }
  if (type == Schema.Type.ENUM) {
    return EnumSchemaGenerator.INSTANCE.generate(random, status);
  }
  if (type == Schema.Type.RECORD) {
    return RecordSchemaGenerator.INSTANCE.generate(random, status);
  }
  if (type == Schema.Type.MAP) {
    return Schema.createMap(generate(random, status));
  }
  if (type == Schema.Type.ARRAY) {
    return Schema.createArray(generate(random, status));
  }

  throw new AssertionError("Unexpected AVRO type: " + type);
}
 
Example 9
Source File: FastGenericSerializerGeneratorTest.java    From avro-util with BSD 2-Clause "Simplified" License 5 votes vote down vote up
/**
 * Writes a map of generic records and decodes it again, twice: first with the
 * record schema used directly as the map value type, then with the value type
 * produced by createUnionSchema(recordSchema). Both runs must yield the two
 * records with their "field" value intact.
 */
@Test(groups = {"serializationTest"})
public void shouldWriteMapOfRecords() {
  // given
  Schema recordSchema = createRecord("record", createPrimitiveUnionFieldSchema("field", Schema.Type.STRING));
  Schema mapRecordSchema = Schema.createMap(recordSchema);

  GenericData.Record valueRecord = new GenericData.Record(recordSchema);
  valueRecord.put("field", "abc");

  Map<String, GenericData.Record> recordsMap = new HashMap<>();
  recordsMap.put("1", valueRecord);
  recordsMap.put("2", valueRecord);

  // when
  Map<Utf8, GenericRecord> decoded =
      decodeRecord(mapRecordSchema, dataAsBinaryDecoder(recordsMap, mapRecordSchema));

  // then
  Assert.assertEquals(2, decoded.size());
  GenericRecord first = decoded.get(new Utf8("1"));
  Assert.assertEquals("abc", first.get("field").toString());
  GenericRecord second = decoded.get(new Utf8("2"));
  Assert.assertEquals("abc", second.get("field").toString());

  // given
  mapRecordSchema = Schema.createMap(createUnionSchema(recordSchema));

  valueRecord = new GenericData.Record(recordSchema);
  valueRecord.put("field", "abc");

  recordsMap = new HashMap<>();
  recordsMap.put("1", valueRecord);
  recordsMap.put("2", valueRecord);

  // when
  decoded = decodeRecord(mapRecordSchema, dataAsBinaryDecoder(recordsMap, mapRecordSchema));

  // then
  Assert.assertEquals(2, decoded.size());
  first = decoded.get(new Utf8("1"));
  Assert.assertEquals("abc", first.get("field").toString());
  second = decoded.get(new Utf8("2"));
  Assert.assertEquals("abc", second.get("field").toString());
}
 
Example 10
Source File: FastSpecificSerializerGeneratorTest.java    From avro-util with BSD 2-Clause "Simplified" License 5 votes vote down vote up
/**
 * Writes a map of TestRecord instances and decodes it again, twice: first with
 * TestRecord.SCHEMA$ used directly as the map value type, then with the value
 * type produced by createUnionSchema(TestRecord.SCHEMA$). Both runs must yield
 * two records whose testString reads "abc".
 */
@Test(groups = {"serializationTest"})
public void shouldWriteMapOfRecords() {
  // given
  Schema mapRecordSchema = Schema.createMap(TestRecord.SCHEMA$);

  TestRecord value = emptyTestRecord();
  value.testString = "abc";

  Map<String, TestRecord> recordsMap = new HashMap<>();
  recordsMap.put("1", value);
  recordsMap.put("2", value);

  // when
  Map<String, TestRecord> decoded =
      decodeRecordFast(mapRecordSchema, dataAsDecoder(recordsMap, mapRecordSchema));

  // then
  Assert.assertEquals(2, decoded.size());
  TestRecord first = decoded.get(new Utf8("1"));
  Assert.assertEquals("abc", first.testString.toString());
  TestRecord second = decoded.get(new Utf8("2"));
  Assert.assertEquals("abc", second.testString.toString());

  // given
  mapRecordSchema = Schema.createMap(createUnionSchema(TestRecord.SCHEMA$));

  value = emptyTestRecord();
  value.testString = "abc";

  recordsMap = new HashMap<>();
  recordsMap.put("1", value);
  recordsMap.put("2", value);

  // when
  decoded = decodeRecordFast(mapRecordSchema, dataAsDecoder(recordsMap, mapRecordSchema));

  // then
  Assert.assertEquals(2, decoded.size());
  first = decoded.get(new Utf8("1"));
  Assert.assertEquals("abc", first.testString.toString());
  second = decoded.get(new Utf8("2"));
  Assert.assertEquals("abc", second.testString.toString());
}
 
Example 11
Source File: TestTableConversion.java    From kite with Apache License 2.0 5 votes vote down vote up
/**
 * Converts several Hive type strings with HiveSchemaConverter.convert and compares
 * the result with hand-built Avro schemas: a map of longs, an array of floats, an
 * array of nested maps, and a map of structs that combines the previous two.
 * Every element/value schema on the expected side is wrapped with optional(...).
 */
@Test
public void testConvertMaps() {
  // map<string,bigint>
  TypeInfo longMapType = parseTypeInfo("map<string,bigint>");
  Schema longMapSchema = Schema.createMap(
      optional(Schema.create(Schema.Type.LONG)));
  Schema convertedLongMap = HiveSchemaConverter.convert(
      startPath, "test", longMapType, NO_REQUIRED_FIELDS);
  Assert.assertEquals("Should convert map of primitive",
      longMapSchema, convertedLongMap);

  // array<float>
  TypeInfo floatArrayType = parseTypeInfo("array<float>");
  Schema floatArraySchema = Schema.createArray(
      optional(Schema.create(Schema.Type.FLOAT)));
  Schema convertedFloatArray = HiveSchemaConverter.convert(
      startPath, "test", floatArrayType, NO_REQUIRED_FIELDS);
  Assert.assertEquals("Should convert map of arrays",
      floatArraySchema, convertedFloatArray);

  // array<map<string,map<string,bigint>>>
  TypeInfo nestedMapArrayType = parseTypeInfo(
      "array<map<string,map<string,bigint>>>");
  Schema nestedMapArraySchema = Schema.createArray(
      optional(Schema.createMap(optional(longMapSchema))));
  Schema convertedNestedMapArray = HiveSchemaConverter.convert(
      startPath, "test", nestedMapArrayType, NO_REQUIRED_FIELDS);
  Assert.assertEquals("Should convert map of maps",
      nestedMapArraySchema, convertedNestedMapArray);

  // map<string,struct<a:..., b:...>> built from the two array schemas above
  TypeInfo structMapType = parseTypeInfo("map<string," +
      "struct<a:array<float>,b:array<map<string,map<string,bigint>>>>>");
  Schema.Field fieldA =
      new Schema.Field("a", optional(floatArraySchema), null, NULL_DEFAULT);
  Schema.Field fieldB =
      new Schema.Field("b", optional(nestedMapArraySchema), null, NULL_DEFAULT);
  Schema structSchema = Schema.createRecord("test", null, null, false);
  structSchema.setFields(Lists.newArrayList(fieldA, fieldB));
  Schema structMapSchema = Schema.createMap(optional(structSchema));
  Schema convertedStructMap = HiveSchemaConverter.convert(
      startPath, "test", structMapType, NO_REQUIRED_FIELDS);
  Assert.assertEquals("Should convert map of structs",
      structMapSchema, convertedStructMap);
}
 
Example 12
Source File: FastGenericDeserializerGeneratorTest.java    From avro-fastserde with Apache License 2.0 5 votes vote down vote up
/**
 * Serializes a map of generic records with the regular generic serializer and
 * reads it back with the fast generic deserializer, twice: first with the record
 * schema used directly as the map value type, then with the value type produced
 * by createUnionSchema(recordSchema). The same input map is used for both runs.
 */
@Test
public void shouldReadMapOfRecords() {
    // given
    Schema recordSchema = createRecord("record",
            createPrimitiveUnionFieldSchema("field", Schema.Type.STRING));
    Schema mapRecordSchema = Schema.createMap(recordSchema);

    GenericRecordBuilder recordBuilder = new GenericRecordBuilder(recordSchema);
    recordBuilder.set("field", "abc");

    // two separately built records with identical content
    Map<String, GenericData.Record> recordsMap = new HashMap<>();
    recordsMap.put("1", recordBuilder.build());
    recordsMap.put("2", recordBuilder.build());

    // when
    Map<Utf8, GenericRecord> decoded = deserializeGenericFast(
            mapRecordSchema,
            mapRecordSchema,
            serializeGeneric(recordsMap, mapRecordSchema));

    // then
    Assert.assertEquals(2, decoded.size());
    GenericRecord first = decoded.get(new Utf8("1"));
    Assert.assertEquals("abc", first.get("field").toString());
    GenericRecord second = decoded.get(new Utf8("2"));
    Assert.assertEquals("abc", second.get("field").toString());

    // given
    mapRecordSchema = Schema.createMap(createUnionSchema(recordSchema));

    // when
    decoded = deserializeGenericFast(
            mapRecordSchema,
            mapRecordSchema,
            serializeGeneric(recordsMap, mapRecordSchema));

    // then
    Assert.assertEquals(2, decoded.size());
    first = decoded.get(new Utf8("1"));
    Assert.assertEquals("abc", first.get("field").toString());
    second = decoded.get(new Utf8("2"));
    Assert.assertEquals("abc", second.get("field").toString());
}
 
Example 13
Source File: FastSpecificDeserializerGeneratorTest.java    From avro-fastserde with Apache License 2.0 4 votes vote down vote up
/**
 * Serializes a map of TestRecord instances with the regular specific serializer
 * and reads it back with the fast specific deserializer, twice: first with the
 * TestRecord class schema used directly as the map value type, then with the
 * value type produced by createUnionSchema(...).
 *
 * <p>Both halves compare the "testStringUnion" field through {@code toString()},
 * so the check does not depend on which CharSequence implementation the
 * deserializer returns for string fields.
 */
@Test
public void shouldReadMapOfRecords() {
    // given
    Schema mapRecordSchema = Schema.createMap(TestRecord.getClassSchema());

    TestRecord testRecord = emptyTestRecord();
    testRecord.put("testStringUnion", "abc");

    Map<String, TestRecord> recordsMap = new HashMap<>();
    recordsMap.put("1", testRecord);
    recordsMap.put("2", testRecord);

    // when
    Map<Utf8, TestRecord> map = deserializeSpecificFast(mapRecordSchema, mapRecordSchema,
            serializeSpecific(recordsMap, mapRecordSchema));

    // then
    Assert.assertEquals(2, map.size());
    Assert.assertEquals("abc", map.get(new Utf8("1")).get("testStringUnion").toString());
    Assert.assertEquals("abc", map.get(new Utf8("2")).get("testStringUnion").toString());

    // given
    mapRecordSchema = Schema.createMap(createUnionSchema(TestRecord
            .getClassSchema()));

    testRecord = emptyTestRecord();
    testRecord.put("testStringUnion", "abc");

    recordsMap = new HashMap<>();
    recordsMap.put("1", testRecord);
    recordsMap.put("2", testRecord);

    // when
    map = deserializeSpecificFast(mapRecordSchema, mapRecordSchema,
            serializeSpecific(recordsMap, mapRecordSchema));

    // then
    Assert.assertEquals(2, map.size());
    // Normalise to String exactly as in the first half: "abc".equals(value) is
    // false when value is a non-String CharSequence carrying the same text.
    Assert.assertEquals("abc", map.get(new Utf8("1")).get("testStringUnion").toString());
    Assert.assertEquals("abc", map.get(new Utf8("2")).get("testStringUnion").toString());
}
 
Example 14
Source File: AvroUtils.java    From Cubert with Apache License 2.0 4 votes vote down vote up
/**
 * Converts a composite block schema (RECORD, ARRAY or MAP) into an Avro schema.
 *
 * The converted schema is wrapped in a union [null, schema], with one exception:
 * a RECORD converted with {@code toplevel == true} is returned unwrapped.
 * Nested column schemas of array elements and map values are converted by
 * recursing with {@code toplevel == false}.
 *
 * @param name     name given to the Avro record when {@code type} is RECORD
 * @param type     Avro type to produce
 * @param schema   block schema describing the fields / element / value
 * @param toplevel whether a RECORD should be returned without the null union
 * @return the Avro schema for the given block schema
 * @throws RuntimeException         if {@code type} is ARRAY and the block schema
 *                                  does not have exactly one column
 * @throws IllegalArgumentException if {@code type} is not RECORD, ARRAY or MAP
 */
private static Schema convertFromBlockSchema(final String name,
                                             final Type type,
                                             final BlockSchema schema,
                                             boolean toplevel)
{
    Schema compositeSchema;
    switch (type)
    {
    case RECORD:
    {
        Field[] recordFields = createFields(schema);
        compositeSchema = Schema.createRecord(name, null, null, false);
        compositeSchema.setFields(Arrays.asList(recordFields));
        if (toplevel)
        {
            // a top-level record is the only case that is not made nullable
            return compositeSchema;
        }
        break;
    }
    case ARRAY:
    {
        if (schema.getNumColumns() != 1)
        {
            throw new RuntimeException("Type ARRAY must have a single element in the subschema");
        }

        ColumnType elementColumn = schema.getColumnType(0);
        Schema elementSchema;
        if (elementColumn.getColumnSchema() != null)
        {
            // nested composite element: the running counter is appended to its name
            elementSchema =
                    convertFromBlockSchema(elementColumn.getName() + (arrayElemInSchemaCounter++),
                                           convertToAvroType(elementColumn.getType()),
                                           elementColumn.getColumnSchema(), false);
        }
        else
        {
            elementSchema = Schema.create(convertToAvroType(elementColumn.getType()));
        }

        compositeSchema = Schema.createArray(elementSchema);
        break;
    }
    case MAP:
    {
        ColumnType valueColumn = schema.getColumnType(0);
        Schema valueSchema;
        if (valueColumn.getColumnSchema() != null)
        {
            valueSchema =
                    convertFromBlockSchema(valueColumn.getName(),
                                           convertToAvroType(valueColumn.getType()),
                                           valueColumn.getColumnSchema(), false);
        }
        else
        {
            valueSchema = Schema.create(convertToAvroType(valueColumn.getType()));
        }

        compositeSchema = Schema.createMap(valueSchema);
        break;
    }
    default:
        throw new IllegalArgumentException("Unsupported composite Type: " + type);
    }

    // Make the result nullable: union of null and the composite schema, in that order.
    List<Schema> nullableBranches = new ArrayList<Schema>();
    nullableBranches.add(Schema.create(Type.NULL));
    nullableBranches.add(compositeSchema);
    return Schema.createUnion(nullableBranches);
}
 
Example 15
Source File: FastGenericDeserializerGeneratorTest.java    From avro-fastserde with Apache License 2.0 4 votes vote down vote up
/**
 * For each of string, int, long, double, float and bytes: serializes a two-entry
 * map with the regular generic serializer, reads it back with the fast generic
 * deserializer (writer and reader schema identical) and checks size and values.
 */
@Test
public void shouldReadMapOfPrimitives() {
    // given: one map schema per primitive value type ...
    Schema stringMapSchema = Schema.createMap(Schema.create(Schema.Type.STRING));
    Schema intMapSchema = Schema.createMap(Schema.create(Schema.Type.INT));
    Schema longMapSchema = Schema.createMap(Schema.create(Schema.Type.LONG));
    Schema doubleMapSchema = Schema.createMap(Schema.create(Schema.Type.DOUBLE));
    Schema floatMapSchema = Schema.createMap(Schema.create(Schema.Type.FLOAT));
    Schema bytesMapSchema = Schema.createMap(Schema.create(Schema.Type.BYTES));

    // ... and a two-entry input map for each of them
    Map<String, String> stringMap = new HashMap<>(0);
    stringMap.put("1", "abc");
    stringMap.put("2", "aaa");

    Map<String, Integer> intMap = new HashMap<>(0);
    intMap.put("1", 1);
    intMap.put("2", 2);

    Map<String, Long> longMap = new HashMap<>(0);
    longMap.put("1", 1L);
    longMap.put("2", 2L);

    Map<String, Double> doubleMap = new HashMap<>(0);
    doubleMap.put("1", 1.0);
    doubleMap.put("2", 2.0);

    Map<String, Float> floatMap = new HashMap<>(0);
    floatMap.put("1", 1.0f);
    floatMap.put("2", 2.0f);

    Map<String, ByteBuffer> bytesMap = new HashMap<>(0);
    bytesMap.put("1", ByteBuffer.wrap(new byte[]{0x01}));
    bytesMap.put("2", ByteBuffer.wrap(new byte[]{0x02}));

    // when
    Map<Utf8, Utf8> decodedStrings = deserializeGenericFast(stringMapSchema, stringMapSchema,
            serializeGeneric(stringMap, stringMapSchema));
    Map<Utf8, Integer> decodedInts = deserializeGenericFast(intMapSchema, intMapSchema,
            serializeGeneric(intMap, intMapSchema));
    Map<Utf8, Long> decodedLongs = deserializeGenericFast(longMapSchema, longMapSchema,
            serializeGeneric(longMap, longMapSchema));
    Map<Utf8, Double> decodedDoubles = deserializeGenericFast(doubleMapSchema, doubleMapSchema,
            serializeGeneric(doubleMap, doubleMapSchema));
    Map<Utf8, Float> decodedFloats = deserializeGenericFast(floatMapSchema, floatMapSchema,
            serializeGeneric(floatMap, floatMapSchema));
    Map<Utf8, ByteBuffer> decodedBytes = deserializeGenericFast(bytesMapSchema, bytesMapSchema,
            serializeGeneric(bytesMap, bytesMapSchema));

    // then
    Utf8 firstKey = new Utf8("1");
    Utf8 secondKey = new Utf8("2");

    Assert.assertEquals(2, decodedStrings.size());
    Assert.assertEquals("abc", decodedStrings.get(firstKey).toString());
    Assert.assertEquals("aaa", decodedStrings.get(secondKey).toString());

    Assert.assertEquals(2, decodedInts.size());
    Assert.assertEquals(Integer.valueOf(1), decodedInts.get(firstKey));
    Assert.assertEquals(Integer.valueOf(2), decodedInts.get(secondKey));

    Assert.assertEquals(2, decodedLongs.size());
    Assert.assertEquals(Long.valueOf(1L), decodedLongs.get(firstKey));
    Assert.assertEquals(Long.valueOf(2L), decodedLongs.get(secondKey));

    Assert.assertEquals(2, decodedDoubles.size());
    Assert.assertEquals(Double.valueOf(1.0), decodedDoubles.get(firstKey));
    Assert.assertEquals(Double.valueOf(2.0), decodedDoubles.get(secondKey));

    Assert.assertEquals(2, decodedFloats.size());
    Assert.assertEquals(Float.valueOf(1f), decodedFloats.get(firstKey));
    Assert.assertEquals(Float.valueOf(2f), decodedFloats.get(secondKey));

    Assert.assertEquals(2, decodedBytes.size());
    Assert.assertEquals(0x01, decodedBytes.get(firstKey).get());
    Assert.assertEquals(0x02, decodedBytes.get(secondKey).get());
}
 
Example 16
Source File: AvroUtils.java    From incubator-gobblin with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a copy of the given {@link org.apache.avro.Schema} in which the namespaces of
 * named types (enum, fixed, record) are replaced according to {@code namespaceOverride}.
 * Container types (map, array, union) and record fields are processed recursively;
 * primitive types are re-created as-is. Schema properties are copied to the result
 * via {@code copyProperties}.
 *
 * @param schema {@link org.apache.avro.Schema} to copy.
 * @param namespaceOverride mapping from an existing namespace to its replacement; a
 *                          namespace that is not a key of this map is kept unchanged.
 * @return A {@link org.apache.avro.Schema} that is a copy of schema, with namespaces replaced.
 * @throws AvroRuntimeException if the schema type is not handled by any case below.
 */
public static Schema switchNamespace(Schema schema, Map<String, String> namespaceOverride) {
  Schema newSchema;

  switch (schema.getType()) {
    // Primitives are simply re-created.
    case BOOLEAN:
    case BYTES:
    case DOUBLE:
    case FLOAT:
    case INT:
    case LONG:
    case NULL:
    case STRING:
      newSchema = Schema.create(schema.getType());
      break;

    // Named types: look the namespace up in the override map.
    case ENUM: {
      String enumNamespace = namespaceOverride.containsKey(schema.getNamespace())
          ? namespaceOverride.get(schema.getNamespace())
          : schema.getNamespace();
      newSchema =
          Schema.createEnum(schema.getName(), schema.getDoc(), enumNamespace, schema.getEnumSymbols());
      break;
    }
    case FIXED: {
      String fixedNamespace = namespaceOverride.containsKey(schema.getNamespace())
          ? namespaceOverride.get(schema.getNamespace())
          : schema.getNamespace();
      newSchema =
          Schema.createFixed(schema.getName(), schema.getDoc(), fixedNamespace, schema.getFixedSize());
      break;
    }
    case RECORD: {
      String recordNamespace = namespaceOverride.containsKey(schema.getNamespace())
          ? namespaceOverride.get(schema.getNamespace())
          : schema.getNamespace();

      // Rebuild every field with a namespace-switched field schema.
      List<Schema.Field> copiedFields = new ArrayList<>();
      for (Schema.Field sourceField : schema.getFields()) {
        Schema switchedFieldSchema = switchNamespace(sourceField.schema(), namespaceOverride);
        copiedFields.add(new Field(sourceField.name(), switchedFieldSchema, sourceField.doc(),
            sourceField.defaultValue(), sourceField.order()));
      }

      newSchema = Schema.createRecord(schema.getName(), schema.getDoc(), recordNamespace,
          schema.isError());
      newSchema.setFields(copiedFields);
      break;
    }

    // Containers: recurse into the contained schema(s).
    case MAP:
      newSchema = Schema.createMap(switchNamespace(schema.getValueType(), namespaceOverride));
      break;
    case ARRAY:
      newSchema = Schema.createArray(switchNamespace(schema.getElementType(), namespaceOverride));
      break;
    case UNION: {
      List<Schema> switchedMembers = new ArrayList<>();
      if (null != schema.getTypes()) {
        for (Schema member : schema.getTypes()) {
          switchedMembers.add(switchNamespace(member, namespaceOverride));
        }
      }
      newSchema = Schema.createUnion(switchedMembers);
      break;
    }

    default: {
      String exceptionMessage = String.format("Schema namespace replacement failed for \"%s\" ", schema);
      LOG.error(exceptionMessage);

      throw new AvroRuntimeException(exceptionMessage);
    }
  }

  // Copy schema metadata
  copyProperties(schema, newSchema);

  return newSchema;
}
 
Example 17
Source File: FastGenericSerializerGeneratorTest.java    From avro-fastserde with Apache License 2.0 4 votes vote down vote up
/**
 * For each of string, int, long, double, float and bytes: serializes a two-entry
 * map with the fast generic serializer, reads it back with the regular generic
 * deserializer and checks size and values.
 */
@Test
public void shouldWriteMapOfPrimitives() {
    // given: one map schema per primitive value type ...
    Schema stringMapSchema = Schema.createMap(Schema.create(Schema.Type.STRING));
    Schema intMapSchema = Schema.createMap(Schema.create(Schema.Type.INT));
    Schema longMapSchema = Schema.createMap(Schema.create(Schema.Type.LONG));
    Schema doubleMapSchema = Schema.createMap(Schema.create(Schema.Type.DOUBLE));
    Schema floatMapSchema = Schema.createMap(Schema.create(Schema.Type.FLOAT));
    Schema bytesMapSchema = Schema.createMap(Schema.create(Schema.Type.BYTES));

    // ... and a two-entry input map for each of them
    Map<String, String> stringMap = new HashMap<>(0);
    stringMap.put("1", "abc");
    stringMap.put("2", "aaa");

    Map<String, Integer> intMap = new HashMap<>(0);
    intMap.put("1", 1);
    intMap.put("2", 2);

    Map<String, Long> longMap = new HashMap<>(0);
    longMap.put("1", 1L);
    longMap.put("2", 2L);

    Map<String, Double> doubleMap = new HashMap<>(0);
    doubleMap.put("1", 1.0);
    doubleMap.put("2", 2.0);

    Map<String, Float> floatMap = new HashMap<>(0);
    floatMap.put("1", 1.0f);
    floatMap.put("2", 2.0f);

    Map<String, ByteBuffer> bytesMap = new HashMap<>(0);
    bytesMap.put("1", ByteBuffer.wrap(new byte[]{0x01}));
    bytesMap.put("2", ByteBuffer.wrap(new byte[]{0x02}));

    // when
    Map<Utf8, Utf8> decodedStrings = deserializeGeneric(stringMapSchema,
            serializeGenericFast(stringMap, stringMapSchema));
    Map<Utf8, Integer> decodedInts = deserializeGeneric(intMapSchema,
            serializeGenericFast(intMap, intMapSchema));
    Map<Utf8, Long> decodedLongs = deserializeGeneric(longMapSchema,
            serializeGenericFast(longMap, longMapSchema));
    Map<Utf8, Double> decodedDoubles = deserializeGeneric(doubleMapSchema,
            serializeGenericFast(doubleMap, doubleMapSchema));
    Map<Utf8, Float> decodedFloats = deserializeGeneric(floatMapSchema,
            serializeGenericFast(floatMap, floatMapSchema));
    Map<Utf8, ByteBuffer> decodedBytes = deserializeGeneric(bytesMapSchema,
            serializeGenericFast(bytesMap, bytesMapSchema));

    // then
    Utf8 firstKey = new Utf8("1");
    Utf8 secondKey = new Utf8("2");

    Assert.assertEquals(2, decodedStrings.size());
    Assert.assertEquals("abc", decodedStrings.get(firstKey).toString());
    Assert.assertEquals("aaa", decodedStrings.get(secondKey).toString());

    Assert.assertEquals(2, decodedInts.size());
    Assert.assertEquals(Integer.valueOf(1), decodedInts.get(firstKey));
    Assert.assertEquals(Integer.valueOf(2), decodedInts.get(secondKey));

    Assert.assertEquals(2, decodedLongs.size());
    Assert.assertEquals(Long.valueOf(1L), decodedLongs.get(firstKey));
    Assert.assertEquals(Long.valueOf(2L), decodedLongs.get(secondKey));

    Assert.assertEquals(2, decodedDoubles.size());
    Assert.assertEquals(Double.valueOf(1.0), decodedDoubles.get(firstKey));
    Assert.assertEquals(Double.valueOf(2.0), decodedDoubles.get(secondKey));

    Assert.assertEquals(2, decodedFloats.size());
    Assert.assertEquals(Float.valueOf(1f), decodedFloats.get(firstKey));
    Assert.assertEquals(Float.valueOf(2f), decodedFloats.get(secondKey));

    Assert.assertEquals(2, decodedBytes.size());
    Assert.assertEquals(0x01, decodedBytes.get(firstKey).get());
    Assert.assertEquals(0x02, decodedBytes.get(secondKey).get());
}
 
Example 18
Source File: AvroFlattener.java    From incubator-gobblin with Apache License 2.0 4 votes vote down vote up
/***
 * Flatten the Schema to un-nest recursive Records (to make it optimal for ORC).
 *
 * Primitive, enum and fixed schemas are re-created unchanged. Records and unions are
 * delegated to {@code flattenRecord} / {@code flattenUnion}. Array element and map value
 * schemas are flattened only when {@code flattenComplexTypes} is set; in that case the
 * two-argument {@code flatten} overload (defined outside this method) is called with
 * {@code false}. Schema properties are copied to the result via {@code copyProperties}.
 *
 * @param schema Schema to flatten
 * @param shouldPopulateLineage is set to true if the field is going to be flattened and moved up the hierarchy -
 *                              so that lineage information can be tagged to it; which happens when there is a
 *                              Record within a Record OR Record within Option within Record and so on,
 *                              however not when there is a Record within Map or Array
 * @param flattenComplexTypes Flatten complex types recursively other than Record and Option
 * @return Flattened Avro Schema
 */
private Schema flatten(Schema schema, boolean shouldPopulateLineage, boolean flattenComplexTypes) {
  Schema flattenedSchema;

  switch (schema.getType()) {
    // Primitives are simply cloned
    case BOOLEAN:
    case BYTES:
    case DOUBLE:
    case FLOAT:
    case INT:
    case LONG:
    case NULL:
    case STRING:
      flattenedSchema = Schema.create(schema.getType());
      break;

    // Named leaf types are re-created with the same attributes
    case ENUM:
      flattenedSchema =
          Schema.createEnum(schema.getName(), schema.getDoc(), schema.getNamespace(), schema.getEnumSymbols());
      break;
    case FIXED:
      flattenedSchema =
          Schema.createFixed(schema.getName(), schema.getDoc(), schema.getNamespace(), schema.getFixedSize());
      break;

    // Containers: the contained schema is flattened only on request
    case ARRAY: {
      // Array might be an array of recursive Records, flatten them
      Schema elementSchema = schema.getElementType();
      flattenedSchema =
          Schema.createArray(flattenComplexTypes ? flatten(elementSchema, false) : elementSchema);
      break;
    }
    case MAP: {
      Schema valueSchema = schema.getValueType();
      flattenedSchema =
          Schema.createMap(flattenComplexTypes ? flatten(valueSchema, false) : valueSchema);
      break;
    }

    case RECORD:
      flattenedSchema = flattenRecord(schema, shouldPopulateLineage, flattenComplexTypes);
      break;
    case UNION:
      flattenedSchema = flattenUnion(schema, shouldPopulateLineage, flattenComplexTypes);
      break;

    default: {
      String exceptionMessage = String.format("Schema flattening failed for \"%s\" ", schema);
      LOG.error(exceptionMessage);

      throw new AvroRuntimeException(exceptionMessage);
    }
  }

  // Copy schema metadata
  copyProperties(schema, flattenedSchema);

  return flattenedSchema;
}
 
Example 19
Source File: HiveSchemaConverter.java    From kite with Apache License 2.0 4 votes vote down vote up
/**
 * Translates a Hive {@code TypeInfo} into an Avro {@link Schema}, dispatching
 * on the type's category.
 * <ul>
 *   <li>PRIMITIVE: char and varchar become STRING; every other primitive is
 *       looked up by type name in {@code TYPEINFO_TO_TYPE}.</li>
 *   <li>LIST: an array whose element schema is passed through
 *       {@code optional(...)}.</li>
 *   <li>MAP: a map whose value schema is passed through {@code optional(...)};
 *       the key type must be {@code string}.</li>
 *   <li>STRUCT: delegated to the {@code StructTypeInfo} overload.</li>
 *   <li>UNION: a union that starts with {@code NULL} followed by each branch,
 *       converted under the name {@code name + "_" + index}.</li>
 * </ul>
 *
 * @param path passed through unchanged to nested conversions
 * @param name name used for the converted type; union branches get an index suffix
 * @param type the Hive type to convert
 * @param required passed through to nested conversions, except for union
 *                 branches, which always receive {@code NO_REQUIRED_FIELDS}
 * @return the Avro schema for {@code type}
 * @throws IllegalArgumentException if a primitive type name is not in
 *         {@code TYPEINFO_TO_TYPE}, a map key is not a string, or the
 *         category is not one of the handled ones
 */
@VisibleForTesting
static Schema convert(LinkedList<String> path, String name,
                      TypeInfo type, Collection<String[]> required) {
  switch (type.getCategory()) {
    case PRIMITIVE: {
      // char/varchar type names embed a length, so they cannot be looked up
      // by name and are mapped to STRING directly
      Class<?> typeClass = type.getClass();
      if (typeClass == charClass || typeClass == varcharClass) {
        return Schema.create(Schema.Type.STRING);
      }

      String primitiveName = type.getTypeName();
      Preconditions.checkArgument(
          TYPEINFO_TO_TYPE.containsKey(primitiveName),
          "Cannot convert unsupported type: %s", primitiveName);
      return Schema.create(TYPEINFO_TO_TYPE.get(primitiveName));
    }

    case LIST: {
      TypeInfo elementInfo = ((ListTypeInfo) type).getListElementTypeInfo();
      Schema elementSchema = convert(path, name, elementInfo, required);
      return Schema.createArray(optional(elementSchema));
    }

    case MAP: {
      MapTypeInfo mapInfo = (MapTypeInfo) type;
      TypeInfo keyInfo = mapInfo.getMapKeyTypeInfo();
      Preconditions.checkArgument(
          "string".equals(keyInfo.toString()),
          "Non-String map key type: %s", keyInfo);

      TypeInfo valueInfo = mapInfo.getMapValueTypeInfo();
      Schema valueSchema = convert(path, name, valueInfo, required);
      return Schema.createMap(optional(valueSchema));
    }

    case STRUCT:
      return convert(path, name, (StructTypeInfo) type, required);

    case UNION: {
      List<TypeInfo> branches =
          ((UnionTypeInfo) type).getAllUnionObjectTypeInfos();

      // NULL goes first so that every union is optional
      List<Schema> members = Lists.newArrayList(NULL);
      int index = 0;
      for (TypeInfo branch : branches) {
        // branches of a union can never be required
        String branchName = name + "_" + index;
        members.add(convert(path, branchName, branch, NO_REQUIRED_FIELDS));
        index += 1;
      }

      return Schema.createUnion(members);
    }

    default:
      throw new IllegalArgumentException(
          "Unknown TypeInfo category: " + type.getCategory());
  }
}
 
Example 20
Source File: Schemas.java    From parquet-mr with Apache License 2.0 4 votes vote down vote up
/**
 * Attempts to merge two {@link Schema} instances into one, returning
 * {@code null} when they cannot be merged.
 * <p>
 * Equal schemas are returned as-is. INT/LONG and FLOAT/DOUBLE pairs are
 * promoted to the wider type; integers are never promoted to floating point.
 * Otherwise both schemas must have the same type: unions are combined,
 * maps and arrays merge their value/element types, enums with the same name
 * merge their symbols, and records are merged when they share a name, or,
 * if neither has a name, when their field similarity reaches
 * {@code SIMILARITY_THRESH}.
 * <p>
 * Use {@link #mergeOrUnion} to get a union when a merge is not possible.
 *
 * @param left a {@code Schema}
 * @param right a {@code Schema}
 * @return a merged {@code Schema} or {@code null} if merging is not possible
 * @throws UnsupportedOperationException if the schemas have the same type,
 *         are not equal, and that type is not handled here
 */
private static Schema mergeOnly(Schema left, Schema right) {
  if (Objects.equal(left, right)) {
    return left;
  }

  Schema.Type leftType = left.getType();
  Schema.Type rightType = right.getType();

  // primitive promotion: the wider schema of the pair is the result;
  // integers are deliberately not promoted to floats
  boolean rightIsWider =
      (leftType == Schema.Type.INT && rightType == Schema.Type.LONG)
          || (leftType == Schema.Type.FLOAT && rightType == Schema.Type.DOUBLE);
  if (rightIsWider) {
    return right;
  }

  boolean leftIsWider =
      (leftType == Schema.Type.LONG && rightType == Schema.Type.INT)
          || (leftType == Schema.Type.DOUBLE && rightType == Schema.Type.FLOAT);
  if (leftIsWider) {
    return left;
  }

  // every remaining type mismatch has to be expressed as a union by the caller
  if (leftType != rightType) {
    return null;
  }

  switch (leftType) {
    case UNION:
      return union(left, right);

    case RECORD: {
      String leftName = left.getName();
      String rightName = right.getName();

      if (leftName == null && rightName == null) {
        // anonymous records are merged only when enough fields are shared
        if (fieldSimilarity(left, right) < SIMILARITY_THRESH) {
          return null;
        }
      } else if (!Objects.equal(leftName, rightName)) {
        return null;
      }

      Schema mergedRecord = Schema.createRecord(
          coalesce(leftName, rightName),
          coalesce(left.getDoc(), right.getDoc()),
          coalesce(left.getNamespace(), right.getNamespace()),
          false);
      mergedRecord.setFields(mergeFields(left, right));
      return mergedRecord;
    }

    case MAP: {
      Schema mergedValue =
          mergeOrUnion(left.getValueType(), right.getValueType());
      return Schema.createMap(mergedValue);
    }

    case ARRAY: {
      Schema mergedElement =
          mergeOrUnion(left.getElementType(), right.getElementType());
      return Schema.createArray(mergedElement);
    }

    case ENUM: {
      String enumName = left.getName();
      if (!Objects.equal(enumName, right.getName())) {
        return null;
      }

      // linked set: left symbols first, then unseen right symbols, in order
      Set<String> mergedSymbols = Sets.newLinkedHashSet(left.getEnumSymbols());
      mergedSymbols.addAll(right.getEnumSymbols());

      return Schema.createEnum(
          enumName,
          coalesce(left.getDoc(), right.getDoc()),
          coalesce(left.getNamespace(), right.getNamespace()),
          ImmutableList.copyOf(mergedSymbols));
    }

    default:
      // all primitives are handled before the switch by the equality check.
      // schemas that reach this point are not primitives and also not any of
      // the above known types.
      throw new UnsupportedOperationException(
          "Unknown schema type: " + leftType);
  }
}