Java Code Examples for org.apache.avro.generic.GenericData#EnumSymbol

The following examples show how to use org.apache.avro.generic.GenericData#EnumSymbol. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: TestAvroDecoder.java    From presto with Apache License 2.0 7 votes vote down vote up
/**
 * Decodes an Avro enum value through a column handle declared as VARCHAR and checks that the
 * decoded row holds the symbol name "Wednesday".
 */
@Test
public void testEnumDecodedAsVarchar()
{
    // Record schema with a single enum field listing the days of the week.
    Schema recordSchema = SchemaBuilder.record("record")
            .fields()
            .name("enum_field")
            .type()
            .enumeration("Weekday")
            .symbols("Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday")
            .noDefault()
            .endRecord();
    Schema weekdaySchema = recordSchema.getField("enum_field").schema();

    DecoderTestColumnHandle column = new DecoderTestColumnHandle(0, "row", VARCHAR, "enum_field", null, null, false, false, false);
    EnumSymbol wednesday = new GenericData.EnumSymbol(weekdaySchema, "Wednesday");

    // Only the enum schema (not the enclosing record schema) is handed to the decoder helper.
    Map<DecoderColumnHandle, FieldValueProvider> decoded =
            buildAndDecodeColumn(column, "enum_field", weekdaySchema.toString(), wednesday);

    checkValue(decoded, column, "Wednesday");
}
 
Example 2
Source File: TestAvroTypeUtil.java    From datacollector with Apache License 2.0 6 votes vote down vote up
/**
 * Round-trips an Avro enum symbol through {@code AvroTypeUtil}: the symbol is converted to a field
 * (asserted to be a STRING holding the symbol name), the field is stored in a record, and the
 * record is converted back to Avro (asserted to be a {@code GenericData.EnumSymbol} whose string
 * form is the same name).
 */
@Test
public void testCreateEnumField() throws Exception {
  String suitSchemaJson = "{ \"type\": \"enum\",\n" +
    "  \"name\": \"Suit\",\n" +
    "  \"symbols\" : [\"SPADES\", \"HEARTS\", \"DIAMONDS\", \"CLUBS\"]\n" +
    "}";
  Schema suitSchema = new Schema.Parser().parse(suitSchemaJson);
  Record sdcRecord = RecordCreator.create();
  GenericData.EnumSymbol clubs = new GenericData.EnumSymbol(suitSchema, "CLUBS");

  // Avro -> field
  Field converted = AvroTypeUtil.avroToSdcField(sdcRecord, suitSchema, clubs, false);
  Assert.assertEquals(Field.Type.STRING, converted.getType());
  Assert.assertEquals("CLUBS", converted.getValueAsString());

  // field -> Avro
  sdcRecord.set(converted);
  Object roundTripped = AvroTypeUtil.sdcRecordToAvro(sdcRecord, suitSchema, new HashMap<String, Object>());
  Assert.assertTrue(roundTripped instanceof GenericData.EnumSymbol);
  Assert.assertEquals("CLUBS", roundTripped.toString());
}
 
Example 3
Source File: AvroRelConverter.java    From samza with Apache License 2.0 5 votes vote down vote up
/**
 * Converts a relational-side value into the object representation used for the given Avro schema.
 *
 * <p>Records, arrays, maps and unions are converted recursively; enum, fixed and bytes values are
 * wrapped in the matching Avro/NIO type; for any other schema type {@code relObj} is returned
 * unchanged.
 *
 * @param relObj value to convert; may be {@code null}
 * @param schema Avro schema that selects the conversion
 * @return the converted value; {@code null} when {@code relObj} is {@code null} or when no branch
 *     of a union schema is compatible with {@code relObj}
 */
public static Object convertToAvroObject(Object relObj, Schema schema) {
  if (relObj == null) {
    return null;
  }
  switch (schema.getType()) {
    case RECORD:
      return convertToGenericRecord((SamzaSqlRelRecord) relObj, getNonNullUnionSchema(schema));
    case ARRAY:
      return ((List<Object>) relObj).stream()
          .map(o -> convertToAvroObject(o, getNonNullUnionSchema(schema).getElementType()))
          .collect(Collectors.toList());
    case MAP:
      // Built with an explicit loop instead of Collectors.toMap: toMap throws a
      // NullPointerException for null values, yet a null map value converts to null here
      // (see the null guard at the top of this method) and must be kept.
      Map<String, Object> avroMap = new java.util.HashMap<>();
      for (Map.Entry<String, ?> entry : ((Map<String, ?>) relObj).entrySet()) {
        avroMap.put(entry.getKey(),
            convertToAvroObject(entry.getValue(), getNonNullUnionSchema(schema).getValueType()));
      }
      return avroMap;
    case UNION:
      // The first union branch that is compatible with the value wins.
      for (Schema unionSchema : schema.getTypes()) {
        if (isSchemaCompatibleWithRelObj(relObj, unionSchema)) {
          return convertToAvroObject(relObj, unionSchema);
        }
      }
      return null;
    case ENUM:
      return new GenericData.EnumSymbol(schema, (String) relObj);
    case FIXED:
      return new GenericData.Fixed(schema, ((ByteString) relObj).getBytes());
    case BYTES:
      return ByteBuffer.wrap(((ByteString) relObj).getBytes());
    default:
      return relObj;
  }
}
 
Example 4
Source File: FastSerdeBenchmarkSupport.java    From avro-fastserde with Apache License 2.0 5 votes vote down vote up
/**
 * Wraps one of the symbols declared by the given enum schema, selected through
 * {@code RandomUtils.nextInt}, in a {@link GenericData.EnumSymbol}.
 *
 * @param schema schema whose type must be {@code ENUM}
 * @return an enum symbol taken from {@code schema.getEnumSymbols()}
 * @throws IllegalArgumentException if {@code schema} is not an enum schema
 */
public static GenericData.EnumSymbol generateRandomEnumSymbol(Schema schema) {
    if (schema.getType() != Schema.Type.ENUM) {
        throw new IllegalArgumentException("input schema must be an enum schema");
    }

    int symbolCount = schema.getEnumSymbols().size();
    String chosenSymbol = schema.getEnumSymbols().get(RandomUtils.nextInt(0, symbolCount));
    return new GenericData.EnumSymbol(schema, chosenSymbol);
}
 
Example 5
Source File: AvroResolverTest.java    From pxf with Apache License 2.0 5 votes vote down vote up
/**
 * Asserts that the field at {@code index} carries the OID of {@code type} and the expected value.
 * BYTEA values are compared as byte arrays, Avro enum symbols are compared through their string
 * form, and everything else is compared directly.
 */
private void assertField(List<OneField> fields, int index, Object value, DataType type) {
    assertEquals(type.getOID(), fields.get(index).type);

    Object actual = fields.get(index).val;
    if (type == DataType.BYTEA) {
        assertArrayEquals((byte[]) value, (byte[]) actual);
    } else if (actual instanceof GenericData.EnumSymbol) {
        assertEquals(value, actual.toString());
    } else {
        assertEquals(value, actual);
    }
}
 
Example 6
Source File: AvroCompatibilityHelperGenericUtilMethodsTest.java    From avro-util with BSD 2-Clause "Simplified" License 5 votes vote down vote up
/**
 * Creates the enum symbols "A" and "B" for the schema in {@code PerfectlyNormalEnum.avsc} through
 * {@code AvroCompatibilityHelper.newEnumSymbol} and checks that each result is non-null and
 * prints as its symbol name.
 */
@Test
public void testCreateGenericEnum() throws Exception {
  String enumAvsc = TestUtil.load("PerfectlyNormalEnum.avsc");
  Schema enumSchema =
      AvroCompatibilityHelper.parse(enumAvsc, SchemaParseConfiguration.STRICT, null).getMainSchema();

  for (String symbolName : new String[] {"A", "B"}) {
    GenericData.EnumSymbol created = AvroCompatibilityHelper.newEnumSymbol(enumSchema, symbolName);
    Assert.assertNotNull(created);
    Assert.assertEquals(created.toString(), symbolName);
  }
}
 
Example 7
Source File: FastSpecificDeserializerGeneratorTest.java    From avro-util with BSD 2-Clause "Simplified" License 4 votes vote down vote up
/**
 * Checks that a record written with the old schema, which still contains {@code removedField},
 * can be decoded as a {@code TestRecord} by both the fast and the slow decoding helper.
 *
 * @param whetherUseFastDeserializer selects {@code decodeRecordFast} when true, otherwise
 *     {@code decodeRecordSlow}
 */
@Test(groups = {"deserializationTest"}, dataProvider = "SlowFastDeserializer")
public void shouldSkipRemovedField(Boolean whetherUseFastDeserializer) throws IOException {
  // given
  Schema oldRecordSchema;
  // Close the classpath stream once the schema has been parsed instead of leaking it.
  try (java.io.InputStream schemaStream = this.getClass().getResourceAsStream("/schema/fastserdetestold.avsc")) {
    oldRecordSchema = Schema.parse(schemaStream);
  }

  // The sub-record uses the second branch of the "subRecordUnion" union.
  GenericData.Record subRecord =
      new GenericData.Record(oldRecordSchema.getField("subRecordUnion").schema().getTypes().get(1));
  // The enum symbol is created without a schema (null is passed).
  GenericData.EnumSymbol testEnum =
      AvroCompatibilityHelper.newEnumSymbol(null, "A");
  GenericData.Fixed testFixed = newFixed(oldRecordSchema.getField("testFixed").schema(), new byte[]{0x01});
  GenericData.Record oldRecord = new GenericData.Record(oldRecordSchema);
  oldRecord.put("testInt", 1);
  oldRecord.put("testLong", 1L);
  oldRecord.put("testDouble", 1.0);
  oldRecord.put("testFloat", 1.0f);
  oldRecord.put("testBoolean", true);
  oldRecord.put("testBytes", ByteBuffer.wrap(new byte[]{0x01, 0x02}));
  oldRecord.put("testString", "aaa");
  oldRecord.put("testStringAlias", "abc");
  oldRecord.put("removedField", "def");
  oldRecord.put("testFixed", testFixed);
  oldRecord.put("testEnum", testEnum);

  subRecord.put("subField", "abc");
  subRecord.put("removedField", "def");
  subRecord.put("anotherField", "ghi");

  oldRecord.put("subRecordUnion", subRecord);
  oldRecord.put("subRecord", subRecord);
  oldRecord.put("recordsArray", Arrays.asList(subRecord));
  Map<String, GenericData.Record> recordsMap = new HashMap<>();
  recordsMap.put("1", subRecord);
  oldRecord.put("recordsMap", recordsMap);

  oldRecord.put("testFixedArray", Collections.emptyList());
  oldRecord.put("testFixedUnionArray", Collections.emptyList());
  oldRecord.put("testEnumArray", Collections.emptyList());
  oldRecord.put("testEnumUnionArray", Collections.emptyList());
  oldRecord.put("recordsArrayMap", Collections.emptyList());
  oldRecord.put("recordsMapArray", Collections.emptyMap());

  // when
  TestRecord record = null;
  if (whetherUseFastDeserializer) {
    record = decodeRecordFast(TestRecord.SCHEMA$, oldRecordSchema, genericDataAsDecoder(oldRecord));
  } else {
    record = decodeRecordSlow(TestRecord.SCHEMA$, oldRecordSchema, genericDataAsDecoder(oldRecord));
  }

  // then
  // alias is not well supported in avro-1.4
  if (!Utils.isAvro14()) {
    Assert.assertEquals(new Utf8("abc"), record.testStringUnion);
  }
  Assert.assertEquals(TestEnum.A, record.testEnum);
  Assert.assertEquals(new Utf8("ghi"), record.subRecordUnion.anotherField);
  Assert.assertEquals(new Utf8("ghi"), record.recordsArray.get(0).anotherField);
  Assert.assertEquals(new Utf8("ghi"), record.recordsMap.get(new Utf8("1")).anotherField);
}
 
Example 8
Source File: FastDeserializerDefaultsTest.java    From avro-util with BSD 2-Clause "Simplified" License 4 votes vote down vote up
/**
 * Writes a record with the {@code TestRecord} schema and reads it with the schema from
 * {@code defaultsTestSubrecord.avsc}, then checks the values of the added fields
 * ({@code newField}, {@code newSubField}) and that the written values are preserved.
 *
 * @param whetherUseFastDeserializer selects {@code decodeGenericFast} when true; the fast helper
 *     is also used whenever {@code Utils.isAvro14()} is true
 */
@Test(groups = {"deserializationTest"}, dataProvider = "SlowFastDeserializer")
public void shouldAddFieldsInMiddleOfSchema(Boolean whetherUseFastDeserializer) throws IOException {
  // given
  Schema oldRecordSchema = TestRecord.SCHEMA$;

  // The sub-record uses the second branch of the "subRecordUnion" union.
  GenericData.Record subRecord =
      new GenericData.Record(oldRecordSchema.getField("subRecordUnion").schema().getTypes().get(1));
  Schema enumSchema = Schema.createEnum("TestEnum", "", "", Arrays.asList("A", "B", "C", "D", "E"));

  GenericData.EnumSymbol testEnum = AvroCompatibilityHelper.newEnumSymbol(enumSchema, "A");
  GenericData.Fixed testFixed = new GenericData.Fixed(oldRecordSchema.getField("testFixed").schema());
  testFixed.bytes(new byte[]{0x01});
  GenericData.Record oldRecord = new GenericData.Record(oldRecordSchema);

  oldRecord.put("testInt", 1);
  oldRecord.put("testLong", 1L);
  oldRecord.put("testDouble", 1.0);
  oldRecord.put("testFloat", 1.0f);
  oldRecord.put("testBoolean", true);
  oldRecord.put("testBytes", ByteBuffer.wrap(new byte[]{0x01, 0x02}));
  oldRecord.put("testString", "aaa");
  oldRecord.put("testFixed", testFixed);
  oldRecord.put("testEnum", testEnum);

  subRecord.put("subField", "abc");
  subRecord.put("anotherField", "ghi");

  oldRecord.put("subRecordUnion", subRecord);
  oldRecord.put("subRecord", subRecord);
  oldRecord.put("recordsArray", Collections.singletonList(subRecord));
  Map<String, GenericData.Record> recordsMap = new HashMap<>();
  recordsMap.put("1", subRecord);
  oldRecord.put("recordsMap", recordsMap);

  oldRecord.put("testFixedArray", Collections.emptyList());
  oldRecord.put("testFixedUnionArray", Collections.emptyList());
  oldRecord.put("testEnumArray", Collections.emptyList());
  oldRecord.put("testEnumUnionArray", Collections.emptyList());
  oldRecord.put("recordsArrayMap", Collections.emptyList());
  oldRecord.put("recordsMapArray", Collections.emptyMap());

  Schema newRecordSchema;
  // Close the classpath stream once the schema has been parsed instead of leaking it.
  try (java.io.InputStream schemaStream = this.getClass().getResourceAsStream("/schema/defaultsTestSubrecord.avsc")) {
    newRecordSchema = Schema.parse(schemaStream);
  }
  // when
  GenericRecord record = null;
  if (whetherUseFastDeserializer || Utils.isAvro14()) {
    record = decodeGenericFast(newRecordSchema, oldRecordSchema, genericDataAsDecoder(oldRecord));
  } else {
    // There is a bug in Schema.applyAliases of avro-1.4 that this slow-path invocation would
    // trigger, which is why the condition above routes avro-1.4 to the fast path.
    record = decodeGenericSlow(newRecordSchema, oldRecordSchema, genericDataAsDecoder(oldRecord));
  }

  // then
  GenericData.Record newSubRecord =
      new GenericData.Record(newRecordSchema.getField("subRecordUnion").schema().getTypes().get(1));
  newSubRecord.put("subField", new Utf8("abc"));
  newSubRecord.put("anotherField", new Utf8("ghi"));
  newSubRecord.put("newSubField", new Utf8("newSubFieldValue"));
  Map<Utf8, GenericData.Record> expectedRecordsMap = new HashMap<>();
  expectedRecordsMap.put(new Utf8("1"), newSubRecord);

  Assert.assertEquals("newSubFieldValue",
      ((GenericRecord) record.get("subRecordUnion")).get("newSubField").toString());
  Assert.assertEquals("newFieldValue", record.get("newField").toString());
  Assert.assertEquals(1, record.get("testInt"));
  Assert.assertEquals(1L, record.get("testLong"));
  Assert.assertEquals(1.0, record.get("testDouble"));
  Assert.assertEquals(1.0f, record.get("testFloat"));
  Assert.assertEquals(true, record.get("testBoolean"));
  Assert.assertEquals(ByteBuffer.wrap(new byte[]{0x01, 0x02}), record.get("testBytes"));
  Assert.assertEquals(new Utf8("aaa"), record.get("testString"));
  Assert.assertEquals(testFixed, record.get("testFixed"));
  Assert.assertEquals(testEnum, record.get("testEnum"));
  Assert.assertEquals(newSubRecord, record.get("subRecordUnion"));

  Assert.assertTrue(Arrays.asList(newSubRecord).equals(record.get("recordsArray")));

  Assert.assertEquals(expectedRecordsMap, record.get("recordsMap"));
  Assert.assertTrue(Collections.emptyList().equals(record.get("testFixedArray")));
  Assert.assertTrue(Collections.emptyList().equals(record.get("testFixedUnionArray")));
  Assert.assertTrue(Collections.emptyList().equals(record.get("testEnumArray")));
  Assert.assertTrue(Collections.emptyList().equals(record.get("testEnumUnionArray")));
  Assert.assertTrue(Collections.emptyList().equals(record.get("recordsArrayMap")));
  Assert.assertEquals(Collections.emptyMap(), record.get("recordsMapArray"));
}
 
Example 9
Source File: JsonElementConversionFactory.java    From incubator-gobblin with Apache License 2.0 4 votes vote down vote up
/**
 * Turns the string value of the given JSON element into an Avro enum symbol bound to this
 * converter's schema.
 */
@Override
Object convertField(JsonElement value) {
  String symbolName = value.getAsString();
  return new GenericData.EnumSymbol(this.schema, symbolName);
}
 
Example 10
Source File: Avro17Adapter.java    From avro-util with BSD 2-Clause "Simplified" License 4 votes vote down vote up
/**
 * Creates an enum symbol by calling the two-argument {@code GenericData.EnumSymbol} constructor.
 *
 * @param enumSchema schema passed to the constructor
 * @param enumValue symbol passed to the constructor
 * @return the newly constructed enum symbol
 */
@Override
public GenericData.EnumSymbol newEnumSymbol(Schema enumSchema, String enumValue) {
  GenericData.EnumSymbol symbol = new GenericData.EnumSymbol(enumSchema, enumValue);
  return symbol;
}
 
Example 11
Source File: FastSpecificDeserializerGeneratorTest.java    From avro-fastserde with Apache License 2.0 4 votes vote down vote up
/**
 * Serializes a generic record that sets {@code testStringAlias} using the old schema and checks
 * that, after fast specific deserialization into {@code TestRecord}, the value is returned by
 * {@code getTestStringUnion()}.
 */
@Test
public void shouldReadAliasedField() throws IOException {
    // given
    Schema.Parser parser = new Schema.Parser();
    Schema oldRecordSchema;
    // Schema.Parser.parse(InputStream) leaves the stream open, so close it here.
    try (java.io.InputStream schemaStream = this.getClass().getResourceAsStream("/schema/fastserdetestold.avsc")) {
        oldRecordSchema = parser.parse(schemaStream);
    }
    // The sub-record uses the second branch of the "subRecordUnion" union.
    GenericData.Record subRecord = new GenericData.Record(oldRecordSchema.getField("subRecordUnion").schema()
            .getTypes().get(1));
    GenericData.EnumSymbol testEnum = new GenericData.EnumSymbol(oldRecordSchema.getField("testEnum").schema(),
            "A");
    GenericData.Fixed testFixed = new GenericData.Fixed(oldRecordSchema.getField("testFixed").schema(),
            new byte[] { 0x01 });
    GenericData.Record oldRecord = new GenericData.Record(oldRecordSchema);
    oldRecord.put("testInt", 1);
    oldRecord.put("testLong", 1L);
    oldRecord.put("testDouble", 1.0);
    oldRecord.put("testFloat", 1.0f);
    oldRecord.put("testBoolean", true);
    oldRecord.put("testBytes", ByteBuffer.wrap(new byte[] { 0x01, 0x02 }));
    oldRecord.put("testString", "aaa");
    oldRecord.put("testFixed", testFixed);
    oldRecord.put("testFixedUnion", testFixed);
    oldRecord.put("testFixedArray", Arrays.asList(testFixed));
    oldRecord.put("testFixedUnionArray", Arrays.asList(testFixed));
    oldRecord.put("testEnum", testEnum);
    oldRecord.put("testEnumUnion", testEnum);
    oldRecord.put("testEnumArray", Arrays.asList(testEnum));
    oldRecord.put("testEnumUnionArray", Arrays.asList(testEnum));

    oldRecord.put("subRecordUnion", subRecord);
    oldRecord.put("subRecord", subRecord);

    oldRecord.put("recordsArray", Collections.emptyList());
    oldRecord.put("recordsArrayMap", Collections.emptyList());
    oldRecord.put("recordsMap", Collections.emptyMap());
    oldRecord.put("recordsMapArray", Collections.emptyMap());

    oldRecord.put("testStringAlias", "abc");

    // when
    TestRecord record = deserializeSpecificFast(TestRecord.getClassSchema(), oldRecordSchema,
            serializeGeneric(oldRecord));

    // then
    Assert.assertEquals("abc", record.getTestStringUnion());
}
 
Example 12
Source File: AvroCompatibilityHelper.java    From avro-util with BSD 2-Clause "Simplified" License 4 votes vote down vote up
/**
 * Creates an enum symbol by delegating to {@code FACTORY.newEnumSymbol}.
 *
 * @param avroSchema schema forwarded to the factory
 * @param enumValue symbol forwarded to the factory
 * @return whatever the factory returns for the given schema and symbol
 */
public static GenericData.EnumSymbol newEnumSymbol(Schema avroSchema, String enumValue) {
  GenericData.EnumSymbol symbol = FACTORY.newEnumSymbol(avroSchema, enumValue);
  return symbol;
}
 
Example 13
Source File: Avro15Adapter.java    From avro-util with BSD 2-Clause "Simplified" License 4 votes vote down vote up
/**
 * Creates an enum symbol by calling the two-argument {@code GenericData.EnumSymbol} constructor.
 *
 * @param enumSchema schema passed to the constructor
 * @param enumValue symbol passed to the constructor
 * @return the newly constructed enum symbol
 */
@Override
public GenericData.EnumSymbol newEnumSymbol(Schema enumSchema, String enumValue) {
  GenericData.EnumSymbol symbol = new GenericData.EnumSymbol(enumSchema, enumValue);
  return symbol;
}
 
Example 14
Source File: TestReadWrite.java    From parquet-mr with Apache License 2.0 4 votes vote down vote up
/**
 * Writes one record that sets every field used from {@code all.avsc} to a Parquet file, reads the
 * first record back and compares each field with the value that was written.
 */
@Test
public void testAll() throws Exception {
  Schema schema;
  // Schema.Parser.parse(InputStream) leaves the stream open, so close it here.
  try (java.io.InputStream schemaStream = Resources.getResource("all.avsc").openStream()) {
    schema = new Schema.Parser().parse(schemaStream);
  }

  Path file = new Path(createTempFile().getPath());
  List<Integer> integerArray = Arrays.asList(1, 2, 3);
  GenericData.Record nestedRecord = new GenericRecordBuilder(
    schema.getField("mynestedrecord").schema())
    .set("mynestedint", 1).build();
  List<Integer> emptyArray = new ArrayList<Integer>();
  Schema arrayOfOptionalIntegers = Schema.createArray(
    optional(Schema.create(Schema.Type.INT)));
  GenericData.Array<Integer> genericIntegerArrayWithNulls =
    new GenericData.Array<Integer>(
      arrayOfOptionalIntegers,
      Arrays.asList(1, null, 2, null, 3));
  GenericFixed genericFixed = new GenericData.Fixed(
    Schema.createFixed("fixed", null, null, 1), new byte[]{(byte) 65});
  ImmutableMap<String, Integer> emptyMap = new ImmutableMap.Builder<String, Integer>().build();

  // Write a single record; the writer is closed before the file is read back.
  try(ParquetWriter<GenericRecord> writer = AvroParquetWriter
      .<GenericRecord>builder(file)
      .withSchema(schema)
      .withConf(testConf)
      .build()) {

    GenericData.Array<Integer> genericIntegerArray = new GenericData.Array<Integer>(
      Schema.createArray(Schema.create(Schema.Type.INT)), integerArray);

    GenericData.Record record = new GenericRecordBuilder(schema)
      .set("mynull", null)
      .set("myboolean", true)
      .set("myint", 1)
      .set("mylong", 2L)
      .set("myfloat", 3.1f)
      .set("mydouble", 4.1)
      .set("mybytes", ByteBuffer.wrap("hello".getBytes(StandardCharsets.UTF_8)))
      .set("mystring", "hello")
      .set("mynestedrecord", nestedRecord)
      .set("myenum", "a")
      .set("myarray", genericIntegerArray)
      .set("myemptyarray", emptyArray)
      .set("myoptionalarray", genericIntegerArray)
      .set("myarrayofoptional", genericIntegerArrayWithNulls)
      .set("mymap", ImmutableMap.of("a", 1, "b", 2))
      .set("myemptymap", emptyMap)
      .set("myfixed", genericFixed)
      .build();

    writer.write(record);
  }

  final GenericRecord nextRecord;
  try(AvroParquetReader<GenericRecord> reader = new AvroParquetReader<GenericRecord>(testConf, file)) {
    nextRecord = reader.read();
  }

  // When the compat flag is set the enum is expected as the plain string "a",
  // otherwise as an EnumSymbol of the field's schema.
  Object expectedEnumSymbol = compat ? "a" :
      new GenericData.EnumSymbol(schema.getField("myenum").schema(), "a");

  assertNotNull(nextRecord);
  assertEquals(null, nextRecord.get("mynull"));
  assertEquals(true, nextRecord.get("myboolean"));
  assertEquals(1, nextRecord.get("myint"));
  assertEquals(2L, nextRecord.get("mylong"));
  assertEquals(3.1f, nextRecord.get("myfloat"));
  assertEquals(4.1, nextRecord.get("mydouble"));
  assertEquals(ByteBuffer.wrap("hello".getBytes(StandardCharsets.UTF_8)), nextRecord.get("mybytes"));
  assertEquals(str("hello"), nextRecord.get("mystring"));
  assertEquals(expectedEnumSymbol, nextRecord.get("myenum"));
  assertEquals(nestedRecord, nextRecord.get("mynestedrecord"));
  assertEquals(integerArray, nextRecord.get("myarray"));
  assertEquals(emptyArray, nextRecord.get("myemptyarray"));
  assertEquals(integerArray, nextRecord.get("myoptionalarray"));
  assertEquals(genericIntegerArrayWithNulls, nextRecord.get("myarrayofoptional"));
  assertEquals(ImmutableMap.of(str("a"), 1, str("b"), 2), nextRecord.get("mymap"));
  assertEquals(emptyMap, nextRecord.get("myemptymap"));
  assertEquals(genericFixed, nextRecord.get("myfixed"));
}
 
Example 15
Source File: FastSpecificDeserializerGeneratorTest.java    From avro-fastserde with Apache License 2.0 4 votes vote down vote up
/**
 * Serializes a generic record with the old schema, which still contains {@code removedField},
 * and checks that fast specific deserialization into {@code TestRecord} yields the expected
 * values for the remaining fields.
 */
@Test
public void shouldSkipRemovedField() throws IOException {
    // given
    Schema.Parser parser = new Schema.Parser();
    Schema oldRecordSchema;
    // Schema.Parser.parse(InputStream) leaves the stream open, so close it here.
    try (java.io.InputStream schemaStream = this.getClass().getResourceAsStream("/schema/fastserdetestold.avsc")) {
        oldRecordSchema = parser.parse(schemaStream);
    }

    // The sub-record uses the second branch of the "subRecordUnion" union.
    GenericData.Record subRecord = new GenericData.Record(oldRecordSchema.getField("subRecordUnion").schema()
            .getTypes().get(1));
    GenericData.EnumSymbol testEnum = new GenericData.EnumSymbol(oldRecordSchema.getField("testEnum").schema(),
            "A");
    GenericData.Fixed testFixed = new GenericData.Fixed(oldRecordSchema.getField("testFixed").schema(),
            new byte[] { 0x01 });
    GenericData.Record oldRecord = new GenericData.Record(oldRecordSchema);
    oldRecord.put("testInt", 1);
    oldRecord.put("testLong", 1L);
    oldRecord.put("testDouble", 1.0);
    oldRecord.put("testFloat", 1.0f);
    oldRecord.put("testBoolean", true);
    oldRecord.put("testBytes", ByteBuffer.wrap(new byte[] { 0x01, 0x02 }));
    oldRecord.put("testString", "aaa");
    oldRecord.put("testStringAlias", "abc");
    oldRecord.put("removedField", "def");
    oldRecord.put("testFixed", testFixed);
    oldRecord.put("testEnum", testEnum);

    subRecord.put("subField", "abc");
    subRecord.put("removedField", "def");
    subRecord.put("anotherField", "ghi");

    oldRecord.put("subRecordUnion", subRecord);
    oldRecord.put("subRecord", subRecord);
    oldRecord.put("recordsArray", Arrays.asList(subRecord));
    Map<String, GenericData.Record> recordsMap = new HashMap<>();
    recordsMap.put("1", subRecord);
    oldRecord.put("recordsMap", recordsMap);

    oldRecord.put("testFixedArray", Collections.emptyList());
    oldRecord.put("testFixedUnionArray", Collections.emptyList());
    oldRecord.put("testEnumArray", Collections.emptyList());
    oldRecord.put("testEnumUnionArray", Collections.emptyList());
    oldRecord.put("recordsArrayMap", Collections.emptyList());
    oldRecord.put("recordsMapArray", Collections.emptyMap());

    // when
    TestRecord record = deserializeSpecificFast(TestRecord.getClassSchema(), oldRecordSchema,
            serializeGeneric(oldRecord));

    // then
    Assert.assertEquals("abc", record.getTestStringUnion());
    Assert.assertEquals(TestEnum.A, record.getTestEnum());
    Assert.assertEquals("ghi", record.getSubRecordUnion().getAnotherField());
    Assert.assertEquals("ghi", record.getRecordsArray().get(0).getAnotherField());
    Assert.assertEquals("ghi", record.getRecordsMap().get("1").getAnotherField());
}
 
Example 16
Source File: AVROIntermediateDataFormat.java    From sqoop-on-spark with Apache License 2.0 4 votes vote down vote up
/**
 * Converts a row given as an object array into an Avro {@link GenericRecord} built on
 * {@code avroSchema}, mapping each element according to the type of the column at the same index.
 *
 * @param objectArray row values, one per column of {@code schema}; may be {@code null}
 * @return the populated record, or {@code null} when {@code objectArray} is {@code null}
 * @throws SqoopException if the number of values differs from the number of columns, if a
 *     {@code null} value is given for a non-nullable column, or if a column type is not handled
 */
private GenericRecord toAVRO(Object[] objectArray) {

    if (objectArray == null) {
      return null;
    }
    Column[] columns = schema.getColumnsArray();

    if (objectArray.length != columns.length) {
      // Arrays.deepToString prints the row contents (including nested arrays); calling
      // toString() on the array itself would only print its type and identity hash.
      throw new SqoopException(IntermediateDataFormatError.INTERMEDIATE_DATA_FORMAT_0001,
          "The data " + java.util.Arrays.deepToString(objectArray) + " has the wrong number of fields.");
    }
    // The record is created on the Avro schema held in the avroSchema field.
    GenericRecord avroObject = new GenericData.Record(avroSchema);
    for (int i = 0; i < objectArray.length; i++) {
      if (objectArray[i] == null && !columns[i].isNullable()) {
        throw new SqoopException(IntermediateDataFormatError.INTERMEDIATE_DATA_FORMAT_0005,
            columns[i].getName() + " does not support null values");
      }
      // Nulls in nullable columns are stored as-is; no type conversion applies.
      if (objectArray[i] == null) {
        avroObject.put(columns[i].getName(), null);
        continue;
      }

      switch (columns[i].getType()) {
      case ARRAY:
      case SET:
        avroObject.put(columns[i].getName(), toList((Object[]) objectArray[i]));
        break;
      case MAP:
        avroObject.put(columns[i].getName(), objectArray[i]);
        break;
      case ENUM:
        GenericData.EnumSymbol enumValue = new GenericData.EnumSymbol(createEnumSchema(columns[i]),
            (String) objectArray[i]);
        avroObject.put(columns[i].getName(), enumValue);
        break;
      case TEXT:
        avroObject.put(columns[i].getName(), new Utf8((String) objectArray[i]));
        break;
      case BINARY:
      case UNKNOWN:
        avroObject.put(columns[i].getName(), ByteBuffer.wrap((byte[]) objectArray[i]));
        break;
      case FIXED_POINT:
      case FLOATING_POINT:
        avroObject.put(columns[i].getName(), objectArray[i]);
        break;
      case DECIMAL:
        // TODO: store as FIXED in SQOOP-16161
        avroObject.put(columns[i].getName(), ((BigDecimal) objectArray[i]).toPlainString());
        break;
      case DATE_TIME:
        // Stored as epoch milliseconds.
        // NOTE(review): a value that is neither DateTime nor LocalDateTime leaves the field
        // unset without any error - confirm this is intended.
        if (objectArray[i] instanceof org.joda.time.DateTime) {
          avroObject.put(columns[i].getName(), ((org.joda.time.DateTime) objectArray[i]).toDate()
              .getTime());
        } else if (objectArray[i] instanceof org.joda.time.LocalDateTime) {
          avroObject.put(columns[i].getName(), ((org.joda.time.LocalDateTime) objectArray[i])
              .toDate().getTime());
        }
        break;
      case TIME:
        avroObject.put(columns[i].getName(), ((org.joda.time.LocalTime) objectArray[i])
            .toDateTimeToday().getMillis());
        break;
      case DATE:
        avroObject.put(columns[i].getName(), ((org.joda.time.LocalDate) objectArray[i]).toDate()
            .getTime());
        break;
      case BIT:
        avroObject.put(columns[i].getName(), Boolean.valueOf((Boolean) objectArray[i]));
        break;
      default:
        throw new SqoopException(IntermediateDataFormatError.INTERMEDIATE_DATA_FORMAT_0001,
            "Column type from schema was not recognized for " + columns[i].getType());
      }
    }

    return avroObject;
  }
 
Example 17
Source File: AvroRecordInputFormatTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Helper method to test GenericRecord deserialization: reads the first record from the single
 * input split, checks its schema, name, array, enum and map fields, then verifies that exactly
 * one more record follows.
 *
 * @param format
 *            the format to test
 * @param parameters
 *            the configuration to use
 * @throws IOException
 *             thrown if there is an issue
 */
@SuppressWarnings("unchecked")
private void doTestDeserializationGenericRecord(final AvroInputFormat<GenericRecord> format,
		final Configuration parameters) throws IOException {
	try {
		format.configure(parameters);
		FileInputSplit[] splits = format.createInputSplits(1);
		// Expected value first, so a failure reports the numbers the right way round.
		assertEquals(1, splits.length);
		format.open(splits[0]);

		GenericRecord u = format.nextRecord(null);
		assertNotNull(u);
		assertEquals("The schemas should be equal", userSchema, u.getSchema());

		String name = u.get("name").toString();
		assertNotNull("empty record", name);
		assertEquals("name not equal", TEST_NAME, name);

		// check arrays
		List<CharSequence> sl = (List<CharSequence>) u.get("type_array_string");
		assertEquals("element 0 not equal", TEST_ARRAY_STRING_1, sl.get(0).toString());
		assertEquals("element 1 not equal", TEST_ARRAY_STRING_2, sl.get(1).toString());

		List<Boolean> bl = (List<Boolean>) u.get("type_array_boolean");
		assertEquals("element 0 not equal", TEST_ARRAY_BOOLEAN_1, bl.get(0));
		assertEquals("element 1 not equal", TEST_ARRAY_BOOLEAN_2, bl.get(1));

		// check enums
		GenericData.EnumSymbol enumValue = (GenericData.EnumSymbol) u.get("type_enum");
		assertEquals("enum not equal", TEST_ENUM_COLOR.toString(), enumValue.toString());

		// check maps
		Map<CharSequence, Long> lm = (Map<CharSequence, Long>) u.get("type_map");
		assertEquals("map value of key 1 not equal", TEST_MAP_VALUE1, lm.get(new Utf8(TEST_MAP_KEY1)).longValue());
		assertEquals("map value of key 2 not equal", TEST_MAP_VALUE2, lm.get(new Utf8(TEST_MAP_KEY2)).longValue());

		// a second record must exist, and nothing after it
		assertFalse("expecting second element", format.reachedEnd());
		assertNotNull("expecting second element", format.nextRecord(u));

		assertNull(format.nextRecord(u));
		assertTrue(format.reachedEnd());
	} finally {
		format.close();
	}
}
 
Example 18
Source File: FastDeserializerDefaultsTest.java    From avro-fastserde with Apache License 2.0 4 votes vote down vote up
/**
 * Serializes a record with the {@code TestRecord} schema and deserializes it with the schema from
 * {@code defaultsTestSubrecord.avsc}, then checks the values of the added fields
 * ({@code newField}, {@code newSubField}) and that the written values are preserved.
 */
@Test
public void shouldAddFieldsInMiddleOfSchema() throws IOException {
    // given
    Schema.Parser parser = new Schema.Parser();
    Schema oldRecordSchema = TestRecord.getClassSchema();

    // The sub-record uses the second branch of the "subRecordUnion" union.
    GenericData.Record subRecord = new GenericData.Record(oldRecordSchema.getField("subRecordUnion").schema()
            .getTypes().get(1));
    GenericData.EnumSymbol testEnum = new GenericData.EnumSymbol(
            oldRecordSchema.getField("testEnum").schema(), "A");
    GenericData.Fixed testFixed = new GenericData.Fixed(oldRecordSchema.getField("testFixed").schema(),
            new byte[]{0x01});
    GenericData.Record oldRecord = new GenericData.Record(oldRecordSchema);

    oldRecord.put("testInt", 1);
    oldRecord.put("testLong", 1L);
    oldRecord.put("testDouble", 1.0);
    oldRecord.put("testFloat", 1.0f);
    oldRecord.put("testBoolean", true);
    oldRecord.put("testBytes", ByteBuffer.wrap(new byte[]{0x01, 0x02}));
    oldRecord.put("testString", "aaa");
    oldRecord.put("testFixed", testFixed);
    oldRecord.put("testEnum", testEnum);

    subRecord.put("subField", "abc");
    subRecord.put("anotherField", "ghi");

    oldRecord.put("subRecordUnion", subRecord);
    oldRecord.put("subRecord", subRecord);
    oldRecord.put("recordsArray", Collections.singletonList(subRecord));
    Map<Utf8, GenericData.Record> recordsMap = new HashMap<>();
    recordsMap.put(new Utf8("1"), subRecord);
    oldRecord.put("recordsMap", recordsMap);

    oldRecord.put("testFixedArray", Collections.emptyList());
    oldRecord.put("testFixedUnionArray", Collections.emptyList());
    oldRecord.put("testEnumArray", Collections.emptyList());
    oldRecord.put("testEnumUnionArray", Collections.emptyList());
    oldRecord.put("recordsArrayMap", Collections.emptyList());
    oldRecord.put("recordsMapArray", Collections.emptyMap());

    Schema newRecordSchema;
    // Schema.Parser.parse(InputStream) leaves the stream open, so close it here.
    try (java.io.InputStream schemaStream = this.getClass().getResourceAsStream("/schema/defaultsTestSubrecord.avsc")) {
        newRecordSchema = parser.parse(schemaStream);
    }
    // when
    GenericRecord record = deserializeGenericFast(newRecordSchema, oldRecordSchema, serializeGeneric(oldRecord));

    // then
    GenericData.Record newSubRecord = new GenericData.Record(newRecordSchema.getField("subRecordUnion")
            .schema().getTypes().get(1));
    newSubRecord.put("subField", "abc");
    newSubRecord.put("anotherField", "ghi");
    newSubRecord.put("newSubField", "newSubFieldValue");
    // The input map is reused as the expected map: its single entry now points at the
    // sub-record built on the new schema (serialization has already happened above).
    recordsMap.put(new Utf8("1"), newSubRecord);

    Assert.assertEquals("newSubFieldValue",
            ((GenericRecord) record.get("subRecordUnion")).get("newSubField").toString());
    Assert.assertEquals("newFieldValue", record.get("newField").toString());
    Assert.assertEquals(1, record.get("testInt"));
    Assert.assertEquals(1L, record.get("testLong"));
    Assert.assertEquals(1.0, record.get("testDouble"));
    Assert.assertEquals(1.0f, record.get("testFloat"));
    Assert.assertEquals(true, record.get("testBoolean"));
    Assert.assertEquals(ByteBuffer.wrap(new byte[]{0x01, 0x02}), record.get("testBytes"));
    Assert.assertEquals("aaa", record.get("testString").toString());
    Assert.assertEquals(testFixed, record.get("testFixed"));
    Assert.assertEquals(testEnum, record.get("testEnum"));
    Assert.assertEquals(newSubRecord, record.get("subRecordUnion"));

    Assert.assertEquals(Collections.singletonList(newSubRecord), record.get("recordsArray"));
    Assert.assertEquals(recordsMap, record.get("recordsMap"));
    Assert.assertEquals(Collections.emptyList(), record.get("testFixedArray"));
    Assert.assertEquals(Collections.emptyList(), record.get("testFixedUnionArray"));
    Assert.assertEquals(Collections.emptyList(), record.get("testEnumArray"));
    Assert.assertEquals(Collections.emptyList(), record.get("testEnumUnionArray"));
    Assert.assertEquals(Collections.emptyList(), record.get("recordsArrayMap"));
    Assert.assertEquals(Collections.emptyMap(), record.get("recordsMapArray"));

}
 
Example 19
Source File: AvroRecordInputFormatTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Helper method to test GenericRecord deserialization: reads the first record from the single
 * input split, checks its schema, name, array, enum and map fields, then verifies that exactly
 * one more record follows.
 *
 * @param format
 *            the format to test
 * @param parameters
 *            the configuration to use
 * @throws IOException
 *             thrown if there is an issue
 */
@SuppressWarnings("unchecked")
private void doTestDeserializationGenericRecord(final AvroInputFormat<GenericRecord> format,
		final Configuration parameters) throws IOException {
	try {
		format.configure(parameters);
		FileInputSplit[] splits = format.createInputSplits(1);
		// Expected value first, so a failure reports the numbers the right way round.
		assertEquals(1, splits.length);
		format.open(splits[0]);

		GenericRecord u = format.nextRecord(null);
		assertNotNull(u);
		assertEquals("The schemas should be equal", userSchema, u.getSchema());

		String name = u.get("name").toString();
		assertNotNull("empty record", name);
		assertEquals("name not equal", TEST_NAME, name);

		// check arrays
		List<CharSequence> sl = (List<CharSequence>) u.get("type_array_string");
		assertEquals("element 0 not equal", TEST_ARRAY_STRING_1, sl.get(0).toString());
		assertEquals("element 1 not equal", TEST_ARRAY_STRING_2, sl.get(1).toString());

		List<Boolean> bl = (List<Boolean>) u.get("type_array_boolean");
		assertEquals("element 0 not equal", TEST_ARRAY_BOOLEAN_1, bl.get(0));
		assertEquals("element 1 not equal", TEST_ARRAY_BOOLEAN_2, bl.get(1));

		// check enums
		GenericData.EnumSymbol enumValue = (GenericData.EnumSymbol) u.get("type_enum");
		assertEquals("enum not equal", TEST_ENUM_COLOR.toString(), enumValue.toString());

		// check maps
		Map<CharSequence, Long> lm = (Map<CharSequence, Long>) u.get("type_map");
		assertEquals("map value of key 1 not equal", TEST_MAP_VALUE1, lm.get(new Utf8(TEST_MAP_KEY1)).longValue());
		assertEquals("map value of key 2 not equal", TEST_MAP_VALUE2, lm.get(new Utf8(TEST_MAP_KEY2)).longValue());

		// a second record must exist, and nothing after it
		assertFalse("expecting second element", format.reachedEnd());
		assertNotNull("expecting second element", format.nextRecord(u));

		assertNull(format.nextRecord(u));
		assertTrue(format.reachedEnd());
	} finally {
		format.close();
	}
}
 
Example 20
Source File: AvroCompatibilityHelper.java    From avro-util with BSD 2-Clause "Simplified" License 2 votes vote down vote up
/**
 * Builds a {@link org.apache.avro.generic.GenericData.EnumSymbol} for the given enum schema and
 * symbol. {@code assertAvroAvailable()} is called first, then creation is delegated to
 * {@code ADAPTER}.
 *
 * @param enumSchema enum schema
 * @param enumValue enum value (symbol)
 * @return a new {@link org.apache.avro.generic.GenericData.EnumSymbol}
 */
public static GenericData.EnumSymbol newEnumSymbol(Schema enumSchema, String enumValue) {
  assertAvroAvailable();
  GenericData.EnumSymbol symbol = ADAPTER.newEnumSymbol(enumSchema, enumValue);
  return symbol;
}