Java Code Examples for org.apache.avro.generic.GenericRecord#put()

The following examples show how to use org.apache.avro.generic.GenericRecord#put(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: KeyValueUtilsTest.java    From components with Apache License 2.0 6 votes vote down vote up
/**
 * Verifies the key/value split when every top-level field is selected as a key.
 *
 * Input record: {"name": "testdata", "data": {"a": "a", "b": "b", "c": "c"}}
 *
 * Selected keys: "name" and "data" (the complete nested record).
 *
 * Expected split:
 *
 * key: {"name": "testdata", "data": {"a": "a", "b": "b", "c": "c"}}
 *
 * value: {}
 *
 * Merging the split record back is expected to print the same JSON as the input.
 */
@Test
public void test_Hierarchical_EverythingIsAKey() throws Exception {
    // Outer record first; the nested "data" record is attached afterwards via put().
    GenericRecord inputRecord = new GenericRecordBuilder(inputHierarchicalSchema)
            .set("name", "testdata")
            .build();
    GenericRecord nestedData = new GenericRecordBuilder(inputSimpleSchema)
            .set("a", "a")
            .set("b", "b")
            .set("c", "c")
            .build();
    inputRecord.put("data", nestedData);

    List<String> keyList = Arrays.asList("name", "data");

    // Expectations are written with single quotes for readability and converted to JSON quotes.
    String expectedKv = "{'key': {'name': 'testdata', 'data': {'a': 'a', 'b': 'b', 'c': 'c'}}, 'value': {}}"
            .replaceAll("\\'", "\"");
    String expectedMerged = "{'name': 'testdata', 'data': {'a': 'a', 'b': 'b', 'c': 'c'}}"
            .replaceAll("\\'", "\"");

    // Split into key/value and compare the textual form.
    Schema splitSchema = SchemaGeneratorUtils.extractKeyValues(inputRecord.getSchema(), keyList);
    IndexedRecord outputRecord = KeyValueUtils.transformToKV(inputRecord, splitSchema);
    assertEquals(expectedKv, outputRecord.toString());

    // Merge back and compare with the original layout.
    Schema kvSchema = SchemaGeneratorUtils.mergeKeyValues(outputRecord.getSchema());
    IndexedRecord mergedBack = KeyValueUtils.transformFromKV(outputRecord, kvSchema);
    assertEquals(expectedMerged, mergedBack.toString());
}
 
Example 2
Source File: StreamlineEventSerializer.java    From streamline with Apache License 2.0 6 votes vote down vote up
/**
 * Recursively converts a plain Java value into the object Avro's generic API expects for
 * {@code schema}.
 *
 * <ul>
 *   <li>{@code byte[]} with a FIXED schema becomes a {@link GenericData.Fixed}.</li>
 *   <li>A non-empty {@link Map} becomes a {@link GenericData.Record}; each entry is converted
 *       against the schema of the record field with the same name.</li>
 *   <li>A non-empty {@link Collection} becomes a {@link GenericData.Array}; each element is
 *       converted against the array's element schema.</li>
 *   <li>Everything else (including empty maps and empty collections) is returned unchanged.</li>
 * </ul>
 *
 * @param input  the value to convert; may be any object
 * @param schema the Avro schema describing {@code input}
 * @return the converted value, or {@code input} itself when no conversion applies
 * @throws NullPointerException if a map key has no matching field in {@code schema}
 */
@SuppressWarnings("unchecked") // map entries are read as <String, Object>, exactly as before
private static Object getAvroValue(Object input, Schema schema) {
    if (input instanceof byte[] && Schema.Type.FIXED.equals(schema.getType())) {
        return new GenericData.Fixed(schema, (byte[]) input);
    }

    if (input instanceof Map && !((Map<?, ?>) input).isEmpty()) {
        GenericRecord result = new GenericData.Record(schema);
        for (Map.Entry<String, Object> entry : ((Map<String, Object>) input).entrySet()) {
            Schema.Field field = schema.getField(entry.getKey());
            if (field == null) {
                // Previously surfaced as a message-less NPE; keep the type but say what went wrong.
                throw new NullPointerException(
                        "No field named '" + entry.getKey() + "' in Avro schema " + schema.getFullName());
            }
            result.put(entry.getKey(), getAvroValue(entry.getValue(), field.schema()));
        }
        return result;
    }

    if (input instanceof Collection && !((Collection<?>) input).isEmpty()) {
        // Although the Schema in the streamline registry supports arrays with mixed element types,
        // Avro expects all elements of an array schema to share one type, so for now arrays are
        // restricted to a single element type. The alternative would be to convert a streamline
        // Schema Array field to an Avro Record, but the Avro Field constructor does not allow a
        // null name. We could potentially work around that with a dummy name such as "arrayfield",
        // but that seems hacky, so that path is not taken.
        Collection<?> elements = (Collection<?>) input;
        List<Object> values = new ArrayList<>(elements.size());
        for (Object value : elements) {
            values.add(getAvroValue(value, schema.getElementType()));
        }
        return new GenericData.Array<Object>(schema, values);
    }

    return input;
}
 
Example 3
Source File: TestReadParquetAfterSchemaEvolution.java    From kite with Apache License 2.0 6 votes vote down vote up
/**
 * Prepares the fixture: creates a Parquet dataset "ns"/"test" using the old value schema,
 * writes {@code totalRecords} records into it, updates the dataset descriptor to the schema
 * derived from {@code Value.class}, and loads the dataset for reading.
 *
 * @throws IOException if the local file system cannot be obtained or writing fails
 */
@BeforeClass
public static void setup() throws IOException {
  fs = LocalFileSystem.getInstance();
  testDirectory = new Path(Files.createTempDir().getAbsolutePath());
  FileSystemDatasetRepository repo = new FileSystemDatasetRepository(fs.getConf(),
      testDirectory);
  Dataset<GenericRecord> writerDataset = repo.create("ns", "test", new DatasetDescriptor.Builder()
                                 .schema(DatasetTestUtilities.OLD_VALUE_SCHEMA)
                                 .format(Formats.PARQUET)
                                 .build(), GenericRecord.class);

  DatasetWriter<GenericRecord> writer = writerDataset.newWriter();
  try {
    // The same record instance is reused; only its "value" field changes per write.
    GenericRecord record = new GenericData.Record(DatasetTestUtilities.OLD_VALUE_SCHEMA);
    for (long i = 0; i < totalRecords; i++) {
      record.put("value", Long.valueOf(i));
      writer.write(record);
    }
  } finally {
    // Close even when a write fails so the underlying file handle is not leaked.
    writer.close();
  }

  repo.update("ns", "test", new DatasetDescriptor.Builder(writerDataset.getDescriptor())
    .schema(Value.class).build());

  readerDataset = repo.load("ns", "test", GenericRecord.class);
}
 
Example 4
Source File: AvroStreamsSnapshotDeserializerTest.java    From streamline with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a sample root record for {@code schema}, including a nested "address" record.
 * All generated values are derived from a single timestamp taken during the call.
 *
 * @param schema root schema; its "address" field supplies the schema of the nested record
 * @return the populated root record
 */
private GenericRecord generateGenericRecord(Schema schema) {
    final Schema addressSchema = schema.getField("address").schema();
    final GenericRecord address = new GenericData.Record(addressSchema);
    final long timestamp = System.currentTimeMillis();

    // Nested address part.
    address.put("streetaddress", "streetaddress:" + timestamp);
    address.put("city", "city-" + timestamp);
    address.put("state", "state-" + timestamp);
    address.put("zip", "zip" + timestamp);

    // Root part, with the address attached last.
    final GenericRecord root = new GenericData.Record(schema);
    root.put("xid", timestamp);
    root.put("name", "name-" + timestamp);
    root.put("version", 1);
    root.put("timestamp", timestamp);
    root.put("suit", "SPADES");
    root.put("address", address);
    return root;
}
 
Example 5
Source File: CopyableGenericRecordTest.java    From incubator-gobblin with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that a copy made through {@code CopyableGenericRecord} starts out equal to the
 * source record and stops being equal once the copy is modified.
 */
@Test
public void testCopy() throws CopyNotSupportedException {
  Schema parsedSchema = new Schema.Parser().parse(AVRO_SCHEMA);
  GenericRecord original = new GenericData.Record(parsedSchema);
  original.put("name", "foo");
  original.put("favorite_number", 68);
  original.put("favorite_colors", Arrays.asList("blue", "black", "red"));

  GenericRecord duplicate = new CopyableGenericRecord(original).copy();
  Assert.assertEquals(original, duplicate);

  // Changing the copy must make the two records differ.
  duplicate.put("name", "bar");
  Assert.assertNotEquals(original, duplicate);
}
 
Example 6
Source File: TestHoodieAvroUtils.java    From hudi with Apache License 2.0 5 votes vote down vote up
/**
 * Rewrites a three-field record into an evolved schema that adds two fields whose defaults
 * are expressed as {@code NullNode.getInstance()} and {@code JsonProperties.NULL_VALUE}
 * respectively, and checks that both added fields read back as {@code null}.
 */
@Test
public void testJsonNodeNullWithDefaultValues() {
  String[] sharedFieldNames = {"key", "key1", "key2"};

  // Original schema: key, key1, key2.
  Schema initialSchema = Schema.createRecord("test_record", "test record", "org.test.namespace", false);
  List<Schema.Field> fields = new ArrayList<>();
  for (String fieldName : sharedFieldNames) {
    fields.add(new Schema.Field(fieldName, HoodieAvroUtils.METADATA_FIELD_SCHEMA, "", JsonProperties.NULL_VALUE));
  }
  initialSchema.setFields(fields);

  GenericRecord rec = new GenericData.Record(initialSchema);
  rec.put("key", "val");
  rec.put("key1", "val1");
  rec.put("key2", "val2");

  // Evolved schema: the same three fields (as fresh Field instances) plus two new ones.
  Schema evolvedSchema = Schema.createRecord("evolved_record", "evolved record", "org.evolved.namespace", false);
  List<Schema.Field> evolvedFields = new ArrayList<>();
  for (String fieldName : sharedFieldNames) {
    evolvedFields.add(new Schema.Field(fieldName, HoodieAvroUtils.METADATA_FIELD_SCHEMA, "", JsonProperties.NULL_VALUE));
  }
  evolvedFields.add(
      new Schema.Field("evolved_field", HoodieAvroUtils.METADATA_FIELD_SCHEMA, "", NullNode.getInstance()));
  evolvedFields.add(
      new Schema.Field("evolved_field1", HoodieAvroUtils.METADATA_FIELD_SCHEMA, "", JsonProperties.NULL_VALUE));
  evolvedSchema.setFields(evolvedFields);

  GenericRecord rec1 = HoodieAvroUtils.rewriteRecord(rec, evolvedSchema);
  // Per the original author's note: defaultVal() of "evolved_field" returns a JsonProperties.Null instance.
  assertNull(rec1.get("evolved_field"));
  // Per the original author's note: defaultVal() of "evolved_field1" returns null.
  assertNull(rec1.get("evolved_field1"));
}
 
Example 7
Source File: AvroUtilsTest.java    From incubator-gobblin with Apache License 2.0 5 votes vote down vote up
/**
 * Exercises {@code AvroUtils.overrideNameAndNamespace} twice: once with an empty namespace
 * override map (name changes, namespace stays) and once with a map entry for the input
 * namespace (both name and namespace change). The field value must survive both times.
 */
@Test
public void overrideNameAndNamespaceTest() throws IOException{

  final String sourceName = "input_name";
  final String sourceNamespace = "input_namespace";
  final String targetName = "output_name";
  final String targetNamespace = "output_namespace";

  Schema sourceSchema = SchemaBuilder.record(sourceName).namespace(sourceNamespace).fields()
      .name("integer1")
      .type().intBuilder().endInt().noDefault()
      .endRecord();

  GenericRecord source = new GenericData.Record(sourceSchema);
  source.put("integer1", 10);

  // Case 1: empty override map, so only the record name is replaced.
  GenericRecord renamed = AvroUtils.overrideNameAndNamespace(source, targetName, Optional.of(Collections.EMPTY_MAP));
  Schema renamedSchema = renamed.getSchema();
  Assert.assertEquals(renamedSchema.getName(), targetName);
  Assert.assertEquals(renamedSchema.getNamespace(), sourceNamespace);
  Assert.assertEquals(renamed.get("integer1"), 10);

  // Case 2: the input namespace is mapped to the output namespace.
  Map<String,String> namespaceOverrides = new HashMap<>();
  namespaceOverrides.put(sourceNamespace, targetNamespace);

  renamed = AvroUtils.overrideNameAndNamespace(source, targetName, Optional.of(namespaceOverrides));
  renamedSchema = renamed.getSchema();
  Assert.assertEquals(renamedSchema.getName(), targetName);
  Assert.assertEquals(renamedSchema.getNamespace(), targetNamespace);
  Assert.assertEquals(renamed.get("integer1"), 10);

}
 
Example 8
Source File: AvroToBytesConverterTest.java    From incubator-gobblin with Apache License 2.0 5 votes vote down vote up
/**
 * Converts a generic record to bytes twice and compares each result with a reference binary
 * loaded from the classpath. The second pass writes a slightly different string to make sure
 * the converter's caching does not return stale output.
 *
 * Classpath streams are opened in try-with-resources so they are closed once consumed.
 */
@Test
public void testSerialization()
    throws DataConversionException, IOException, SchemaConversionException {
  ClassLoader loader = getClass().getClassLoader();

  Schema inputSchema;
  try (java.io.InputStream schemaStream =
      loader.getResourceAsStream("converter/bytes_to_avro/test_record_schema.avsc")) {
    inputSchema = new Schema.Parser().parse(schemaStream);
  }

  AvroToBytesConverter converter = new AvroToBytesConverter();
  WorkUnitState state = new WorkUnitState();
  converter.init(state);
  String outputSchema = converter.convertSchema(inputSchema, state);

  // Write a record twice to make sure nothing goes wrong with caching
  for (int i = 0; i < 2; i++) {
    GenericRecord testRecord = new GenericData.Record(inputSchema);
    testRecord.put("testStr", "testing12" + ((i == 0) ? "3": "4"));
    testRecord.put("testInt", -2);

    Iterator<byte[]> records = converter.convertRecord(outputSchema, testRecord, state).iterator();
    byte[] record = records.next();

    // Exactly one output record is expected per input record.
    Assert.assertFalse(records.hasNext());

    // A fresh copy of the reference bytes is loaded each pass because it is patched below.
    byte[] expectedRecord;
    try (java.io.InputStream expectedStream =
        loader.getResourceAsStream("converter/bytes_to_avro/test_record_binary.avro")) {
      expectedRecord = IOUtils.toByteArray(expectedStream);
    }

    // the serialized record was serialized with testing123 as the string; if we write testing124 out
    // contents should be the same except for the 10th byte which will be '4' instead of '3'
    if (i == 1) {
      expectedRecord[10] = 52;
    }

    Assert.assertEquals(outputSchema, inputSchema.toString());
    Assert.assertEquals(record, expectedRecord);
  }
}
 
Example 9
Source File: AvroSpoolDirSourceTestUtil.java    From datacollector with Apache License 2.0 5 votes vote down vote up
/**
 * Writes an Avro data file named "file-0.avro" into a fresh test directory. The file holds
 * three employee records (a, b, c, appended in that order), each referencing the same
 * "boss" record, whose own "boss" field is null.
 *
 * @return the file that was written
 * @throws Exception if the test directory cannot be created or writing the file fails
 */
public static File createAvroDataFile() throws Exception {
  File f = new File(createTestDir(), "file-0.avro");
  Schema schema = new Schema.Parser().parse(AVRO_SCHEMA);
  GenericRecord boss = new GenericData.Record(schema);
  boss.put("name", "boss");
  boss.put("age", 60);
  boss.put("emails", ImmutableList.of("[email protected]", "[email protected]"));
  boss.put("boss", null);

  GenericRecord e3 = new GenericData.Record(schema);
  e3.put("name", "c");
  e3.put("age", 50);
  e3.put("emails", ImmutableList.of("[email protected]", "[email protected]"));
  e3.put("boss", boss);

  GenericRecord e2 = new GenericData.Record(schema);
  e2.put("name", "b");
  e2.put("age", 40);
  e2.put("emails", ImmutableList.of("[email protected]", "[email protected]"));
  e2.put("boss", boss);

  GenericRecord e1 = new GenericData.Record(schema);
  e1.put("name", "a");
  e1.put("age", 30);
  e1.put("emails", ImmutableList.of("[email protected]", "[email protected]"));
  e1.put("boss", boss);

  DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(schema);
  // try-with-resources closes the writer (which also flushes it) even if an append fails,
  // so the file handle is never leaked.
  try (DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<>(datumWriter)) {
    dataFileWriter.create(schema, f);
    dataFileWriter.append(e1);
    dataFileWriter.append(e2);
    dataFileWriter.append(e3);
  }

  return f;
}
 
Example 10
Source File: OracleGenericMessage.java    From DBus with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a generic record for {@code genericSchema} and fills it from this message:
 * namespace, schema hash, and the payload wrapped in a {@link ByteBuffer}.
 *
 * @param genericSchema schema of the record to create
 * @return the populated record
 */
public GenericRecord generateRecord(Schema genericSchema) {
    final GenericRecord message = new GenericData.Record(genericSchema);
    message.put(NAMESAPCE, this.nameSpace);
    message.put(SCHEMA_HASH, this.schemaHash);

    // The payload bytes are wrapped (not copied) before being stored.
    final ByteBuffer wrappedPayload = ByteBuffer.wrap(this.payload);
    message.put(PAYLOAD, wrappedPayload);
    return message;
}
 
Example 11
Source File: AvroSchemaRegistryClientUtil.java    From registry with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a generic record for the schema in "/device-compat.avsc". String and long values
 * are derived from one timestamp taken during the call; "version" is a random int.
 *
 * @return the populated record, typed as {@code Object}
 * @throws IOException declared by the method; presumably raised while loading the schema
 */
public static Object createGenericRecordForCompatDevice() throws IOException {
    final Schema deviceSchema = new Schema.Parser().parse(getSchema("/device-compat.avsc"));
    final GenericRecord device = new GenericData.Record(deviceSchema);

    final long timestamp = System.currentTimeMillis();
    device.put("xid", timestamp);
    device.put("name", "foo-" + timestamp);
    final int randomVersion = new Random().nextInt();
    device.put("version", randomVersion);
    device.put("timestamp", timestamp);
    device.put("make", "make-" + timestamp);
    return device;
}
 
Example 12
Source File: TestAvroExport.java    From aliyun-maxcompute-data-collectors with Apache License 2.0 5 votes vote down vote up
/**
 * Adds the export value of each extra column to {@code record}. Generators without an Avro
 * column schema are skipped and do not consume a column index.
 *
 * @param record    record to populate
 * @param rowNum    row number passed to each generator
 * @param extraCols generators for the extra columns
 */
private void addExtraColumns(GenericRecord record, int rowNum,
    ColumnGenerator[] extraCols) {
  int nextColumn = 0;
  for (int g = 0; g < extraCols.length; g++) {
    ColumnGenerator generator = extraCols[g];
    if (generator.getColumnAvroSchema() == null) {
      continue;
    }
    String columnName = forIdx(nextColumn);
    nextColumn++;
    record.put(columnName, generator.getExportValue(rowNum));
  }
}
 
Example 13
Source File: TestHoodieAvroUtils.java    From hudi with Apache License 2.0 5 votes vote down vote up
/**
 * Rewrites a record built from EXAMPLE_SCHEMA into SCHEMA_WITH_METADATA_FIELD and checks
 * that three fields the source record never set read back as {@code null}.
 */
@Test
public void testMetadataField() {
  Schema sourceSchema = new Schema.Parser().parse(EXAMPLE_SCHEMA);
  GenericRecord source = new GenericData.Record(sourceSchema);
  source.put("_row_key", "key1");
  source.put("non_pii_col", "val1");
  source.put("pii_col", "val2");
  source.put("timestamp", 3.5);

  // A separate parser instance is used for the target schema, as in the original.
  Schema targetSchema = new Schema.Parser().parse(SCHEMA_WITH_METADATA_FIELD);
  GenericRecord rewritten = HoodieAvroUtils.rewriteRecord(source, targetSchema);

  for (String unsetField : new String[] {"_hoodie_commit_time", "nullable_field", "nullable_field_wo_default"}) {
    assertNull(rewritten.get(unsetField));
  }
}
 
Example 14
Source File: AvroOutputFormatTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Configures and opens {@code outputFormat}, writes 100 identical user records built from
 * {@code schema}, then closes the format.
 *
 * @param outputFormat format under test
 * @param schema       schema used for every written record
 * @throws IOException if opening, writing or closing fails
 */
private void output(final AvroOutputFormat<GenericRecord> outputFormat, Schema schema) throws IOException {
	outputFormat.configure(new Configuration());
	outputFormat.open(1, 1);

	int remaining = 100;
	while (remaining > 0) {
		// A new record instance is created for each write.
		GenericRecord user = new GenericData.Record(schema);
		user.put("user_name", "testUser");
		user.put("favorite_number", 1);
		user.put("favorite_color", "blue");
		outputFormat.writeRecord(user);
		remaining--;
	}

	outputFormat.close();
}
 
Example 15
Source File: FetchParquetTest.java    From nifi with Apache License 2.0 5 votes vote down vote up
/**
 * Writes {@code numUsers} user records to {@code parquetFile}, replacing the file if it
 * already exists. Each user carries a one-element array of colors.
 *
 * @param parquetFile destination file
 * @param numUsers    number of user records to write
 * @throws IOException if building the writer or writing a record fails
 */
private void writeParquetUsersWithNullableArray(final File parquetFile, int numUsers) throws IOException {
    final boolean alreadyPresent = parquetFile.exists();
    if (alreadyPresent) {
        Assert.assertTrue(parquetFile.delete());
    }

    final AvroParquetWriter.Builder<GenericRecord> writerBuilder = createAvroParquetWriter(parquetFile, schemaWithNullableArray);

    // schemaWithArray is used only to obtain the plain array schema of "favorite_colors";
    // in schemaWithNullableArray that field is a union of the array schema and null.
    final Schema favoriteColorsSchema = schemaWithArray.getField("favorite_colors").schema();

    try (final ParquetWriter<GenericRecord> writer = writerBuilder.build()) {
        int userIndex = 0;
        while (userIndex < numUsers) {
            // NOTE(review): the record uses `schema` (not schemaWithNullableArray) and the array is
            // stored under "favorite_color" (singular) - kept as in the original; confirm intent.
            final GenericRecord user = new GenericData.Record(schema);
            user.put("name", "Bob" + userIndex);
            user.put("favorite_number", userIndex);

            final GenericData.Array<String> colors = new GenericData.Array<>(1, favoriteColorsSchema);
            colors.add("blue" + userIndex);
            user.put("favorite_color", colors);

            writer.write(user);
            userIndex++;
        }
    }
}
 
Example 16
Source File: FastGenericDeserializerGeneratorTest.java    From avro-util with BSD 2-Clause "Simplified" License 5 votes vote down vote up
/**
 * Writes enum values with a writer schema whose symbols are ordered A..E and decodes them
 * with a reader schema whose symbols are permuted, then checks each decoded symbol by name.
 * Covers a plain enum field, a union field, an array of enums and an array of unions.
 */
@Test(groups = {"deserializationTest"}, dataProvider = "Implementation")
public void shouldReadPermutatedEnum(Implementation implementation) {
  // given: writer side
  Schema writerEnum = createEnumSchema("testEnum", new String[]{"A", "B", "C", "D", "E"});
  Schema writerSchema = createRecord(
      createField("testEnum", writerEnum),
      createUnionField("testEnumUnion", writerEnum),
      createArrayFieldSchema("testEnumArray", writerEnum),
      createArrayFieldSchema("testEnumUnionArray", createUnionSchema(writerEnum)));

  GenericRecord source = new GenericData.Record(writerSchema);
  source.put("testEnum", AvroCompatibilityHelper.newEnumSymbol(writerEnum, "A"));
  source.put("testEnumUnion", AvroCompatibilityHelper.newEnumSymbol(writerEnum, "B"));
  source.put("testEnumArray", Arrays.asList(AvroCompatibilityHelper.newEnumSymbol(writerEnum, "C")));
  source.put("testEnumUnionArray", Arrays.asList(AvroCompatibilityHelper.newEnumSymbol(writerEnum, "D")));

  // given: reader side, same symbols in a different order
  Schema readerEnum = createEnumSchema("testEnum", new String[]{"B", "A", "D", "E", "C"});
  Schema readerSchema = createRecord(
      createField("testEnum", readerEnum),
      createUnionField("testEnumUnion", readerEnum),
      createArrayFieldSchema("testEnumArray", readerEnum),
      createArrayFieldSchema("testEnumUnionArray", createUnionSchema(readerEnum)));

  // when
  GenericRecord decoded = implementation.decode(writerSchema, readerSchema, genericDataAsDecoder(source));

  // then
  Assert.assertEquals("A", decoded.get("testEnum").toString());
  Assert.assertEquals("B", decoded.get("testEnumUnion").toString());
  List<GenericData.EnumSymbol> enumArray = (List<GenericData.EnumSymbol>) decoded.get("testEnumArray");
  Assert.assertEquals("C", enumArray.get(0).toString());
  List<GenericData.EnumSymbol> enumUnionArray = (List<GenericData.EnumSymbol>) decoded.get("testEnumUnionArray");
  Assert.assertEquals("D", enumUnionArray.get(0).toString());
}
 
Example 17
Source File: TestMergeContent.java    From localization_nifi with Apache License 2.0 4 votes vote down vote up
/**
 * Enqueues three Avro-serialized user records that share one schema, runs MergeContent with
 * min/max entries of 3 in Avro merge format, and verifies that a single merged flow file is
 * produced which contains all three users.
 */
@Test
public void testSimpleAvroConcat() throws IOException, InterruptedException {
    final TestRunner runner = TestRunners.newTestRunner(new MergeContent());
    runner.setProperty(MergeContent.MAX_ENTRIES, "3");
    runner.setProperty(MergeContent.MIN_ENTRIES, "3");
    runner.setProperty(MergeContent.MERGE_FORMAT, MergeContent.MERGE_FORMAT_AVRO);

    final Schema schema = new Schema.Parser().parse(new File("src/test/resources/TestMergeContent/user.avsc"));

    // Alyssa has no favorite color set; Ben and John do.
    final GenericRecord alyssa = new GenericData.Record(schema);
    alyssa.put("name", "Alyssa");
    alyssa.put("favorite_number", 256);

    final GenericRecord ben = new GenericData.Record(schema);
    ben.put("name", "Ben");
    ben.put("favorite_number", 7);
    ben.put("favorite_color", "red");

    final GenericRecord john = new GenericData.Record(schema);
    john.put("name", "John");
    john.put("favorite_number", 5);
    john.put("favorite_color", "blue");

    // Serialize each record and enqueue it, in the order Alyssa, Ben, John.
    final DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(schema);
    final ByteArrayOutputStream alyssaBytes = serializeAvroRecord(schema, alyssa, datumWriter);
    final ByteArrayOutputStream benBytes = serializeAvroRecord(schema, ben, datumWriter);
    final ByteArrayOutputStream johnBytes = serializeAvroRecord(schema, john, datumWriter);
    for (ByteArrayOutputStream serialized : Arrays.asList(alyssaBytes, benBytes, johnBytes)) {
        runner.enqueue(serialized.toByteArray());
    }

    runner.run();
    runner.assertQueueEmpty();
    runner.assertTransferCount(MergeContent.REL_MERGED, 1);
    runner.assertTransferCount(MergeContent.REL_FAILURE, 0);
    runner.assertTransferCount(MergeContent.REL_ORIGINAL, 3);

    final MockFlowFile bundle = runner.getFlowFilesForRelationship(MergeContent.REL_MERGED).get(0);
    bundle.assertAttributeEquals(CoreAttributes.MIME_TYPE.key(), "application/avro-binary");

    // Read the merged content back, keyed by the "name" field.
    byte[] data = runner.getContentAsByteArray(bundle);
    final Map<String, GenericRecord> users = getGenericRecordMap(data, schema, "name");

    Assert.assertEquals(3, users.size());
    for (String expectedName : new String[] {"Alyssa", "Ben", "John"}) {
        Assert.assertTrue(users.containsKey(expectedName));
    }
}
 
Example 18
Source File: TestMergeContent.java    From localization_nifi with Apache License 2.0 4 votes vote down vote up
/**
 * Enqueues two user records and one place record (a different schema) and runs MergeContent
 * in Avro merge format. Expects one merged flow file holding the two users and one failure
 * flow file holding the place record.
 */
@Test
public void testAvroConcatWithDifferentSchemas() throws IOException, InterruptedException {
    final TestRunner runner = TestRunners.newTestRunner(new MergeContent());
    runner.setProperty(MergeContent.MAX_ENTRIES, "3");
    runner.setProperty(MergeContent.MIN_ENTRIES, "3");
    runner.setProperty(MergeContent.MERGE_FORMAT, MergeContent.MERGE_FORMAT_AVRO);

    final Schema userSchema = new Schema.Parser().parse(new File("src/test/resources/TestMergeContent/user.avsc"));
    final Schema placeSchema = new Schema.Parser().parse(new File("src/test/resources/TestMergeContent/place.avsc"));

    final GenericRecord alyssa = new GenericData.Record(userSchema);
    alyssa.put("name", "Alyssa");
    alyssa.put("favorite_number", 256);

    final GenericRecord place = new GenericData.Record(placeSchema);
    place.put("name", "Some Place");

    final GenericRecord john = new GenericData.Record(userSchema);
    john.put("name", "John");
    john.put("favorite_number", 5);
    john.put("favorite_color", "blue");

    // The datum writer is created for the user schema and passed to all three serializations,
    // exactly as in the original test.
    final DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(userSchema);
    final ByteArrayOutputStream alyssaBytes = serializeAvroRecord(userSchema, alyssa, datumWriter);
    final ByteArrayOutputStream placeBytes = serializeAvroRecord(placeSchema, place, datumWriter);
    final ByteArrayOutputStream johnBytes = serializeAvroRecord(userSchema, john, datumWriter);
    for (ByteArrayOutputStream serialized : Arrays.asList(alyssaBytes, placeBytes, johnBytes)) {
        runner.enqueue(serialized.toByteArray());
    }

    runner.run();
    runner.assertQueueEmpty();
    runner.assertTransferCount(MergeContent.REL_MERGED, 1);
    runner.assertTransferCount(MergeContent.REL_FAILURE, 1);
    runner.assertTransferCount(MergeContent.REL_ORIGINAL, 3);

    // Merged bundle: the two user records.
    final MockFlowFile bundle = runner.getFlowFilesForRelationship(MergeContent.REL_MERGED).get(0);
    bundle.assertAttributeEquals(CoreAttributes.MIME_TYPE.key(), "application/avro-binary");

    final byte[] mergedData = runner.getContentAsByteArray(bundle);
    final Map<String, GenericRecord> users = getGenericRecordMap(mergedData, userSchema, "name");
    Assert.assertEquals(2, users.size());
    Assert.assertTrue(users.containsKey("Alyssa"));
    Assert.assertTrue(users.containsKey("John"));

    // Failure relationship: the record with the other schema.
    final MockFlowFile failure = runner.getFlowFilesForRelationship(MergeContent.REL_FAILURE).get(0);
    final byte[] failureData = runner.getContentAsByteArray(failure);
    final Map<String, GenericRecord> places = getGenericRecordMap(failureData, placeSchema, "name");
    Assert.assertEquals(1, places.size());
    Assert.assertTrue(places.containsKey("Some Place"));
}
 
Example 19
Source File: FastGenericDeserializerGeneratorTest.java    From avro-util with BSD 2-Clause "Simplified" License 4 votes vote down vote up
/**
 * Round-trips a record holding every primitive type, each both as a plain field and as a
 * union field, through the deserializer under test, using the same schema for writing and
 * reading, and checks every decoded value. Strings are compared as {@code Utf8}.
 */
@Test(groups = {"deserializationTest"}, dataProvider = "Implementation")
public void shouldReadPrimitives(Implementation implementation) {
  // given
  Schema primitivesSchema = createRecord(
      createField("testInt", Schema.create(Schema.Type.INT)),
      createPrimitiveUnionFieldSchema("testIntUnion", Schema.Type.INT),
      createField("testString", Schema.create(Schema.Type.STRING)),
      createPrimitiveUnionFieldSchema("testStringUnion", Schema.Type.STRING),
      createField("testLong", Schema.create(Schema.Type.LONG)),
      createPrimitiveUnionFieldSchema("testLongUnion", Schema.Type.LONG),
      createField("testDouble", Schema.create(Schema.Type.DOUBLE)),
      createPrimitiveUnionFieldSchema("testDoubleUnion", Schema.Type.DOUBLE),
      createField("testFloat", Schema.create(Schema.Type.FLOAT)),
      createPrimitiveUnionFieldSchema("testFloatUnion", Schema.Type.FLOAT),
      createField("testBoolean", Schema.create(Schema.Type.BOOLEAN)),
      createPrimitiveUnionFieldSchema("testBooleanUnion", Schema.Type.BOOLEAN),
      createField("testBytes", Schema.create(Schema.Type.BYTES)),
      createPrimitiveUnionFieldSchema("testBytesUnion", Schema.Type.BYTES));

  GenericRecord original = new GenericData.Record(primitivesSchema);
  original.put("testInt", 1);
  original.put("testIntUnion", 1);
  original.put("testString", "aaa");
  original.put("testStringUnion", "aaa");
  original.put("testLong", 1L);
  original.put("testLongUnion", 1L);
  original.put("testDouble", 1.0);
  original.put("testDoubleUnion", 1.0);
  original.put("testFloat", 1.0f);
  original.put("testFloatUnion", 1.0f);
  original.put("testBoolean", true);
  original.put("testBooleanUnion", true);
  original.put("testBytes", ByteBuffer.wrap(new byte[]{0x01, 0x02}));
  original.put("testBytesUnion", ByteBuffer.wrap(new byte[]{0x01, 0x02}));

  // when: writer and reader schema are the same
  GenericRecord decoded = implementation.decode(primitivesSchema, primitivesSchema, genericDataAsDecoder(original));

  // then
  Assert.assertEquals(1, decoded.get("testInt"));
  Assert.assertEquals(1, decoded.get("testIntUnion"));
  Assert.assertEquals(new Utf8("aaa"), decoded.get("testString"));
  Assert.assertEquals(new Utf8("aaa"), decoded.get("testStringUnion"));
  Assert.assertEquals(1L, decoded.get("testLong"));
  Assert.assertEquals(1L, decoded.get("testLongUnion"));
  Assert.assertEquals(1.0, decoded.get("testDouble"));
  Assert.assertEquals(1.0, decoded.get("testDoubleUnion"));
  Assert.assertEquals(1.0f, decoded.get("testFloat"));
  Assert.assertEquals(1.0f, decoded.get("testFloatUnion"));
  Assert.assertEquals(true, decoded.get("testBoolean"));
  Assert.assertEquals(true, decoded.get("testBooleanUnion"));
  Assert.assertEquals(ByteBuffer.wrap(new byte[]{0x01, 0x02}), decoded.get("testBytes"));
  Assert.assertEquals(ByteBuffer.wrap(new byte[]{0x01, 0x02}), decoded.get("testBytesUnion"));
}
 
Example 20
Source File: BigQueryIOWriteTest.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Writes two input records to the fake BigQuery service through the Avro write path, using
 * a custom format function and a custom datum writer that appends "_custom" to every string
 * it writes, then checks the rows stored in the fake dataset service.
 */
@Test
public void testWriteAvroWithCustomWriter() throws Exception {
  // Maps an InputRecord onto a GenericRecord of the requested schema.
  SerializableFunction<AvroWriteRequest<InputRecord>, GenericRecord> formatFunction =
      request -> {
        GenericRecord avroRecord = new GenericData.Record(request.getSchema());
        InputRecord element = request.getElement();
        avroRecord.put("strVal", element.strVal());
        avroRecord.put("longVal", element.longVal());
        avroRecord.put("doubleVal", element.doubleVal());
        // Milliseconds are multiplied by 1000 before being stored.
        avroRecord.put("instantVal", element.instantVal().getMillis() * 1000);
        return avroRecord;
      };

  // The factory ignores the schema argument and returns a writer that suffixes strings.
  SerializableFunction<org.apache.avro.Schema, DatumWriter<GenericRecord>> customWriterFactory =
      unusedSchema ->
          new GenericDatumWriter<GenericRecord>() {
            @Override
            protected void writeString(org.apache.avro.Schema schema, Object datum, Encoder out)
                throws IOException {
              super.writeString(schema, datum.toString() + "_custom", out);
            }
          };

  TableSchema tableSchema =
      new TableSchema()
          .setFields(
              ImmutableList.of(
                  new TableFieldSchema().setName("strVal").setType("STRING"),
                  new TableFieldSchema().setName("longVal").setType("INTEGER"),
                  new TableFieldSchema().setName("doubleVal").setType("FLOAT"),
                  new TableFieldSchema().setName("instantVal").setType("TIMESTAMP")));

  p.apply(
          Create.of(
                  InputRecord.create("test", 1, 1.0, Instant.parse("2019-01-01T00:00:00Z")),
                  InputRecord.create("test2", 2, 2.0, Instant.parse("2019-02-01T00:00:00Z")))
              .withCoder(INPUT_RECORD_CODER))
      .apply(
          BigQueryIO.<InputRecord>write()
              .to("dataset-id.table-id")
              .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
              .withSchema(tableSchema)
              .withTestServices(fakeBqServices)
              .withAvroWriter(formatFunction, customWriterFactory)
              .withoutValidation());
  p.run();

  // Strings carry the "_custom" suffix added by the custom writer.
  TableRow firstExpected =
      new TableRow()
          .set("strVal", "test_custom")
          .set("longVal", "1")
          .set("doubleVal", 1.0D)
          .set("instantVal", "2019-01-01 00:00:00 UTC");
  TableRow secondExpected =
      new TableRow()
          .set("strVal", "test2_custom")
          .set("longVal", "2")
          .set("doubleVal", 2.0D)
          .set("instantVal", "2019-02-01 00:00:00 UTC");

  assertThat(
      fakeDatasetService.getAllRows("project-id", "dataset-id", "table-id"),
      containsInAnyOrder(firstExpected, secondExpected));
}