Java Code Examples for org.apache.avro.generic.GenericData#setStringType()

The following examples show how to use org.apache.avro.generic.GenericData#setStringType(), which controls whether Avro's generic readers decode string data as java.lang.String or as the default org.apache.avro.util.Utf8. Each example is taken from an open-source project; the source file, project, and license are noted above each listing.
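Before the examples, here is a minimal, self-contained sketch (the class name StringTypeDemo is ours, not taken from any of the projects below) of what the call actually does: it stamps the "avro.java.string" property onto a string schema, and Avro's generic readers honor that property when decoding.

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;

public class StringTypeDemo {
    public static void main(String[] args) {
        // By default, generic readers decode Avro strings as org.apache.avro.util.Utf8.
        Schema plain = Schema.create(Schema.Type.STRING);

        // setStringType marks the schema so the same data is decoded as java.lang.String.
        Schema javaString = Schema.create(Schema.Type.STRING);
        GenericData.setStringType(javaString, GenericData.StringType.String);

        System.out.println(plain);      // "string"
        System.out.println(javaString); // {"type":"string","avro.java.string":"String"}
    }
}

Because the marker lives on the Schema object itself, it must be applied before the schema is used for decoding. The examples below do this either when building the schema up front or, as in Examples 4 and 5, on the schema read back from a data file before consuming its records.
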
Example 1
Source File: FastGenericDeserializerGeneratorTest.java    From avro-fastserde with Apache License 2.0
@Test
public void shouldReadArrayOfJavaStrings() {
    // given
    Schema javaStringSchema = Schema.create(Schema.Type.STRING);
    GenericData.setStringType(javaStringSchema, GenericData.StringType.String);
    Schema javaStringArraySchema = Schema.createArray(javaStringSchema);

    GenericData.Array<String> javaStringArray = new GenericData.Array<>(0, javaStringArraySchema);
    javaStringArray.add("aaa");
    javaStringArray.add("abc");

    // when
    GenericData.Array<String> resultJavaStringArray = deserializeGenericFast(javaStringArraySchema, javaStringArraySchema,
            serializeGeneric(javaStringArray));

    // then
    Assert.assertEquals(2, resultJavaStringArray.size());
    Assert.assertEquals("aaa", resultJavaStringArray.get(0));
    Assert.assertEquals("abc", resultJavaStringArray.get(1));
}
 
Example 2
Source File: FastGenericDeserializerGeneratorTest.java    From avro-fastserde with Apache License 2.0
@Test
public void shouldReadMapOfJavaStrings() {
    // given
    Schema stringMapSchema = Schema.createMap(Schema.create(Schema.Type.STRING));
    Schema javaStringSchema = Schema.create(Schema.Type.STRING);
    GenericData.setStringType(javaStringSchema, GenericData.StringType.String);
    Schema javaStringMapSchema = Schema.createMap(javaStringSchema);

    Map<String, String> stringMap = new HashMap<>(0);
    stringMap.put("1", "abc");
    stringMap.put("2", "aaa");

    // when
    Map<Utf8, String> resultJavaStringMap = deserializeGenericFast(stringMapSchema, javaStringMapSchema,
            serializeGeneric(stringMap, javaStringMapSchema));

    // then
    Assert.assertEquals(2, resultJavaStringMap.size());
    Assert.assertEquals("abc", resultJavaStringMap.get(new Utf8("1")));
    Assert.assertEquals("aaa", resultJavaStringMap.get(new Utf8("2")));
}
 
Example 3
Source File: FastGenericDeserializerGeneratorTest.java    From avro-fastserde with Apache License 2.0
@Test
public void shouldReadJavaStringKeyedMapOfRecords() {
    // given
    Schema recordSchema = createRecord("record",
            createPrimitiveUnionFieldSchema("field", Schema.Type.STRING));

    Schema mapRecordSchema = Schema.createMap(recordSchema);
    GenericData.setStringType(mapRecordSchema, GenericData.StringType.String);

    GenericRecordBuilder subRecordBuilder = new GenericRecordBuilder(recordSchema);
    subRecordBuilder.set("field", "abc");

    Map<String, GenericData.Record> recordsMap = new HashMap<>();
    recordsMap.put("1", subRecordBuilder.build());
    recordsMap.put("2", subRecordBuilder.build());

    // when
    Map<String, GenericRecord> mapWithStringKeys = deserializeGenericFast(mapRecordSchema, mapRecordSchema, serializeGeneric(recordsMap, mapRecordSchema));

    // then
    Assert.assertEquals(2, mapWithStringKeys.size());
    Assert.assertEquals("abc", mapWithStringKeys.get("1").get("field").toString());
    Assert.assertEquals("abc", mapWithStringKeys.get("2").get("field").toString());
}
 
Example 4
Source File: TestWriteAvroResultWithSchema.java    From nifi with Apache License 2.0
@Override
protected GenericRecord readRecord(final InputStream in, final Schema schema) throws IOException {
    final DataFileStream<GenericRecord> dataFileStream = new DataFileStream<>(in, new GenericDatumReader<>());
    final Schema avroSchema = dataFileStream.getSchema();
    GenericData.setStringType(avroSchema, StringType.String);
    final GenericRecord avroRecord = dataFileStream.next();

    return avroRecord;
}
 
Example 5
Source File: TestWriteAvroResultWithSchema.java    From nifi with Apache License 2.0
@Override
protected List<GenericRecord> readRecords(final InputStream in, final Schema schema, final int recordCount) throws IOException {
    final DataFileStream<GenericRecord> dataFileStream = new DataFileStream<>(in, new GenericDatumReader<>());
    final Schema avroSchema = dataFileStream.getSchema();
    GenericData.setStringType(avroSchema, StringType.String);

    List<GenericRecord> records = new ArrayList<>();
    for (int i = 0; i < recordCount; i++) {
        records.add(dataFileStream.next());
    }

    return records;
}
 
Example 6
Source File: BucketingSinkTest.java    From Flink-CEPplus with Apache License 2.0
/**
 * This tests {@link AvroKeyValueSinkWriter}
 * with non-rolling output and with compression.
 */
@Test
public void testNonRollingAvroKeyValueWithCompressionWriter() throws Exception {
	final String outPath = hdfsURI + "/avro-kv-no-comp-non-rolling-out";

	final int numElements = 20;

	Map<String, String> properties = new HashMap<>();
	Schema keySchema = Schema.create(Schema.Type.INT);
	Schema valueSchema = Schema.create(Schema.Type.STRING);
	properties.put(AvroKeyValueSinkWriter.CONF_OUTPUT_KEY_SCHEMA, keySchema.toString());
	properties.put(AvroKeyValueSinkWriter.CONF_OUTPUT_VALUE_SCHEMA, valueSchema.toString());
	properties.put(AvroKeyValueSinkWriter.CONF_COMPRESS, String.valueOf(true));
	properties.put(AvroKeyValueSinkWriter.CONF_COMPRESS_CODEC, DataFileConstants.SNAPPY_CODEC);

	BucketingSink<Tuple2<Integer, String>> sink = new BucketingSink<Tuple2<Integer, String>>(outPath)
		.setWriter(new AvroKeyValueSinkWriter<Integer, String>(properties))
		.setBucketer(new BasePathBucketer<Tuple2<Integer, String>>())
		.setPartPrefix(PART_PREFIX)
		.setPendingPrefix("")
		.setPendingSuffix("");

	OneInputStreamOperatorTestHarness<Tuple2<Integer, String>, Object> testHarness =
		createTestSink(sink, 1, 0);

	testHarness.setProcessingTime(0L);

	testHarness.setup();
	testHarness.open();

	for (int i = 0; i < numElements; i++) {
		testHarness.processElement(new StreamRecord<>(Tuple2.of(
			i, "message #" + Integer.toString(i)
		)));
	}

	testHarness.close();

	GenericData.setStringType(valueSchema, GenericData.StringType.String);
	Schema elementSchema = AvroKeyValueSinkWriter.AvroKeyValue.getSchema(keySchema, valueSchema);

	FSDataInputStream inStream = dfs.open(new Path(outPath + "/" + PART_PREFIX + "-0-0"));

	SpecificDatumReader<GenericRecord> elementReader = new SpecificDatumReader<>(elementSchema);
	DataFileStream<GenericRecord> dataFileStream = new DataFileStream<>(inStream, elementReader);
	for (int i = 0; i < numElements; i++) {
		AvroKeyValueSinkWriter.AvroKeyValue<Integer, String> wrappedEntry =
			new AvroKeyValueSinkWriter.AvroKeyValue<>(dataFileStream.next());
		int key = wrappedEntry.getKey();
		Assert.assertEquals(i, key);
		String value = wrappedEntry.getValue();
		Assert.assertEquals("message #" + i, value);
	}

	dataFileStream.close();
	inStream.close();
}
 
Example 7
Source File: BucketingSinkTest.java    From Flink-CEPplus with Apache License 2.0
/**
 * This tests a user-defined HDFS configuration.
 */
@Test
public void testUserDefinedConfiguration() throws Exception {
	final String outPath = hdfsURI + "/string-non-rolling-with-config";
	final int numElements = 20;

	Map<String, String> properties = new HashMap<>();
	Schema keySchema = Schema.create(Schema.Type.INT);
	Schema valueSchema = Schema.create(Schema.Type.STRING);
	properties.put(AvroKeyValueSinkWriter.CONF_OUTPUT_KEY_SCHEMA, keySchema.toString());
	properties.put(AvroKeyValueSinkWriter.CONF_OUTPUT_VALUE_SCHEMA, valueSchema.toString());
	properties.put(AvroKeyValueSinkWriter.CONF_COMPRESS, String.valueOf(true));
	properties.put(AvroKeyValueSinkWriter.CONF_COMPRESS_CODEC, DataFileConstants.SNAPPY_CODEC);

	Configuration conf = new Configuration();
	conf.set("io.file.buffer.size", "40960");

	BucketingSink<Tuple2<Integer, String>> sink = new BucketingSink<Tuple2<Integer, String>>(outPath)
		.setFSConfig(conf)
		.setWriter(new StreamWriterWithConfigCheck<Integer, String>(properties, "io.file.buffer.size", "40960"))
		.setBucketer(new BasePathBucketer<Tuple2<Integer, String>>())
		.setPartPrefix(PART_PREFIX)
		.setPendingPrefix("")
		.setPendingSuffix("");

	OneInputStreamOperatorTestHarness<Tuple2<Integer, String>, Object> testHarness =
		createTestSink(sink, 1, 0);

	testHarness.setProcessingTime(0L);

	testHarness.setup();
	testHarness.open();

	for (int i = 0; i < numElements; i++) {
		testHarness.processElement(new StreamRecord<>(Tuple2.of(
			i, "message #" + Integer.toString(i)
		)));
	}

	testHarness.close();

	GenericData.setStringType(valueSchema, GenericData.StringType.String);
	Schema elementSchema = AvroKeyValueSinkWriter.AvroKeyValue.getSchema(keySchema, valueSchema);

	FSDataInputStream inStream = dfs.open(new Path(outPath + "/" + PART_PREFIX + "-0-0"));

	SpecificDatumReader<GenericRecord> elementReader = new SpecificDatumReader<>(elementSchema);
	DataFileStream<GenericRecord> dataFileStream = new DataFileStream<>(inStream, elementReader);
	for (int i = 0; i < numElements; i++) {
		AvroKeyValueSinkWriter.AvroKeyValue<Integer, String> wrappedEntry =
			new AvroKeyValueSinkWriter.AvroKeyValue<>(dataFileStream.next());
		int key = wrappedEntry.getKey();
		Assert.assertEquals(i, key);
		String value = wrappedEntry.getValue();
		Assert.assertEquals("message #" + i, value);
	}

	dataFileStream.close();
	inStream.close();
}
 
Example 8
Source File: BucketingSinkTest.java    From flink with Apache License 2.0
/**
 * This tests {@link AvroKeyValueSinkWriter}
 * with non-rolling output and with compression.
 */
@Test
public void testNonRollingAvroKeyValueWithCompressionWriter() throws Exception {
	final String outPath = hdfsURI + "/avro-kv-no-comp-non-rolling-out";

	final int numElements = 20;

	Map<String, String> properties = new HashMap<>();
	Schema keySchema = Schema.create(Schema.Type.INT);
	Schema valueSchema = Schema.create(Schema.Type.STRING);
	properties.put(AvroKeyValueSinkWriter.CONF_OUTPUT_KEY_SCHEMA, keySchema.toString());
	properties.put(AvroKeyValueSinkWriter.CONF_OUTPUT_VALUE_SCHEMA, valueSchema.toString());
	properties.put(AvroKeyValueSinkWriter.CONF_COMPRESS, String.valueOf(true));
	properties.put(AvroKeyValueSinkWriter.CONF_COMPRESS_CODEC, DataFileConstants.SNAPPY_CODEC);

	BucketingSink<Tuple2<Integer, String>> sink = new BucketingSink<Tuple2<Integer, String>>(outPath)
		.setWriter(new AvroKeyValueSinkWriter<Integer, String>(properties))
		.setBucketer(new BasePathBucketer<Tuple2<Integer, String>>())
		.setPartPrefix(PART_PREFIX)
		.setPendingPrefix("")
		.setPendingSuffix("");

	OneInputStreamOperatorTestHarness<Tuple2<Integer, String>, Object> testHarness =
		createTestSink(sink, 1, 0);

	testHarness.setProcessingTime(0L);

	testHarness.setup();
	testHarness.open();

	for (int i = 0; i < numElements; i++) {
		testHarness.processElement(new StreamRecord<>(Tuple2.of(
			i, "message #" + Integer.toString(i)
		)));
	}

	testHarness.close();

	GenericData.setStringType(valueSchema, GenericData.StringType.String);
	Schema elementSchema = AvroKeyValueSinkWriter.AvroKeyValue.getSchema(keySchema, valueSchema);

	FSDataInputStream inStream = dfs.open(new Path(outPath + "/" + PART_PREFIX + "-0-0"));

	SpecificDatumReader<GenericRecord> elementReader = new SpecificDatumReader<>(elementSchema);
	DataFileStream<GenericRecord> dataFileStream = new DataFileStream<>(inStream, elementReader);
	for (int i = 0; i < numElements; i++) {
		AvroKeyValueSinkWriter.AvroKeyValue<Integer, String> wrappedEntry =
			new AvroKeyValueSinkWriter.AvroKeyValue<>(dataFileStream.next());
		int key = wrappedEntry.getKey();
		Assert.assertEquals(i, key);
		String value = wrappedEntry.getValue();
		Assert.assertEquals("message #" + i, value);
	}

	dataFileStream.close();
	inStream.close();
}
 
Example 9
Source File: BucketingSinkTest.java    From flink with Apache License 2.0
/**
 * This tests a user-defined HDFS configuration.
 */
@Test
public void testUserDefinedConfiguration() throws Exception {
	final String outPath = hdfsURI + "/string-non-rolling-with-config";
	final int numElements = 20;

	Map<String, String> properties = new HashMap<>();
	Schema keySchema = Schema.create(Schema.Type.INT);
	Schema valueSchema = Schema.create(Schema.Type.STRING);
	properties.put(AvroKeyValueSinkWriter.CONF_OUTPUT_KEY_SCHEMA, keySchema.toString());
	properties.put(AvroKeyValueSinkWriter.CONF_OUTPUT_VALUE_SCHEMA, valueSchema.toString());
	properties.put(AvroKeyValueSinkWriter.CONF_COMPRESS, String.valueOf(true));
	properties.put(AvroKeyValueSinkWriter.CONF_COMPRESS_CODEC, DataFileConstants.SNAPPY_CODEC);

	Configuration conf = new Configuration();
	conf.set("io.file.buffer.size", "40960");

	BucketingSink<Tuple2<Integer, String>> sink = new BucketingSink<Tuple2<Integer, String>>(outPath)
		.setFSConfig(conf)
		.setWriter(new StreamWriterWithConfigCheck<Integer, String>(properties, "io.file.buffer.size", "40960"))
		.setBucketer(new BasePathBucketer<Tuple2<Integer, String>>())
		.setPartPrefix(PART_PREFIX)
		.setPendingPrefix("")
		.setPendingSuffix("");

	OneInputStreamOperatorTestHarness<Tuple2<Integer, String>, Object> testHarness =
		createTestSink(sink, 1, 0);

	testHarness.setProcessingTime(0L);

	testHarness.setup();
	testHarness.open();

	for (int i = 0; i < numElements; i++) {
		testHarness.processElement(new StreamRecord<>(Tuple2.of(
			i, "message #" + Integer.toString(i)
		)));
	}

	testHarness.close();

	GenericData.setStringType(valueSchema, GenericData.StringType.String);
	Schema elementSchema = AvroKeyValueSinkWriter.AvroKeyValue.getSchema(keySchema, valueSchema);

	FSDataInputStream inStream = dfs.open(new Path(outPath + "/" + PART_PREFIX + "-0-0"));

	SpecificDatumReader<GenericRecord> elementReader = new SpecificDatumReader<>(elementSchema);
	DataFileStream<GenericRecord> dataFileStream = new DataFileStream<>(inStream, elementReader);
	for (int i = 0; i < numElements; i++) {
		AvroKeyValueSinkWriter.AvroKeyValue<Integer, String> wrappedEntry =
			new AvroKeyValueSinkWriter.AvroKeyValue<>(dataFileStream.next());
		int key = wrappedEntry.getKey();
		Assert.assertEquals(i, key);
		String value = wrappedEntry.getValue();
		Assert.assertEquals("message #" + i, value);
	}

	dataFileStream.close();
	inStream.close();
}
 
Example 10
Source File: FastGenericSerializerGeneratorTest.java    From avro-fastserde with Apache License 2.0
@Test
public void shouldWritePrimitives() {
    // given
    Schema javaLangStringSchema = Schema.create(Schema.Type.STRING);
    GenericData.setStringType(javaLangStringSchema, GenericData.StringType.String);
    Schema recordSchema = createRecord("testRecord",
            createField("testInt", Schema.create(Schema.Type.INT)),
            createPrimitiveUnionFieldSchema("testIntUnion", Schema.Type.INT),
            createField("testString", Schema.create(Schema.Type.STRING)),
            createPrimitiveUnionFieldSchema("testStringUnion", Schema.Type.STRING),
            createField("testJavaString", javaLangStringSchema),
            createUnionField("testJavaStringUnion", javaLangStringSchema),
            createField("testLong", Schema.create(Schema.Type.LONG)),
            createPrimitiveUnionFieldSchema("testLongUnion", Schema.Type.LONG),
            createField("testDouble", Schema.create(Schema.Type.DOUBLE)),
            createPrimitiveUnionFieldSchema("testDoubleUnion", Schema.Type.DOUBLE),
            createField("testFloat", Schema.create(Schema.Type.FLOAT)),
            createPrimitiveUnionFieldSchema("testFloatUnion", Schema.Type.FLOAT),
            createField("testBoolean", Schema.create(Schema.Type.BOOLEAN)),
            createPrimitiveUnionFieldSchema("testBooleanUnion", Schema.Type.BOOLEAN),
            createField("testBytes", Schema.create(Schema.Type.BYTES)),
            createPrimitiveUnionFieldSchema("testBytesUnion", Schema.Type.BYTES));

    GenericRecordBuilder builder = new GenericRecordBuilder(recordSchema);
    builder.set("testInt", 1);
    builder.set("testIntUnion", 1);
    builder.set("testString", "aaa");
    builder.set("testStringUnion", "aaa");
    builder.set("testJavaString", "aaa");
    builder.set("testJavaStringUnion", "aaa");
    builder.set("testLong", 1L);
    builder.set("testLongUnion", 1L);
    builder.set("testDouble", 1.0);
    builder.set("testDoubleUnion", 1.0);
    builder.set("testFloat", 1.0f);
    builder.set("testFloatUnion", 1.0f);
    builder.set("testBoolean", true);
    builder.set("testBooleanUnion", true);
    builder.set("testBytes", ByteBuffer.wrap(new byte[]{0x01, 0x02}));
    builder.set("testBytesUnion", ByteBuffer.wrap(new byte[]{0x01, 0x02}));

    // when
    GenericRecord record = deserializeGeneric(recordSchema, serializeGenericFast(builder.build()));

    // then
    Assert.assertEquals(1, record.get("testInt"));
    Assert.assertEquals(1, record.get("testIntUnion"));
    Assert.assertEquals("aaa", record.get("testString").toString());
    Assert.assertEquals("aaa", record.get("testStringUnion").toString());
    Assert.assertEquals("aaa", record.get("testJavaString"));
    Assert.assertEquals("aaa", record.get("testJavaStringUnion"));
    Assert.assertEquals(1L, record.get("testLong"));
    Assert.assertEquals(1L, record.get("testLongUnion"));
    Assert.assertEquals(1.0, record.get("testDouble"));
    Assert.assertEquals(1.0, record.get("testDoubleUnion"));
    Assert.assertEquals(1.0f, record.get("testFloat"));
    Assert.assertEquals(1.0f, record.get("testFloatUnion"));
    Assert.assertEquals(true, record.get("testBoolean"));
    Assert.assertEquals(true, record.get("testBooleanUnion"));
    Assert.assertEquals(ByteBuffer.wrap(new byte[]{0x01, 0x02}), record.get("testBytes"));
    Assert.assertEquals(ByteBuffer.wrap(new byte[]{0x01, 0x02}), record.get("testBytesUnion"));

}
 
Example 11
Source File: FastGenericDeserializerGeneratorTest.java    From avro-fastserde with Apache License 2.0
@Test
public void shouldReadPrimitives() {
    // given
    Schema javaLangStringSchema = Schema.create(Schema.Type.STRING);
    GenericData.setStringType(javaLangStringSchema, GenericData.StringType.String);
    Schema recordSchema = createRecord("testRecord",
            createField("testInt", Schema.create(Schema.Type.INT)),
            createPrimitiveUnionFieldSchema("testIntUnion", Schema.Type.INT),
            createField("testString", Schema.create(Schema.Type.STRING)),
            createPrimitiveUnionFieldSchema("testStringUnion", Schema.Type.STRING),
            createField("testJavaString", javaLangStringSchema),
            createUnionField("testJavaStringUnion", javaLangStringSchema),
            createField("testLong", Schema.create(Schema.Type.LONG)),
            createPrimitiveUnionFieldSchema("testLongUnion", Schema.Type.LONG),
            createField("testDouble", Schema.create(Schema.Type.DOUBLE)),
            createPrimitiveUnionFieldSchema("testDoubleUnion", Schema.Type.DOUBLE),
            createField("testFloat", Schema.create(Schema.Type.FLOAT)),
            createPrimitiveUnionFieldSchema("testFloatUnion", Schema.Type.FLOAT),
            createField("testBoolean", Schema.create(Schema.Type.BOOLEAN)),
            createPrimitiveUnionFieldSchema("testBooleanUnion", Schema.Type.BOOLEAN),
            createField("testBytes", Schema.create(Schema.Type.BYTES)),
            createPrimitiveUnionFieldSchema("testBytesUnion", Schema.Type.BYTES));

    GenericRecordBuilder builder = new GenericRecordBuilder(recordSchema);
    builder.set("testInt", 1);
    builder.set("testIntUnion", 1);
    builder.set("testString", "aaa");
    builder.set("testStringUnion", "aaa");
    builder.set("testJavaString", "aaa");
    builder.set("testJavaStringUnion", "aaa");
    builder.set("testLong", 1L);
    builder.set("testLongUnion", 1L);
    builder.set("testDouble", 1.0);
    builder.set("testDoubleUnion", 1.0);
    builder.set("testFloat", 1.0f);
    builder.set("testFloatUnion", 1.0f);
    builder.set("testBoolean", true);
    builder.set("testBooleanUnion", true);
    builder.set("testBytes", ByteBuffer.wrap(new byte[]{0x01, 0x02}));
    builder.set("testBytesUnion", ByteBuffer.wrap(new byte[]{0x01, 0x02}));

    // when
    GenericRecord record = deserializeGenericFast(recordSchema, recordSchema, serializeGeneric(builder.build()));

    // then
    Assert.assertEquals(1, record.get("testInt"));
    Assert.assertEquals(1, record.get("testIntUnion"));
    Assert.assertEquals("aaa", record.get("testString").toString());
    Assert.assertEquals("aaa", record.get("testStringUnion").toString());
    Assert.assertEquals("aaa", record.get("testJavaString"));
    Assert.assertEquals("aaa", record.get("testJavaStringUnion"));
    Assert.assertEquals(1L, record.get("testLong"));
    Assert.assertEquals(1L, record.get("testLongUnion"));
    Assert.assertEquals(1.0, record.get("testDouble"));
    Assert.assertEquals(1.0, record.get("testDoubleUnion"));
    Assert.assertEquals(1.0f, record.get("testFloat"));
    Assert.assertEquals(1.0f, record.get("testFloatUnion"));
    Assert.assertEquals(true, record.get("testBoolean"));
    Assert.assertEquals(true, record.get("testBooleanUnion"));
    Assert.assertEquals(ByteBuffer.wrap(new byte[]{0x01, 0x02}), record.get("testBytes"));
    Assert.assertEquals(ByteBuffer.wrap(new byte[]{0x01, 0x02}), record.get("testBytesUnion"));

}
 
Example 12
Source File: FastSpecificSerializerGeneratorTest.java    From avro-fastserde with Apache License 2.0
@Test
public void shouldWriteMapOfRecords() {
    // given
    Schema mapRecordSchema = Schema.createMap(TestRecord.getClassSchema());
    GenericData.setStringType(mapRecordSchema, GenericData.StringType.String);

    TestRecord testRecord = emptyTestRecord();
    testRecord.put("testString", "abc");

    Map<String, TestRecord> recordsMap = new HashMap<>();
    recordsMap.put("1", testRecord);
    recordsMap.put("2", testRecord);

    // when
    Map<String, TestRecord> map = deserializeSpecific(mapRecordSchema,
            serializeSpecificFast(recordsMap, mapRecordSchema));

    // then
    Assert.assertEquals(2, map.size());
    Assert.assertEquals("abc", map.get("1").get("testString"));
    Assert.assertEquals("abc", map.get("2").get("testString"));

    // given
    mapRecordSchema = Schema.createMap(createUnionSchema(TestRecord
            .getClassSchema()));
    GenericData.setStringType(mapRecordSchema, GenericData.StringType.String);

    testRecord = emptyTestRecord();
    testRecord.put("testString", "abc");

    recordsMap = new HashMap<>();
    recordsMap.put("1", testRecord);
    recordsMap.put("2", testRecord);

    // when
    map = deserializeSpecific(mapRecordSchema, serializeSpecificFast(recordsMap, mapRecordSchema));

    // then
    Assert.assertEquals(2, map.size());
    Assert.assertEquals("abc", map.get("1").get("testString"));
    Assert.assertEquals("abc", map.get("2").get("testString"));
}
 
Example 13
Source File: TestExecuteSQLRecord.java    From nifi with Apache License 2.0
@Test
public void testWriteLOBsToAvro() throws Exception {
    final DBCPService dbcp = new DBCPServiceSimpleImpl("h2");
    final Map<String, String> dbcpProperties = new HashMap<>();

    runner = TestRunners.newTestRunner(ExecuteSQLRecord.class);
    runner.addControllerService("dbcp", dbcp, dbcpProperties);
    runner.enableControllerService(dbcp);
    runner.setProperty(AbstractExecuteSQL.DBCP_SERVICE, "dbcp");

    // remove previous test database, if any
    final File dbLocation = new File(DB_LOCATION);
    dbLocation.delete();

    // load test data to database
    final Connection con = ((DBCPService) runner.getControllerService("dbcp")).getConnection();
    Statement stmt = con.createStatement();

    try {
        stmt.execute("drop table TEST_NULL_INT");
    } catch (final SQLException sqle) {
        // ignored: the table may not exist yet
    }

    stmt.execute("create table TEST_NULL_INT (id integer not null, val1 integer, val2 integer, image blob(1K), words clob(1K), "
            + "natwords nclob(1K), constraint my_pk primary key (id))");
    stmt.execute("insert into TEST_NULL_INT (id, val1, val2, image, words, natwords) VALUES (0, NULL, 1, CAST (X'DEADBEEF' AS BLOB), "
            + "CAST ('Hello World' AS CLOB), CAST ('I am an NCLOB' AS NCLOB))");

    runner.setIncomingConnection(false);
    runner.setProperty(AbstractExecuteSQL.SQL_SELECT_QUERY, "select * from TEST_NULL_INT");
    AvroRecordSetWriter recordWriter = new AvroRecordSetWriter();
    runner.addControllerService("writer", recordWriter);
    runner.setProperty(recordWriter, SchemaAccessUtils.SCHEMA_ACCESS_STRATEGY, SchemaAccessUtils.INHERIT_RECORD_SCHEMA);
    runner.setProperty(ExecuteSQLRecord.RECORD_WRITER_FACTORY, "writer");
    runner.enableControllerService(recordWriter);
    runner.run();

    runner.assertAllFlowFilesTransferred(AbstractExecuteSQL.REL_SUCCESS, 1);
    MockFlowFile flowFile = runner.getFlowFilesForRelationship(AbstractExecuteSQL.REL_SUCCESS).get(0);
    flowFile.assertAttributeEquals(AbstractExecuteSQL.RESULT_ROW_COUNT, "1");

    ByteArrayInputStream bais = new ByteArrayInputStream(flowFile.toByteArray());
    final DataFileStream<GenericRecord> dataFileStream = new DataFileStream<>(bais, new GenericDatumReader<>());
    final Schema avroSchema = dataFileStream.getSchema();
    GenericData.setStringType(avroSchema, GenericData.StringType.String);
    final GenericRecord avroRecord = dataFileStream.next();

    Object imageObj = avroRecord.get("IMAGE");
    assertNotNull(imageObj);
    assertTrue(imageObj instanceof ByteBuffer);
    assertArrayEquals(new byte[]{(byte) 0xDE, (byte) 0xAD, (byte) 0xBE, (byte) 0xEF}, ((ByteBuffer) imageObj).array());

    Object wordsObj = avroRecord.get("WORDS");
    assertNotNull(wordsObj);
    assertTrue(wordsObj instanceof Utf8);
    assertEquals("Hello World", wordsObj.toString());

    Object natwordsObj = avroRecord.get("NATWORDS");
    assertNotNull(natwordsObj);
    assertTrue(natwordsObj instanceof Utf8);
    assertEquals("I am an NCLOB", natwordsObj.toString());
}