org.apache.flink.formats.avro.typeutils.AvroSchemaConverter Java Examples

The following examples show how to use org.apache.flink.formats.avro.typeutils.AvroSchemaConverter. You can vote up the examples you find useful or vote down the ones you don't. To see an example in its original context, follow the project or source-file link above it; related API usage is listed in the sidebar.
Example #1
Source File: AvroFileSystemFormatFactory.java    From flink with Apache License 2.0 6 votes vote down vote up
@Override
public BulkWriter<RowData> create(FSDataOutputStream out) throws IOException {
	// Wrap the GenericRecord bulk writer so callers can hand us RowData directly.
	final BulkWriter<GenericRecord> delegate = factory.create(out);
	final Schema avroSchema = AvroSchemaConverter.convertToSchema(rowType);
	final AvroRowDataSerializationSchema.SerializationRuntimeConverter rowConverter =
			AvroRowDataSerializationSchema.createRowConverter(rowType);

	return new BulkWriter<RowData>() {

		@Override
		public void addElement(RowData element) throws IOException {
			// Convert the Flink row into an Avro record, then forward it.
			GenericRecord avroRecord = (GenericRecord) rowConverter.convert(avroSchema, element);
			delegate.addElement(avroRecord);
		}

		@Override
		public void flush() throws IOException {
			delegate.flush();
		}

		@Override
		public void finish() throws IOException {
			delegate.finish();
		}
	};
}
 
Example #2
Source File: AvroRowDeserializationSchema.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Creates an Avro deserialization schema for the given specific record class. Supplying the
 * concrete Avro record class may improve performance.
 *
 * @param recordClazz Avro record class used to deserialize Avro's record to Flink's row
 */
public AvroRowDeserializationSchema(Class<? extends SpecificRecord> recordClazz) {
	Preconditions.checkNotNull(recordClazz, "Avro record class must not be null.");
	this.recordClazz = recordClazz;
	// Derive the Avro schema (and its string form) from the generated record class.
	this.schema = SpecificData.get().getSchema(recordClazz);
	this.schemaString = schema.toString();
	this.typeInfo = (RowTypeInfo) AvroSchemaConverter.convertToTypeInfo(recordClazz);
	// Reusable record instance plus the decoding machinery.
	this.record = (IndexedRecord) SpecificData.newInstance(recordClazz, schema);
	this.datumReader = new SpecificDatumReader<>(schema);
	this.inputStream = new MutableByteArrayInputStream();
	this.decoder = DecoderFactory.get().binaryDecoder(inputStream, null);
}
 
Example #3
Source File: AvroRowDeserializationSchema.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Creates an Avro deserialization schema for the given Avro schema string.
 *
 * @param avroSchemaString Avro schema string to deserialize Avro's record to Flink's row
 */
public AvroRowDeserializationSchema(String avroSchemaString) {
	Preconditions.checkNotNull(avroSchemaString, "Avro schema must not be null.");
	// No generated record class is available in this mode.
	this.recordClazz = null;
	this.schemaString = avroSchemaString;
	this.schema = new Schema.Parser().parse(avroSchemaString);
	// Only row-shaped schemas are supported here.
	final TypeInformation<?> convertedType = AvroSchemaConverter.convertToTypeInfo(avroSchemaString);
	Preconditions.checkArgument(convertedType instanceof RowTypeInfo, "Row type information expected.");
	this.typeInfo = (RowTypeInfo) convertedType;
	// Reusable generic record plus the decoding machinery.
	this.record = new GenericData.Record(schema);
	this.datumReader = new GenericDatumReader<>(schema);
	this.inputStream = new MutableByteArrayInputStream();
	this.decoder = DecoderFactory.get().binaryDecoder(inputStream, null);
}
 
Example #4
Source File: AvroRowDeserializationSchema.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
// Custom Java deserialization hook: restores all non-serialized runtime state
// (schema, reusable record, datum reader, input stream, decoder) from the
// record class and schema string written by the serializing side.
@SuppressWarnings("unchecked")
private void readObject(ObjectInputStream inputStream) throws ClassNotFoundException, IOException {
	// Read fields in the exact order the writing side emitted them.
	recordClazz = (Class<? extends SpecificRecord>) inputStream.readObject();
	schemaString = inputStream.readUTF();
	typeInfo = (RowTypeInfo) AvroSchemaConverter.<Row>convertToTypeInfo(schemaString);
	schema = new Schema.Parser().parse(schemaString);
	if (recordClazz != null) {
		// Specific mode: instantiate the generated Avro record type.
		record = (SpecificRecord) SpecificData.newInstance(recordClazz, schema);
	} else {
		// Generic mode: fall back to a generic record for the parsed schema.
		record = new GenericData.Record(schema);
	}
	// NOTE(review): a SpecificDatumReader is created even when recordClazz is
	// null — presumably it degrades to generic reading in that case; confirm.
	datumReader = new SpecificDatumReader<>(schema);
	this.inputStream = new MutableByteArrayInputStream();
	decoder = DecoderFactory.get().binaryDecoder(this.inputStream, null);
}
 
Example #5
Source File: AvroRowDeserializationSchema.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Creates an Avro deserialization schema for the given specific record class. Supplying the
 * concrete Avro record class may improve performance.
 *
 * @param recordClazz Avro record class used to deserialize Avro's record to Flink's row
 */
public AvroRowDeserializationSchema(Class<? extends SpecificRecord> recordClazz) {
	Preconditions.checkNotNull(recordClazz, "Avro record class must not be null.");
	this.recordClazz = recordClazz;
	// Derive the Avro schema (and its string form) from the generated record class.
	this.schema = SpecificData.get().getSchema(recordClazz);
	this.schemaString = schema.toString();
	this.typeInfo = (RowTypeInfo) AvroSchemaConverter.convertToTypeInfo(recordClazz);
	// Reusable record instance plus the decoding machinery.
	this.record = (IndexedRecord) SpecificData.newInstance(recordClazz, schema);
	this.datumReader = new SpecificDatumReader<>(schema);
	this.inputStream = new MutableByteArrayInputStream();
	this.decoder = DecoderFactory.get().binaryDecoder(inputStream, null);
}
 
Example #6
Source File: AvroRowDeserializationSchema.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Creates an Avro deserialization schema for the given Avro schema string.
 *
 * @param avroSchemaString Avro schema string to deserialize Avro's record to Flink's row
 */
public AvroRowDeserializationSchema(String avroSchemaString) {
	Preconditions.checkNotNull(avroSchemaString, "Avro schema must not be null.");
	// No generated record class is available in this mode.
	this.recordClazz = null;
	this.schemaString = avroSchemaString;
	this.schema = new Schema.Parser().parse(avroSchemaString);
	// Only row-shaped schemas are supported here.
	final TypeInformation<?> convertedType = AvroSchemaConverter.convertToTypeInfo(avroSchemaString);
	Preconditions.checkArgument(convertedType instanceof RowTypeInfo, "Row type information expected.");
	this.typeInfo = (RowTypeInfo) convertedType;
	// Reusable generic record plus the decoding machinery.
	this.record = new GenericData.Record(schema);
	this.datumReader = new GenericDatumReader<>(schema);
	this.inputStream = new MutableByteArrayInputStream();
	this.decoder = DecoderFactory.get().binaryDecoder(inputStream, null);
}
 
Example #7
Source File: AvroRowDeserializationSchema.java    From flink with Apache License 2.0 5 votes vote down vote up
// Custom Java deserialization hook: restores all non-serialized runtime state
// (schema, reusable record, datum reader, input stream, decoder) from the
// record class and schema string written by the serializing side.
@SuppressWarnings("unchecked")
private void readObject(ObjectInputStream inputStream) throws ClassNotFoundException, IOException {
	// Read fields in the exact order the writing side emitted them.
	recordClazz = (Class<? extends SpecificRecord>) inputStream.readObject();
	schemaString = inputStream.readUTF();
	typeInfo = (RowTypeInfo) AvroSchemaConverter.<Row>convertToTypeInfo(schemaString);
	schema = new Schema.Parser().parse(schemaString);
	if (recordClazz != null) {
		// Specific mode: instantiate the generated Avro record type.
		record = (SpecificRecord) SpecificData.newInstance(recordClazz, schema);
	} else {
		// Generic mode: fall back to a generic record for the parsed schema.
		record = new GenericData.Record(schema);
	}
	// NOTE(review): a SpecificDatumReader is created even when recordClazz is
	// null — presumably it degrades to generic reading in that case; confirm.
	datumReader = new SpecificDatumReader<>(schema);
	this.inputStream = new MutableByteArrayInputStream();
	decoder = DecoderFactory.get().binaryDecoder(this.inputStream, null);
}
 
Example #8
Source File: AvroRowDeserializationSchema.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Creates an Avro deserialization schema for the given specific record class. Supplying the
 * concrete Avro record class may improve performance.
 *
 * @param recordClazz Avro record class used to deserialize Avro's record to Flink's row
 */
public AvroRowDeserializationSchema(Class<? extends SpecificRecord> recordClazz) {
	Preconditions.checkNotNull(recordClazz, "Avro record class must not be null.");
	this.recordClazz = recordClazz;
	// Derive the Avro schema (and its string form) from the generated record class.
	this.schema = SpecificData.get().getSchema(recordClazz);
	this.schemaString = schema.toString();
	this.typeInfo = (RowTypeInfo) AvroSchemaConverter.convertToTypeInfo(recordClazz);
	// Reusable record instance plus the decoding machinery.
	this.record = (IndexedRecord) SpecificData.newInstance(recordClazz, schema);
	this.datumReader = new SpecificDatumReader<>(schema);
	this.inputStream = new MutableByteArrayInputStream();
	this.decoder = DecoderFactory.get().binaryDecoder(inputStream, null);
}
 
Example #9
Source File: AvroRowDeserializationSchema.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Creates an Avro deserialization schema for the given Avro schema string.
 *
 * @param avroSchemaString Avro schema string to deserialize Avro's record to Flink's row
 */
public AvroRowDeserializationSchema(String avroSchemaString) {
	Preconditions.checkNotNull(avroSchemaString, "Avro schema must not be null.");
	// No generated record class is available in this mode.
	this.recordClazz = null;
	this.schemaString = avroSchemaString;
	this.schema = new Schema.Parser().parse(avroSchemaString);
	// Only row-shaped schemas are supported here.
	final TypeInformation<?> convertedType = AvroSchemaConverter.convertToTypeInfo(avroSchemaString);
	Preconditions.checkArgument(convertedType instanceof RowTypeInfo, "Row type information expected.");
	this.typeInfo = (RowTypeInfo) convertedType;
	// Reusable generic record plus the decoding machinery.
	this.record = new GenericData.Record(schema);
	this.datumReader = new GenericDatumReader<>(schema);
	this.inputStream = new MutableByteArrayInputStream();
	this.decoder = DecoderFactory.get().binaryDecoder(inputStream, null);
}
 
Example #10
Source File: AvroRowDeserializationSchema.java    From flink with Apache License 2.0 5 votes vote down vote up
// Custom Java deserialization hook: restores all non-serialized runtime state
// (schema, reusable record, datum reader, input stream, decoder) from the
// record class and schema string written by the serializing side.
@SuppressWarnings("unchecked")
private void readObject(ObjectInputStream inputStream) throws ClassNotFoundException, IOException {
	// Read fields in the exact order the writing side emitted them.
	recordClazz = (Class<? extends SpecificRecord>) inputStream.readObject();
	schemaString = inputStream.readUTF();
	typeInfo = (RowTypeInfo) AvroSchemaConverter.<Row>convertToTypeInfo(schemaString);
	schema = new Schema.Parser().parse(schemaString);
	if (recordClazz != null) {
		// Specific mode: instantiate the generated Avro record type.
		record = (SpecificRecord) SpecificData.newInstance(recordClazz, schema);
	} else {
		// Generic mode: fall back to a generic record for the parsed schema.
		record = new GenericData.Record(schema);
	}
	// NOTE(review): a SpecificDatumReader is created even when recordClazz is
	// null — presumably it degrades to generic reading in that case; confirm.
	datumReader = new SpecificDatumReader<>(schema);
	this.inputStream = new MutableByteArrayInputStream();
	decoder = DecoderFactory.get().binaryDecoder(this.inputStream, null);
}
 
Example #11
Source File: AvroFileSystemFormatFactory.java    From flink with Apache License 2.0 5 votes vote down vote up
@Override
public void open(FileInputSplit split) throws IOException {
	super.open(split);
	// Reusable Avro record and runtime converter matching the format's row type.
	final Schema avroSchema = AvroSchemaConverter.convertToSchema(formatRowType);
	this.record = new GenericData.Record(avroSchema);
	this.converter = AvroRowDataDeserializationSchema.createRowConverter(formatRowType);
	// Pre-fill partition values derived from the current split's path.
	this.rowData = PartitionPathUtils.fillPartitionValueForRecord(
			fieldNames,
			fieldTypes,
			selectFields,
			partitionKeys,
			currentSplit.getPath(),
			defaultPartValue);
}
 
Example #12
Source File: AvroFileSystemFormatFactory.java    From flink with Apache License 2.0 5 votes vote down vote up
private RowDataAvroWriterFactory(RowType rowType, String codec) {
	this.rowType = rowType;
	// The AvroBuilder creates one DataFileWriter per target output stream.
	this.factory = new AvroWriterFactory<>((AvroBuilder<GenericRecord>) stream -> {
		final Schema avroSchema = AvroSchemaConverter.convertToSchema(rowType);
		final DataFileWriter<GenericRecord> fileWriter =
				new DataFileWriter<>(new GenericDatumWriter<>(avroSchema));
		if (codec != null) {
			// Apply the requested compression codec, when one was configured.
			fileWriter.setCodec(CodecFactory.fromString(codec));
		}
		fileWriter.create(avroSchema, stream);
		return fileWriter;
	});
}
 
Example #13
Source File: AvroRowDataSerializationSchema.java    From flink with Apache License 2.0 5 votes vote down vote up
@Override
public void open(InitializationContext context) throws Exception {
	// Derive the Avro schema from the logical row type, then set up the
	// reusable serialization machinery (writer, buffer, binary encoder).
	this.schema = AvroSchemaConverter.convertToSchema(rowType);
	this.datumWriter = new SpecificDatumWriter<>(schema);
	this.arrayOutputStream = new ByteArrayOutputStream();
	this.encoder = EncoderFactory.get().binaryEncoder(arrayOutputStream, null);
}
 
Example #14
Source File: AvroRowDataDeserializationSchema.java    From flink with Apache License 2.0 5 votes vote down vote up
@Override
public void open(InitializationContext context) throws Exception {
	// Derive the Avro schema from the logical row type, then set up the
	// reusable deserialization machinery (record, reader, stream, decoder).
	final Schema avroSchema = AvroSchemaConverter.convertToSchema(rowType);
	this.record = new GenericData.Record(avroSchema);
	this.datumReader = new SpecificDatumReader<>(avroSchema);
	this.inputStream = new MutableByteArrayInputStream();
	this.decoder = DecoderFactory.get().binaryDecoder(this.inputStream, null);
}
 
Example #15
Source File: AvroRowDataDeSerializationSchemaTest.java    From flink with Apache License 2.0 4 votes vote down vote up
// Round-trip test: encode a GenericRecord with plain Avro, deserialize it to
// RowData, serialize back, and require the output bytes to equal the input.
@Test
public void testSerializeDeserialize() throws Exception {
	// Row type covering the logical types exercised by this round trip
	// (primitives, string/bytes, decimal, array, time/date/timestamp, nested maps).
	final DataType dataType = ROW(
		FIELD("bool", BOOLEAN()),
		FIELD("tinyint", TINYINT()),
		FIELD("smallint", SMALLINT()),
		FIELD("int", INT()),
		FIELD("bigint", BIGINT()),
		FIELD("float", FLOAT()),
		FIELD("double", DOUBLE()),
		FIELD("name", STRING()),
		FIELD("bytes", BYTES()),
		FIELD("decimal", DECIMAL(19, 6)),
		FIELD("doubles", ARRAY(DOUBLE())),
		FIELD("time", TIME(0)),
		FIELD("date", DATE()),
		FIELD("timestamp3", TIMESTAMP(3)),
		FIELD("timestamp3_2", TIMESTAMP(3)),
		FIELD("map", MAP(STRING(), BIGINT())),
		FIELD("map2map", MAP(STRING(), MAP(STRING(), INT()))),
		FIELD("map2array", MAP(STRING(), ARRAY(INT()))));
	final RowType rowType = (RowType) dataType.getLogicalType();
	final TypeInformation<RowData> typeInfo = new RowDataTypeInfo(rowType);

	// Populate the record by positional index; indices must match the field
	// order of the row type declared above.
	final Schema schema = AvroSchemaConverter.convertToSchema(rowType);
	final GenericRecord record = new GenericData.Record(schema);
	record.put(0, true);
	// tinyint/smallint are written as int values here.
	record.put(1, (int) Byte.MAX_VALUE);
	record.put(2, (int) Short.MAX_VALUE);
	record.put(3, 33);
	record.put(4, 44L);
	record.put(5, 12.34F);
	record.put(6, 23.45);
	record.put(7, "hello avro");
	record.put(8, ByteBuffer.wrap(new byte[]{1, 2, 4, 5, 6, 7, 8, 12}));

	// The decimal travels as its unscaled two's-complement bytes (scale 6).
	record.put(9, ByteBuffer.wrap(
			BigDecimal.valueOf(123456789, 6).unscaledValue().toByteArray()));

	List<Double> doubles = new ArrayList<>();
	doubles.add(1.2);
	doubles.add(3.4);
	doubles.add(567.8901);
	record.put(10, doubles);

	// Time/date/timestamp fields are written as raw numeric values.
	// NOTE(review): presumably millis-of-day, epoch days and epoch millis —
	// confirm against AvroSchemaConverter's logical-type mapping.
	record.put(11, 18397);
	record.put(12, 10087);
	record.put(13, 1589530213123L);
	record.put(14, 1589530213122L);

	Map<String, Long> map = new HashMap<>();
	map.put("flink", 12L);
	map.put("avro", 23L);
	record.put(15, map);

	// Nested map-of-maps.
	Map<String, Map<String, Integer>> map2map = new HashMap<>();
	Map<String, Integer> innerMap = new HashMap<>();
	innerMap.put("inner_key1", 123);
	innerMap.put("inner_key2", 234);
	map2map.put("outer_key", innerMap);
	record.put(16, map2map);

	// Map-of-arrays.
	List<Integer> list1 = Arrays.asList(1, 2, 3, 4, 5, 6);
	List<Integer> list2 = Arrays.asList(11, 22, 33, 44, 55);
	Map<String, List<Integer>> map2list = new HashMap<>();
	map2list.put("list1", list1);
	map2list.put("list2", list2);
	record.put(17, map2list);

	AvroRowDataSerializationSchema serializationSchema = new AvroRowDataSerializationSchema(rowType);
	serializationSchema.open(null);
	AvroRowDataDeserializationSchema deserializationSchema =
		new AvroRowDataDeserializationSchema(rowType, typeInfo);
	deserializationSchema.open(null);

	// Encode the record with plain Avro to produce the reference input bytes.
	ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
	GenericDatumWriter<IndexedRecord> datumWriter = new GenericDatumWriter<>(schema);
	Encoder encoder = EncoderFactory.get().binaryEncoder(byteArrayOutputStream, null);
	datumWriter.write(record, encoder);
	encoder.flush();
	byte[] input = byteArrayOutputStream.toByteArray();

	// Round trip through Flink's schemas; bytes must be reproduced exactly.
	RowData rowData = deserializationSchema.deserialize(input);
	byte[] output = serializationSchema.serialize(rowData);

	assertArrayEquals(input, output);
}